linux/drivers/pci/xen-pcifront.c
<<
>>
Prefs
   1/*
   2 * Xen PCI Frontend.
   3 *
   4 *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
   5 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <xen/xenbus.h>
#include <xen/events.h>
#include <xen/grant_table.h>
#include <xen/page.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/msi.h>
#include <xen/interface/io/pciif.h>
#include <asm/xen/pci.h>
#include <linux/interrupt.h>
#include <linux/atomic.h>
#include <linux/workqueue.h>
#include <linux/bitops.h>
#include <linux/time.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>

#include <asm/xen/swiotlb-xen.h>
  25#define INVALID_GRANT_REF (0)
  26#define INVALID_EVTCHN    (-1)
  27
  28struct pci_bus_entry {
  29        struct list_head list;
  30        struct pci_bus *bus;
  31};
  32
  33#define _PDEVB_op_active                (0)
  34#define PDEVB_op_active                 (1 << (_PDEVB_op_active))
  35
  36struct pcifront_device {
  37        struct xenbus_device *xdev;
  38        struct list_head root_buses;
  39
  40        int evtchn;
  41        int gnt_ref;
  42
  43        int irq;
  44
  45        /* Lock this when doing any operations in sh_info */
  46        spinlock_t sh_info_lock;
  47        struct xen_pci_sharedinfo *sh_info;
  48        struct work_struct op_work;
  49        unsigned long flags;
  50
  51};
  52
  53struct pcifront_sd {
  54        int domain;
  55        struct pcifront_device *pdev;
  56};
  57
  58static inline struct pcifront_device *
  59pcifront_get_pdev(struct pcifront_sd *sd)
  60{
  61        return sd->pdev;
  62}
  63
  64static inline void pcifront_init_sd(struct pcifront_sd *sd,
  65                                    unsigned int domain, unsigned int bus,
  66                                    struct pcifront_device *pdev)
  67{
  68        sd->domain = domain;
  69        sd->pdev = pdev;
  70}
  71
  72static DEFINE_SPINLOCK(pcifront_dev_lock);
  73static struct pcifront_device *pcifront_dev;
  74
  75static int verbose_request;
  76module_param(verbose_request, int, 0644);
  77
  78static int errno_to_pcibios_err(int errno)
  79{
  80        switch (errno) {
  81        case XEN_PCI_ERR_success:
  82                return PCIBIOS_SUCCESSFUL;
  83
  84        case XEN_PCI_ERR_dev_not_found:
  85                return PCIBIOS_DEVICE_NOT_FOUND;
  86
  87        case XEN_PCI_ERR_invalid_offset:
  88        case XEN_PCI_ERR_op_failed:
  89                return PCIBIOS_BAD_REGISTER_NUMBER;
  90
  91        case XEN_PCI_ERR_not_implemented:
  92                return PCIBIOS_FUNC_NOT_SUPPORTED;
  93
  94        case XEN_PCI_ERR_access_denied:
  95                return PCIBIOS_SET_FAILED;
  96        }
  97        return errno;
  98}
  99
 100static inline void schedule_pcifront_aer_op(struct pcifront_device *pdev)
 101{
 102        if (test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
 103                && !test_and_set_bit(_PDEVB_op_active, &pdev->flags)) {
 104                dev_dbg(&pdev->xdev->dev, "schedule aer frontend job\n");
 105                schedule_work(&pdev->op_work);
 106        }
 107}
 108
 109static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
 110{
 111        int err = 0;
 112        struct xen_pci_op *active_op = &pdev->sh_info->op;
 113        unsigned long irq_flags;
 114        evtchn_port_t port = pdev->evtchn;
 115        unsigned irq = pdev->irq;
 116        s64 ns, ns_timeout;
 117        struct timeval tv;
 118
 119        spin_lock_irqsave(&pdev->sh_info_lock, irq_flags);
 120
 121        memcpy(active_op, op, sizeof(struct xen_pci_op));
 122
 123        /* Go */
 124        wmb();
 125        set_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
 126        notify_remote_via_evtchn(port);
 127
 128        /*
 129         * We set a poll timeout of 3 seconds but give up on return after
 130         * 2 seconds. It is better to time out too late rather than too early
 131         * (in the latter case we end up continually re-executing poll() with a
 132         * timeout in the past). 1s difference gives plenty of slack for error.
 133         */
 134        do_gettimeofday(&tv);
 135        ns_timeout = timeval_to_ns(&tv) + 2 * (s64)NSEC_PER_SEC;
 136
 137        xen_clear_irq_pending(irq);
 138
 139        while (test_bit(_XEN_PCIF_active,
 140                        (unsigned long *)&pdev->sh_info->flags)) {
 141                xen_poll_irq_timeout(irq, jiffies + 3*HZ);
 142                xen_clear_irq_pending(irq);
 143                do_gettimeofday(&tv);
 144                ns = timeval_to_ns(&tv);
 145                if (ns > ns_timeout) {
 146                        dev_err(&pdev->xdev->dev,
 147                                "pciback not responding!!!\n");
 148                        clear_bit(_XEN_PCIF_active,
 149                                  (unsigned long *)&pdev->sh_info->flags);
 150                        err = XEN_PCI_ERR_dev_not_found;
 151                        goto out;
 152                }
 153        }
 154
 155        /*
 156        * We might lose backend service request since we
 157        * reuse same evtchn with pci_conf backend response. So re-schedule
 158        * aer pcifront service.
 159        */
 160        if (test_bit(_XEN_PCIB_active,
 161                        (unsigned long *)&pdev->sh_info->flags)) {
 162                dev_err(&pdev->xdev->dev,
 163                        "schedule aer pcifront service\n");
 164                schedule_pcifront_aer_op(pdev);
 165        }
 166
 167        memcpy(op, active_op, sizeof(struct xen_pci_op));
 168
 169        err = op->err;
 170out:
 171        spin_unlock_irqrestore(&pdev->sh_info_lock, irq_flags);
 172        return err;
 173}
 174
 175/* Access to this function is spinlocked in drivers/pci/access.c */
 176static int pcifront_bus_read(struct pci_bus *bus, unsigned int devfn,
 177                             int where, int size, u32 *val)
 178{
 179        int err = 0;
 180        struct xen_pci_op op = {
 181                .cmd    = XEN_PCI_OP_conf_read,
 182                .domain = pci_domain_nr(bus),
 183                .bus    = bus->number,
 184                .devfn  = devfn,
 185                .offset = where,
 186                .size   = size,
 187        };
 188        struct pcifront_sd *sd = bus->sysdata;
 189        struct pcifront_device *pdev = pcifront_get_pdev(sd);
 190
 191        if (verbose_request)
 192                dev_info(&pdev->xdev->dev,
 193                         "read dev=%04x:%02x:%02x.%d - offset %x size %d\n",
 194                         pci_domain_nr(bus), bus->number, PCI_SLOT(devfn),
 195                         PCI_FUNC(devfn), where, size);
 196
 197        err = do_pci_op(pdev, &op);
 198
 199        if (likely(!err)) {
 200                if (verbose_request)
 201                        dev_info(&pdev->xdev->dev, "read got back value %x\n",
 202                                 op.value);
 203
 204                *val = op.value;
 205        } else if (err == -ENODEV) {
 206                /* No device here, pretend that it just returned 0 */
 207                err = 0;
 208                *val = 0;
 209        }
 210
 211        return errno_to_pcibios_err(err);
 212}
 213
 214/* Access to this function is spinlocked in drivers/pci/access.c */
 215static int pcifront_bus_write(struct pci_bus *bus, unsigned int devfn,
 216                              int where, int size, u32 val)
 217{
 218        struct xen_pci_op op = {
 219                .cmd    = XEN_PCI_OP_conf_write,
 220                .domain = pci_domain_nr(bus),
 221                .bus    = bus->number,
 222                .devfn  = devfn,
 223                .offset = where,
 224                .size   = size,
 225                .value  = val,
 226        };
 227        struct pcifront_sd *sd = bus->sysdata;
 228        struct pcifront_device *pdev = pcifront_get_pdev(sd);
 229
 230        if (verbose_request)
 231                dev_info(&pdev->xdev->dev,
 232                         "write dev=%04x:%02x:%02x.%d - "
 233                         "offset %x size %d val %x\n",
 234                         pci_domain_nr(bus), bus->number,
 235                         PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val);
 236
 237        return errno_to_pcibios_err(do_pci_op(pdev, &op));
 238}
 239
 240static struct pci_ops pcifront_bus_ops = {
 241        .read = pcifront_bus_read,
 242        .write = pcifront_bus_write,
 243};
 244
 245#ifdef CONFIG_PCI_MSI
 246static int pci_frontend_enable_msix(struct pci_dev *dev,
 247                                    int vector[], int nvec)
 248{
 249        int err;
 250        int i;
 251        struct xen_pci_op op = {
 252                .cmd    = XEN_PCI_OP_enable_msix,
 253                .domain = pci_domain_nr(dev->bus),
 254                .bus = dev->bus->number,
 255                .devfn = dev->devfn,
 256                .value = nvec,
 257        };
 258        struct pcifront_sd *sd = dev->bus->sysdata;
 259        struct pcifront_device *pdev = pcifront_get_pdev(sd);
 260        struct msi_desc *entry;
 261
 262        if (nvec > SH_INFO_MAX_VEC) {
 263                dev_err(&dev->dev, "too much vector for pci frontend: %x."
 264                                   " Increase SH_INFO_MAX_VEC.\n", nvec);
 265                return -EINVAL;
 266        }
 267
 268        i = 0;
 269        list_for_each_entry(entry, &dev->msi_list, list) {
 270                op.msix_entries[i].entry = entry->msi_attrib.entry_nr;
 271                /* Vector is useless at this point. */
 272                op.msix_entries[i].vector = -1;
 273                i++;
 274        }
 275
 276        err = do_pci_op(pdev, &op);
 277
 278        if (likely(!err)) {
 279                if (likely(!op.value)) {
 280                        /* we get the result */
 281                        for (i = 0; i < nvec; i++) {
 282                                if (op.msix_entries[i].vector <= 0) {
 283                                        dev_warn(&dev->dev, "MSI-X entry %d is invalid: %d!\n",
 284                                                i, op.msix_entries[i].vector);
 285                                        err = -EINVAL;
 286                                        vector[i] = -1;
 287                                        continue;
 288                                }
 289                                vector[i] = op.msix_entries[i].vector;
 290                        }
 291                } else {
 292                        printk(KERN_DEBUG "enable msix get value %x\n",
 293                                op.value);
 294                        err = op.value;
 295                }
 296        } else {
 297                dev_err(&dev->dev, "enable msix get err %x\n", err);
 298        }
 299        return err;
 300}
 301
 302static void pci_frontend_disable_msix(struct pci_dev *dev)
 303{
 304        int err;
 305        struct xen_pci_op op = {
 306                .cmd    = XEN_PCI_OP_disable_msix,
 307                .domain = pci_domain_nr(dev->bus),
 308                .bus = dev->bus->number,
 309                .devfn = dev->devfn,
 310        };
 311        struct pcifront_sd *sd = dev->bus->sysdata;
 312        struct pcifront_device *pdev = pcifront_get_pdev(sd);
 313
 314        err = do_pci_op(pdev, &op);
 315
 316        /* What should do for error ? */
 317        if (err)
 318                dev_err(&dev->dev, "pci_disable_msix get err %x\n", err);
 319}
 320
 321static int pci_frontend_enable_msi(struct pci_dev *dev, int vector[])
 322{
 323        int err;
 324        struct xen_pci_op op = {
 325                .cmd    = XEN_PCI_OP_enable_msi,
 326                .domain = pci_domain_nr(dev->bus),
 327                .bus = dev->bus->number,
 328                .devfn = dev->devfn,
 329        };
 330        struct pcifront_sd *sd = dev->bus->sysdata;
 331        struct pcifront_device *pdev = pcifront_get_pdev(sd);
 332
 333        err = do_pci_op(pdev, &op);
 334        if (likely(!err)) {
 335                vector[0] = op.value;
 336                if (op.value <= 0) {
 337                        dev_warn(&dev->dev, "MSI entry is invalid: %d!\n",
 338                                op.value);
 339                        err = -EINVAL;
 340                        vector[0] = -1;
 341                }
 342        } else {
 343                dev_err(&dev->dev, "pci frontend enable msi failed for dev "
 344                                    "%x:%x\n", op.bus, op.devfn);
 345                err = -EINVAL;
 346        }
 347        return err;
 348}
 349
 350static void pci_frontend_disable_msi(struct pci_dev *dev)
 351{
 352        int err;
 353        struct xen_pci_op op = {
 354                .cmd    = XEN_PCI_OP_disable_msi,
 355                .domain = pci_domain_nr(dev->bus),
 356                .bus = dev->bus->number,
 357                .devfn = dev->devfn,
 358        };
 359        struct pcifront_sd *sd = dev->bus->sysdata;
 360        struct pcifront_device *pdev = pcifront_get_pdev(sd);
 361
 362        err = do_pci_op(pdev, &op);
 363        if (err == XEN_PCI_ERR_dev_not_found) {
 364                /* XXX No response from backend, what shall we do? */
 365                printk(KERN_DEBUG "get no response from backend for disable MSI\n");
 366                return;
 367        }
 368        if (err)
 369                /* how can pciback notify us fail? */
 370                printk(KERN_DEBUG "get fake response frombackend\n");
 371}
 372
 373static struct xen_pci_frontend_ops pci_frontend_ops = {
 374        .enable_msi = pci_frontend_enable_msi,
 375        .disable_msi = pci_frontend_disable_msi,
 376        .enable_msix = pci_frontend_enable_msix,
 377        .disable_msix = pci_frontend_disable_msix,
 378};
 379
 380static void pci_frontend_registrar(int enable)
 381{
 382        if (enable)
 383                xen_pci_frontend = &pci_frontend_ops;
 384        else
 385                xen_pci_frontend = NULL;
 386};
 387#else
 388static inline void pci_frontend_registrar(int enable) { };
 389#endif /* CONFIG_PCI_MSI */
 390
 391/* Claim resources for the PCI frontend as-is, backend won't allow changes */
 392static int pcifront_claim_resource(struct pci_dev *dev, void *data)
 393{
 394        struct pcifront_device *pdev = data;
 395        int i;
 396        struct resource *r;
 397
 398        for (i = 0; i < PCI_NUM_RESOURCES; i++) {
 399                r = &dev->resource[i];
 400
 401                if (!r->parent && r->start && r->flags) {
 402                        dev_info(&pdev->xdev->dev, "claiming resource %s/%d\n",
 403                                pci_name(dev), i);
 404                        if (pci_claim_resource(dev, i)) {
 405                                dev_err(&pdev->xdev->dev, "Could not claim resource %s/%d! "
 406                                        "Device offline. Try using e820_host=1 in the guest config.\n",
 407                                        pci_name(dev), i);
 408                        }
 409                }
 410        }
 411
 412        return 0;
 413}
 414
 415static int pcifront_scan_bus(struct pcifront_device *pdev,
 416                                unsigned int domain, unsigned int bus,
 417                                struct pci_bus *b)
 418{
 419        struct pci_dev *d;
 420        unsigned int devfn;
 421
 422        /* Scan the bus for functions and add.
 423         * We omit handling of PCI bridge attachment because pciback prevents
 424         * bridges from being exported.
 425         */
 426        for (devfn = 0; devfn < 0x100; devfn++) {
 427                d = pci_get_slot(b, devfn);
 428                if (d) {
 429                        /* Device is already known. */
 430                        pci_dev_put(d);
 431                        continue;
 432                }
 433
 434                d = pci_scan_single_device(b, devfn);
 435                if (d)
 436                        dev_info(&pdev->xdev->dev, "New device on "
 437                                 "%04x:%02x:%02x.%d found.\n", domain, bus,
 438                                 PCI_SLOT(devfn), PCI_FUNC(devfn));
 439        }
 440
 441        return 0;
 442}
 443
 444static int pcifront_scan_root(struct pcifront_device *pdev,
 445                                 unsigned int domain, unsigned int bus)
 446{
 447        struct pci_bus *b;
 448        struct pcifront_sd *sd = NULL;
 449        struct pci_bus_entry *bus_entry = NULL;
 450        int err = 0;
 451
 452#ifndef CONFIG_PCI_DOMAINS
 453        if (domain != 0) {
 454                dev_err(&pdev->xdev->dev,
 455                        "PCI Root in non-zero PCI Domain! domain=%d\n", domain);
 456                dev_err(&pdev->xdev->dev,
 457                        "Please compile with CONFIG_PCI_DOMAINS\n");
 458                err = -EINVAL;
 459                goto err_out;
 460        }
 461#endif
 462
 463        dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n",
 464                 domain, bus);
 465
 466        bus_entry = kmalloc(sizeof(*bus_entry), GFP_KERNEL);
 467        sd = kmalloc(sizeof(*sd), GFP_KERNEL);
 468        if (!bus_entry || !sd) {
 469                err = -ENOMEM;
 470                goto err_out;
 471        }
 472        pcifront_init_sd(sd, domain, bus, pdev);
 473
 474        b = pci_scan_bus_parented(&pdev->xdev->dev, bus,
 475                                  &pcifront_bus_ops, sd);
 476        if (!b) {
 477                dev_err(&pdev->xdev->dev,
 478                        "Error creating PCI Frontend Bus!\n");
 479                err = -ENOMEM;
 480                goto err_out;
 481        }
 482
 483        bus_entry->bus = b;
 484
 485        list_add(&bus_entry->list, &pdev->root_buses);
 486
 487        /* pci_scan_bus_parented skips devices which do not have a have
 488        * devfn==0. The pcifront_scan_bus enumerates all devfn. */
 489        err = pcifront_scan_bus(pdev, domain, bus, b);
 490
 491        /* Claim resources before going "live" with our devices */
 492        pci_walk_bus(b, pcifront_claim_resource, pdev);
 493
 494        /* Create SysFS and notify udev of the devices. Aka: "going live" */
 495        pci_bus_add_devices(b);
 496
 497        return err;
 498
 499err_out:
 500        kfree(bus_entry);
 501        kfree(sd);
 502
 503        return err;
 504}
 505
 506static int pcifront_rescan_root(struct pcifront_device *pdev,
 507                                   unsigned int domain, unsigned int bus)
 508{
 509        int err;
 510        struct pci_bus *b;
 511
 512#ifndef CONFIG_PCI_DOMAINS
 513        if (domain != 0) {
 514                dev_err(&pdev->xdev->dev,
 515                        "PCI Root in non-zero PCI Domain! domain=%d\n", domain);
 516                dev_err(&pdev->xdev->dev,
 517                        "Please compile with CONFIG_PCI_DOMAINS\n");
 518                return -EINVAL;
 519        }
 520#endif
 521
 522        dev_info(&pdev->xdev->dev, "Rescanning PCI Frontend Bus %04x:%02x\n",
 523                 domain, bus);
 524
 525        b = pci_find_bus(domain, bus);
 526        if (!b)
 527                /* If the bus is unknown, create it. */
 528                return pcifront_scan_root(pdev, domain, bus);
 529
 530        err = pcifront_scan_bus(pdev, domain, bus, b);
 531
 532        /* Claim resources before going "live" with our devices */
 533        pci_walk_bus(b, pcifront_claim_resource, pdev);
 534
 535        /* Create SysFS and notify udev of the devices. Aka: "going live" */
 536        pci_bus_add_devices(b);
 537
 538        return err;
 539}
 540
 541static void free_root_bus_devs(struct pci_bus *bus)
 542{
 543        struct pci_dev *dev;
 544
 545        while (!list_empty(&bus->devices)) {
 546                dev = container_of(bus->devices.next, struct pci_dev,
 547                                   bus_list);
 548                dev_dbg(&dev->dev, "removing device\n");
 549                pci_stop_and_remove_bus_device(dev);
 550        }
 551}
 552
 553static void pcifront_free_roots(struct pcifront_device *pdev)
 554{
 555        struct pci_bus_entry *bus_entry, *t;
 556
 557        dev_dbg(&pdev->xdev->dev, "cleaning up root buses\n");
 558
 559        list_for_each_entry_safe(bus_entry, t, &pdev->root_buses, list) {
 560                list_del(&bus_entry->list);
 561
 562                free_root_bus_devs(bus_entry->bus);
 563
 564                kfree(bus_entry->bus->sysdata);
 565
 566                device_unregister(bus_entry->bus->bridge);
 567                pci_remove_bus(bus_entry->bus);
 568
 569                kfree(bus_entry);
 570        }
 571}
 572
 573static pci_ers_result_t pcifront_common_process(int cmd,
 574                                                struct pcifront_device *pdev,
 575                                                pci_channel_state_t state)
 576{
 577        pci_ers_result_t result;
 578        struct pci_driver *pdrv;
 579        int bus = pdev->sh_info->aer_op.bus;
 580        int devfn = pdev->sh_info->aer_op.devfn;
 581        struct pci_dev *pcidev;
 582        int flag = 0;
 583
 584        dev_dbg(&pdev->xdev->dev,
 585                "pcifront AER process: cmd %x (bus:%x, devfn%x)",
 586                cmd, bus, devfn);
 587        result = PCI_ERS_RESULT_NONE;
 588
 589        pcidev = pci_get_bus_and_slot(bus, devfn);
 590        if (!pcidev || !pcidev->driver) {
 591                dev_err(&pdev->xdev->dev, "device or AER driver is NULL\n");
 592                if (pcidev)
 593                        pci_dev_put(pcidev);
 594                return result;
 595        }
 596        pdrv = pcidev->driver;
 597
 598        if (pdrv) {
 599                if (pdrv->err_handler && pdrv->err_handler->error_detected) {
 600                        dev_dbg(&pcidev->dev,
 601                                "trying to call AER service\n");
 602                        if (pcidev) {
 603                                flag = 1;
 604                                switch (cmd) {
 605                                case XEN_PCI_OP_aer_detected:
 606                                        result = pdrv->err_handler->
 607                                                 error_detected(pcidev, state);
 608                                        break;
 609                                case XEN_PCI_OP_aer_mmio:
 610                                        result = pdrv->err_handler->
 611                                                 mmio_enabled(pcidev);
 612                                        break;
 613                                case XEN_PCI_OP_aer_slotreset:
 614                                        result = pdrv->err_handler->
 615                                                 slot_reset(pcidev);
 616                                        break;
 617                                case XEN_PCI_OP_aer_resume:
 618                                        pdrv->err_handler->resume(pcidev);
 619                                        break;
 620                                default:
 621                                        dev_err(&pdev->xdev->dev,
 622                                                "bad request in aer recovery "
 623                                                "operation!\n");
 624
 625                                }
 626                        }
 627                }
 628        }
 629        if (!flag)
 630                result = PCI_ERS_RESULT_NONE;
 631
 632        return result;
 633}
 634
 635
 636static void pcifront_do_aer(struct work_struct *data)
 637{
 638        struct pcifront_device *pdev =
 639                container_of(data, struct pcifront_device, op_work);
 640        int cmd = pdev->sh_info->aer_op.cmd;
 641        pci_channel_state_t state =
 642                (pci_channel_state_t)pdev->sh_info->aer_op.err;
 643
 644        /*If a pci_conf op is in progress,
 645                we have to wait until it is done before service aer op*/
 646        dev_dbg(&pdev->xdev->dev,
 647                "pcifront service aer bus %x devfn %x\n",
 648                pdev->sh_info->aer_op.bus, pdev->sh_info->aer_op.devfn);
 649
 650        pdev->sh_info->aer_op.err = pcifront_common_process(cmd, pdev, state);
 651
 652        /* Post the operation to the guest. */
 653        wmb();
 654        clear_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags);
 655        notify_remote_via_evtchn(pdev->evtchn);
 656
 657        /*in case of we lost an aer request in four lines time_window*/
 658        smp_mb__before_clear_bit();
 659        clear_bit(_PDEVB_op_active, &pdev->flags);
 660        smp_mb__after_clear_bit();
 661
 662        schedule_pcifront_aer_op(pdev);
 663
 664}
 665
 666static irqreturn_t pcifront_handler_aer(int irq, void *dev)
 667{
 668        struct pcifront_device *pdev = dev;
 669        schedule_pcifront_aer_op(pdev);
 670        return IRQ_HANDLED;
 671}
 672static int pcifront_connect_and_init_dma(struct pcifront_device *pdev)
 673{
 674        int err = 0;
 675
 676        spin_lock(&pcifront_dev_lock);
 677
 678        if (!pcifront_dev) {
 679                dev_info(&pdev->xdev->dev, "Installing PCI frontend\n");
 680                pcifront_dev = pdev;
 681        } else {
 682                dev_err(&pdev->xdev->dev, "PCI frontend already installed!\n");
 683                err = -EEXIST;
 684        }
 685        spin_unlock(&pcifront_dev_lock);
 686
 687        if (!err && !swiotlb_nr_tbl()) {
 688                err = pci_xen_swiotlb_init_late();
 689                if (err)
 690                        dev_err(&pdev->xdev->dev, "Could not setup SWIOTLB!\n");
 691        }
 692        return err;
 693}
 694
 695static void pcifront_disconnect(struct pcifront_device *pdev)
 696{
 697        spin_lock(&pcifront_dev_lock);
 698
 699        if (pdev == pcifront_dev) {
 700                dev_info(&pdev->xdev->dev,
 701                         "Disconnecting PCI Frontend Buses\n");
 702                pcifront_dev = NULL;
 703        }
 704
 705        spin_unlock(&pcifront_dev_lock);
 706}
 707static struct pcifront_device *alloc_pdev(struct xenbus_device *xdev)
 708{
 709        struct pcifront_device *pdev;
 710
 711        pdev = kzalloc(sizeof(struct pcifront_device), GFP_KERNEL);
 712        if (pdev == NULL)
 713                goto out;
 714
 715        pdev->sh_info =
 716            (struct xen_pci_sharedinfo *)__get_free_page(GFP_KERNEL);
 717        if (pdev->sh_info == NULL) {
 718                kfree(pdev);
 719                pdev = NULL;
 720                goto out;
 721        }
 722        pdev->sh_info->flags = 0;
 723
 724        /*Flag for registering PV AER handler*/
 725        set_bit(_XEN_PCIB_AERHANDLER, (void *)&pdev->sh_info->flags);
 726
 727        dev_set_drvdata(&xdev->dev, pdev);
 728        pdev->xdev = xdev;
 729
 730        INIT_LIST_HEAD(&pdev->root_buses);
 731
 732        spin_lock_init(&pdev->sh_info_lock);
 733
 734        pdev->evtchn = INVALID_EVTCHN;
 735        pdev->gnt_ref = INVALID_GRANT_REF;
 736        pdev->irq = -1;
 737
 738        INIT_WORK(&pdev->op_work, pcifront_do_aer);
 739
 740        dev_dbg(&xdev->dev, "Allocated pdev @ 0x%p pdev->sh_info @ 0x%p\n",
 741                pdev, pdev->sh_info);
 742out:
 743        return pdev;
 744}
 745
 746static void free_pdev(struct pcifront_device *pdev)
 747{
 748        dev_dbg(&pdev->xdev->dev, "freeing pdev @ 0x%p\n", pdev);
 749
 750        pcifront_free_roots(pdev);
 751
 752        cancel_work_sync(&pdev->op_work);
 753
 754        if (pdev->irq >= 0)
 755                unbind_from_irqhandler(pdev->irq, pdev);
 756
 757        if (pdev->evtchn != INVALID_EVTCHN)
 758                xenbus_free_evtchn(pdev->xdev, pdev->evtchn);
 759
 760        if (pdev->gnt_ref != INVALID_GRANT_REF)
 761                gnttab_end_foreign_access(pdev->gnt_ref, 0 /* r/w page */,
 762                                          (unsigned long)pdev->sh_info);
 763        else
 764                free_page((unsigned long)pdev->sh_info);
 765
 766        dev_set_drvdata(&pdev->xdev->dev, NULL);
 767
 768        kfree(pdev);
 769}
 770
 771static int pcifront_publish_info(struct pcifront_device *pdev)
 772{
 773        int err = 0;
 774        struct xenbus_transaction trans;
 775
 776        err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info));
 777        if (err < 0)
 778                goto out;
 779
 780        pdev->gnt_ref = err;
 781
 782        err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
 783        if (err)
 784                goto out;
 785
 786        err = bind_evtchn_to_irqhandler(pdev->evtchn, pcifront_handler_aer,
 787                0, "pcifront", pdev);
 788
 789        if (err < 0)
 790                return err;
 791
 792        pdev->irq = err;
 793
 794do_publish:
 795        err = xenbus_transaction_start(&trans);
 796        if (err) {
 797                xenbus_dev_fatal(pdev->xdev, err,
 798                                 "Error writing configuration for backend "
 799                                 "(start transaction)");
 800                goto out;
 801        }
 802
 803        err = xenbus_printf(trans, pdev->xdev->nodename,
 804                            "pci-op-ref", "%u", pdev->gnt_ref);
 805        if (!err)
 806                err = xenbus_printf(trans, pdev->xdev->nodename,
 807                                    "event-channel", "%u", pdev->evtchn);
 808        if (!err)
 809                err = xenbus_printf(trans, pdev->xdev->nodename,
 810                                    "magic", XEN_PCI_MAGIC);
 811
 812        if (err) {
 813                xenbus_transaction_end(trans, 1);
 814                xenbus_dev_fatal(pdev->xdev, err,
 815                                 "Error writing configuration for backend");
 816                goto out;
 817        } else {
 818                err = xenbus_transaction_end(trans, 0);
 819                if (err == -EAGAIN)
 820                        goto do_publish;
 821                else if (err) {
 822                        xenbus_dev_fatal(pdev->xdev, err,
 823                                         "Error completing transaction "
 824                                         "for backend");
 825                        goto out;
 826                }
 827        }
 828
 829        xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
 830
 831        dev_dbg(&pdev->xdev->dev, "publishing successful!\n");
 832
 833out:
 834        return err;
 835}
 836
 837static int pcifront_try_connect(struct pcifront_device *pdev)
 838{
 839        int err = -EFAULT;
 840        int i, num_roots, len;
 841        char str[64];
 842        unsigned int domain, bus;
 843
 844
 845        /* Only connect once */
 846        if (xenbus_read_driver_state(pdev->xdev->nodename) !=
 847            XenbusStateInitialised)
 848                goto out;
 849
 850        err = pcifront_connect_and_init_dma(pdev);
 851        if (err) {
 852                xenbus_dev_fatal(pdev->xdev, err,
 853                                 "Error setting up PCI Frontend");
 854                goto out;
 855        }
 856
 857        err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
 858                           "root_num", "%d", &num_roots);
 859        if (err == -ENOENT) {
 860                xenbus_dev_error(pdev->xdev, err,
 861                                 "No PCI Roots found, trying 0000:00");
 862                err = pcifront_scan_root(pdev, 0, 0);
 863                num_roots = 0;
 864        } else if (err != 1) {
 865                if (err == 0)
 866                        err = -EINVAL;
 867                xenbus_dev_fatal(pdev->xdev, err,
 868                                 "Error reading number of PCI roots");
 869                goto out;
 870        }
 871
 872        for (i = 0; i < num_roots; i++) {
 873                len = snprintf(str, sizeof(str), "root-%d", i);
 874                if (unlikely(len >= (sizeof(str) - 1))) {
 875                        err = -ENOMEM;
 876                        goto out;
 877                }
 878
 879                err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
 880                                   "%x:%x", &domain, &bus);
 881                if (err != 2) {
 882                        if (err >= 0)
 883                                err = -EINVAL;
 884                        xenbus_dev_fatal(pdev->xdev, err,
 885                                         "Error reading PCI root %d", i);
 886                        goto out;
 887                }
 888
 889                err = pcifront_scan_root(pdev, domain, bus);
 890                if (err) {
 891                        xenbus_dev_fatal(pdev->xdev, err,
 892                                         "Error scanning PCI root %04x:%02x",
 893                                         domain, bus);
 894                        goto out;
 895                }
 896        }
 897
 898        err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
 899
 900out:
 901        return err;
 902}
 903
 904static int pcifront_try_disconnect(struct pcifront_device *pdev)
 905{
 906        int err = 0;
 907        enum xenbus_state prev_state;
 908
 909
 910        prev_state = xenbus_read_driver_state(pdev->xdev->nodename);
 911
 912        if (prev_state >= XenbusStateClosing)
 913                goto out;
 914
 915        if (prev_state == XenbusStateConnected) {
 916                pcifront_free_roots(pdev);
 917                pcifront_disconnect(pdev);
 918        }
 919
 920        err = xenbus_switch_state(pdev->xdev, XenbusStateClosed);
 921
 922out:
 923
 924        return err;
 925}
 926
 927static int pcifront_attach_devices(struct pcifront_device *pdev)
 928{
 929        int err = -EFAULT;
 930        int i, num_roots, len;
 931        unsigned int domain, bus;
 932        char str[64];
 933
 934        if (xenbus_read_driver_state(pdev->xdev->nodename) !=
 935            XenbusStateReconfiguring)
 936                goto out;
 937
 938        err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
 939                           "root_num", "%d", &num_roots);
 940        if (err == -ENOENT) {
 941                xenbus_dev_error(pdev->xdev, err,
 942                                 "No PCI Roots found, trying 0000:00");
 943                err = pcifront_rescan_root(pdev, 0, 0);
 944                num_roots = 0;
 945        } else if (err != 1) {
 946                if (err == 0)
 947                        err = -EINVAL;
 948                xenbus_dev_fatal(pdev->xdev, err,
 949                                 "Error reading number of PCI roots");
 950                goto out;
 951        }
 952
 953        for (i = 0; i < num_roots; i++) {
 954                len = snprintf(str, sizeof(str), "root-%d", i);
 955                if (unlikely(len >= (sizeof(str) - 1))) {
 956                        err = -ENOMEM;
 957                        goto out;
 958                }
 959
 960                err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
 961                                   "%x:%x", &domain, &bus);
 962                if (err != 2) {
 963                        if (err >= 0)
 964                                err = -EINVAL;
 965                        xenbus_dev_fatal(pdev->xdev, err,
 966                                         "Error reading PCI root %d", i);
 967                        goto out;
 968                }
 969
 970                err = pcifront_rescan_root(pdev, domain, bus);
 971                if (err) {
 972                        xenbus_dev_fatal(pdev->xdev, err,
 973                                         "Error scanning PCI root %04x:%02x",
 974                                         domain, bus);
 975                        goto out;
 976                }
 977        }
 978
 979        xenbus_switch_state(pdev->xdev, XenbusStateConnected);
 980
 981out:
 982        return err;
 983}
 984
 985static int pcifront_detach_devices(struct pcifront_device *pdev)
 986{
 987        int err = 0;
 988        int i, num_devs;
 989        unsigned int domain, bus, slot, func;
 990        struct pci_dev *pci_dev;
 991        char str[64];
 992
 993        if (xenbus_read_driver_state(pdev->xdev->nodename) !=
 994            XenbusStateConnected)
 995                goto out;
 996
 997        err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, "num_devs", "%d",
 998                           &num_devs);
 999        if (err != 1) {
1000                if (err >= 0)
1001                        err = -EINVAL;
1002                xenbus_dev_fatal(pdev->xdev, err,
1003                                 "Error reading number of PCI devices");
1004                goto out;
1005        }
1006
1007        /* Find devices being detached and remove them. */
1008        for (i = 0; i < num_devs; i++) {
1009                int l, state;
1010                l = snprintf(str, sizeof(str), "state-%d", i);
1011                if (unlikely(l >= (sizeof(str) - 1))) {
1012                        err = -ENOMEM;
1013                        goto out;
1014                }
1015                err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, "%d",
1016                                   &state);
1017                if (err != 1)
1018                        state = XenbusStateUnknown;
1019
1020                if (state != XenbusStateClosing)
1021                        continue;
1022
1023                /* Remove device. */
1024                l = snprintf(str, sizeof(str), "vdev-%d", i);
1025                if (unlikely(l >= (sizeof(str) - 1))) {
1026                        err = -ENOMEM;
1027                        goto out;
1028                }
1029                err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
1030                                   "%x:%x:%x.%x", &domain, &bus, &slot, &func);
1031                if (err != 4) {
1032                        if (err >= 0)
1033                                err = -EINVAL;
1034                        xenbus_dev_fatal(pdev->xdev, err,
1035                                         "Error reading PCI device %d", i);
1036                        goto out;
1037                }
1038
1039                pci_dev = pci_get_domain_bus_and_slot(domain, bus,
1040                                PCI_DEVFN(slot, func));
1041                if (!pci_dev) {
1042                        dev_dbg(&pdev->xdev->dev,
1043                                "Cannot get PCI device %04x:%02x:%02x.%d\n",
1044                                domain, bus, slot, func);
1045                        continue;
1046                }
1047                pci_stop_and_remove_bus_device(pci_dev);
1048                pci_dev_put(pci_dev);
1049
1050                dev_dbg(&pdev->xdev->dev,
1051                        "PCI device %04x:%02x:%02x.%d removed.\n",
1052                        domain, bus, slot, func);
1053        }
1054
1055        err = xenbus_switch_state(pdev->xdev, XenbusStateReconfiguring);
1056
1057out:
1058        return err;
1059}
1060
1061static void __init_refok pcifront_backend_changed(struct xenbus_device *xdev,
1062                                                  enum xenbus_state be_state)
1063{
1064        struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev);
1065
1066        switch (be_state) {
1067        case XenbusStateUnknown:
1068        case XenbusStateInitialising:
1069        case XenbusStateInitWait:
1070        case XenbusStateInitialised:
1071                break;
1072
1073        case XenbusStateConnected:
1074                pcifront_try_connect(pdev);
1075                break;
1076
1077        case XenbusStateClosed:
1078                if (xdev->state == XenbusStateClosed)
1079                        break;
1080                /* Missed the backend's CLOSING state -- fallthrough */
1081        case XenbusStateClosing:
1082                dev_warn(&xdev->dev, "backend going away!\n");
1083                pcifront_try_disconnect(pdev);
1084                break;
1085
1086        case XenbusStateReconfiguring:
1087                pcifront_detach_devices(pdev);
1088                break;
1089
1090        case XenbusStateReconfigured:
1091                pcifront_attach_devices(pdev);
1092                break;
1093        }
1094}
1095
1096static int pcifront_xenbus_probe(struct xenbus_device *xdev,
1097                                 const struct xenbus_device_id *id)
1098{
1099        int err = 0;
1100        struct pcifront_device *pdev = alloc_pdev(xdev);
1101
1102        if (pdev == NULL) {
1103                err = -ENOMEM;
1104                xenbus_dev_fatal(xdev, err,
1105                                 "Error allocating pcifront_device struct");
1106                goto out;
1107        }
1108
1109        err = pcifront_publish_info(pdev);
1110        if (err)
1111                free_pdev(pdev);
1112
1113out:
1114        return err;
1115}
1116
1117static int pcifront_xenbus_remove(struct xenbus_device *xdev)
1118{
1119        struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev);
1120        if (pdev)
1121                free_pdev(pdev);
1122
1123        return 0;
1124}
1125
1126static const struct xenbus_device_id xenpci_ids[] = {
1127        {"pci"},
1128        {""},
1129};
1130
1131static DEFINE_XENBUS_DRIVER(xenpci, "pcifront",
1132        .probe                  = pcifront_xenbus_probe,
1133        .remove                 = pcifront_xenbus_remove,
1134        .otherend_changed       = pcifront_backend_changed,
1135);
1136
1137static int __init pcifront_init(void)
1138{
1139        if (!xen_pv_domain() || xen_initial_domain())
1140                return -ENODEV;
1141
1142        pci_frontend_registrar(1 /* enable */);
1143
1144        return xenbus_register_frontend(&xenpci_driver);
1145}
1146
1147static void __exit pcifront_cleanup(void)
1148{
1149        xenbus_unregister_driver(&xenpci_driver);
1150        pci_frontend_registrar(0 /* disable */);
1151}
1152module_init(pcifront_init);
1153module_exit(pcifront_cleanup);
1154
1155MODULE_DESCRIPTION("Xen PCI passthrough frontend.");
1156MODULE_LICENSE("GPL");
1157MODULE_ALIAS("xen:pci");
1158