linux/drivers/pci/xen-pcifront.c
<<
>>
Prefs
   1/*
   2 * Xen PCI Frontend.
   3 *
   4 *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
   5 */
   6#include <linux/module.h>
   7#include <linux/init.h>
   8#include <linux/mm.h>
   9#include <xen/xenbus.h>
  10#include <xen/events.h>
  11#include <xen/grant_table.h>
  12#include <xen/page.h>
  13#include <linux/spinlock.h>
  14#include <linux/pci.h>
  15#include <linux/msi.h>
  16#include <xen/interface/io/pciif.h>
  17#include <asm/xen/pci.h>
  18#include <linux/interrupt.h>
  19#include <asm/atomic.h>
  20#include <linux/workqueue.h>
  21#include <linux/bitops.h>
  22#include <linux/time.h>
  23
  24#define INVALID_GRANT_REF (0)
  25#define INVALID_EVTCHN    (-1)
  26
  27struct pci_bus_entry {
  28        struct list_head list;
  29        struct pci_bus *bus;
  30};
  31
  32#define _PDEVB_op_active                (0)
  33#define PDEVB_op_active                 (1 << (_PDEVB_op_active))
  34
  35struct pcifront_device {
  36        struct xenbus_device *xdev;
  37        struct list_head root_buses;
  38
  39        int evtchn;
  40        int gnt_ref;
  41
  42        int irq;
  43
  44        /* Lock this when doing any operations in sh_info */
  45        spinlock_t sh_info_lock;
  46        struct xen_pci_sharedinfo *sh_info;
  47        struct work_struct op_work;
  48        unsigned long flags;
  49
  50};
  51
  52struct pcifront_sd {
  53        int domain;
  54        struct pcifront_device *pdev;
  55};
  56
  57static inline struct pcifront_device *
  58pcifront_get_pdev(struct pcifront_sd *sd)
  59{
  60        return sd->pdev;
  61}
  62
  63static inline void pcifront_init_sd(struct pcifront_sd *sd,
  64                                    unsigned int domain, unsigned int bus,
  65                                    struct pcifront_device *pdev)
  66{
  67        sd->domain = domain;
  68        sd->pdev = pdev;
  69}
  70
  71static DEFINE_SPINLOCK(pcifront_dev_lock);
  72static struct pcifront_device *pcifront_dev;
  73
  74static int verbose_request;
  75module_param(verbose_request, int, 0644);
  76
  77static int errno_to_pcibios_err(int errno)
  78{
  79        switch (errno) {
  80        case XEN_PCI_ERR_success:
  81                return PCIBIOS_SUCCESSFUL;
  82
  83        case XEN_PCI_ERR_dev_not_found:
  84                return PCIBIOS_DEVICE_NOT_FOUND;
  85
  86        case XEN_PCI_ERR_invalid_offset:
  87        case XEN_PCI_ERR_op_failed:
  88                return PCIBIOS_BAD_REGISTER_NUMBER;
  89
  90        case XEN_PCI_ERR_not_implemented:
  91                return PCIBIOS_FUNC_NOT_SUPPORTED;
  92
  93        case XEN_PCI_ERR_access_denied:
  94                return PCIBIOS_SET_FAILED;
  95        }
  96        return errno;
  97}
  98
  99static inline void schedule_pcifront_aer_op(struct pcifront_device *pdev)
 100{
 101        if (test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
 102                && !test_and_set_bit(_PDEVB_op_active, &pdev->flags)) {
 103                dev_dbg(&pdev->xdev->dev, "schedule aer frontend job\n");
 104                schedule_work(&pdev->op_work);
 105        }
 106}
 107
 108static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
 109{
 110        int err = 0;
 111        struct xen_pci_op *active_op = &pdev->sh_info->op;
 112        unsigned long irq_flags;
 113        evtchn_port_t port = pdev->evtchn;
 114        unsigned irq = pdev->irq;
 115        s64 ns, ns_timeout;
 116        struct timeval tv;
 117
 118        spin_lock_irqsave(&pdev->sh_info_lock, irq_flags);
 119
 120        memcpy(active_op, op, sizeof(struct xen_pci_op));
 121
 122        /* Go */
 123        wmb();
 124        set_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
 125        notify_remote_via_evtchn(port);
 126
 127        /*
 128         * We set a poll timeout of 3 seconds but give up on return after
 129         * 2 seconds. It is better to time out too late rather than too early
 130         * (in the latter case we end up continually re-executing poll() with a
 131         * timeout in the past). 1s difference gives plenty of slack for error.
 132         */
 133        do_gettimeofday(&tv);
 134        ns_timeout = timeval_to_ns(&tv) + 2 * (s64)NSEC_PER_SEC;
 135
 136        xen_clear_irq_pending(irq);
 137
 138        while (test_bit(_XEN_PCIF_active,
 139                        (unsigned long *)&pdev->sh_info->flags)) {
 140                xen_poll_irq_timeout(irq, jiffies + 3*HZ);
 141                xen_clear_irq_pending(irq);
 142                do_gettimeofday(&tv);
 143                ns = timeval_to_ns(&tv);
 144                if (ns > ns_timeout) {
 145                        dev_err(&pdev->xdev->dev,
 146                                "pciback not responding!!!\n");
 147                        clear_bit(_XEN_PCIF_active,
 148                                  (unsigned long *)&pdev->sh_info->flags);
 149                        err = XEN_PCI_ERR_dev_not_found;
 150                        goto out;
 151                }
 152        }
 153
 154        /*
 155        * We might lose backend service request since we
 156        * reuse same evtchn with pci_conf backend response. So re-schedule
 157        * aer pcifront service.
 158        */
 159        if (test_bit(_XEN_PCIB_active,
 160                        (unsigned long *)&pdev->sh_info->flags)) {
 161                dev_err(&pdev->xdev->dev,
 162                        "schedule aer pcifront service\n");
 163                schedule_pcifront_aer_op(pdev);
 164        }
 165
 166        memcpy(op, active_op, sizeof(struct xen_pci_op));
 167
 168        err = op->err;
 169out:
 170        spin_unlock_irqrestore(&pdev->sh_info_lock, irq_flags);
 171        return err;
 172}
 173
 174/* Access to this function is spinlocked in drivers/pci/access.c */
 175static int pcifront_bus_read(struct pci_bus *bus, unsigned int devfn,
 176                             int where, int size, u32 *val)
 177{
 178        int err = 0;
 179        struct xen_pci_op op = {
 180                .cmd    = XEN_PCI_OP_conf_read,
 181                .domain = pci_domain_nr(bus),
 182                .bus    = bus->number,
 183                .devfn  = devfn,
 184                .offset = where,
 185                .size   = size,
 186        };
 187        struct pcifront_sd *sd = bus->sysdata;
 188        struct pcifront_device *pdev = pcifront_get_pdev(sd);
 189
 190        if (verbose_request)
 191                dev_info(&pdev->xdev->dev,
 192                         "read dev=%04x:%02x:%02x.%01x - offset %x size %d\n",
 193                         pci_domain_nr(bus), bus->number, PCI_SLOT(devfn),
 194                         PCI_FUNC(devfn), where, size);
 195
 196        err = do_pci_op(pdev, &op);
 197
 198        if (likely(!err)) {
 199                if (verbose_request)
 200                        dev_info(&pdev->xdev->dev, "read got back value %x\n",
 201                                 op.value);
 202
 203                *val = op.value;
 204        } else if (err == -ENODEV) {
 205                /* No device here, pretend that it just returned 0 */
 206                err = 0;
 207                *val = 0;
 208        }
 209
 210        return errno_to_pcibios_err(err);
 211}
 212
 213/* Access to this function is spinlocked in drivers/pci/access.c */
 214static int pcifront_bus_write(struct pci_bus *bus, unsigned int devfn,
 215                              int where, int size, u32 val)
 216{
 217        struct xen_pci_op op = {
 218                .cmd    = XEN_PCI_OP_conf_write,
 219                .domain = pci_domain_nr(bus),
 220                .bus    = bus->number,
 221                .devfn  = devfn,
 222                .offset = where,
 223                .size   = size,
 224                .value  = val,
 225        };
 226        struct pcifront_sd *sd = bus->sysdata;
 227        struct pcifront_device *pdev = pcifront_get_pdev(sd);
 228
 229        if (verbose_request)
 230                dev_info(&pdev->xdev->dev,
 231                         "write dev=%04x:%02x:%02x.%01x - "
 232                         "offset %x size %d val %x\n",
 233                         pci_domain_nr(bus), bus->number,
 234                         PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val);
 235
 236        return errno_to_pcibios_err(do_pci_op(pdev, &op));
 237}
 238
 239struct pci_ops pcifront_bus_ops = {
 240        .read = pcifront_bus_read,
 241        .write = pcifront_bus_write,
 242};
 243
 244#ifdef CONFIG_PCI_MSI
 245static int pci_frontend_enable_msix(struct pci_dev *dev,
 246                                    int **vector, int nvec)
 247{
 248        int err;
 249        int i;
 250        struct xen_pci_op op = {
 251                .cmd    = XEN_PCI_OP_enable_msix,
 252                .domain = pci_domain_nr(dev->bus),
 253                .bus = dev->bus->number,
 254                .devfn = dev->devfn,
 255                .value = nvec,
 256        };
 257        struct pcifront_sd *sd = dev->bus->sysdata;
 258        struct pcifront_device *pdev = pcifront_get_pdev(sd);
 259        struct msi_desc *entry;
 260
 261        if (nvec > SH_INFO_MAX_VEC) {
 262                dev_err(&dev->dev, "too much vector for pci frontend: %x."
 263                                   " Increase SH_INFO_MAX_VEC.\n", nvec);
 264                return -EINVAL;
 265        }
 266
 267        i = 0;
 268        list_for_each_entry(entry, &dev->msi_list, list) {
 269                op.msix_entries[i].entry = entry->msi_attrib.entry_nr;
 270                /* Vector is useless at this point. */
 271                op.msix_entries[i].vector = -1;
 272                i++;
 273        }
 274
 275        err = do_pci_op(pdev, &op);
 276
 277        if (likely(!err)) {
 278                if (likely(!op.value)) {
 279                        /* we get the result */
 280                        for (i = 0; i < nvec; i++)
 281                                *(*vector+i) = op.msix_entries[i].vector;
 282                        return 0;
 283                } else {
 284                        printk(KERN_DEBUG "enable msix get value %x\n",
 285                                op.value);
 286                        return op.value;
 287                }
 288        } else {
 289                dev_err(&dev->dev, "enable msix get err %x\n", err);
 290                return err;
 291        }
 292}
 293
 294static void pci_frontend_disable_msix(struct pci_dev *dev)
 295{
 296        int err;
 297        struct xen_pci_op op = {
 298                .cmd    = XEN_PCI_OP_disable_msix,
 299                .domain = pci_domain_nr(dev->bus),
 300                .bus = dev->bus->number,
 301                .devfn = dev->devfn,
 302        };
 303        struct pcifront_sd *sd = dev->bus->sysdata;
 304        struct pcifront_device *pdev = pcifront_get_pdev(sd);
 305
 306        err = do_pci_op(pdev, &op);
 307
 308        /* What should do for error ? */
 309        if (err)
 310                dev_err(&dev->dev, "pci_disable_msix get err %x\n", err);
 311}
 312
 313static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector)
 314{
 315        int err;
 316        struct xen_pci_op op = {
 317                .cmd    = XEN_PCI_OP_enable_msi,
 318                .domain = pci_domain_nr(dev->bus),
 319                .bus = dev->bus->number,
 320                .devfn = dev->devfn,
 321        };
 322        struct pcifront_sd *sd = dev->bus->sysdata;
 323        struct pcifront_device *pdev = pcifront_get_pdev(sd);
 324
 325        err = do_pci_op(pdev, &op);
 326        if (likely(!err)) {
 327                *(*vector) = op.value;
 328        } else {
 329                dev_err(&dev->dev, "pci frontend enable msi failed for dev "
 330                                    "%x:%x\n", op.bus, op.devfn);
 331                err = -EINVAL;
 332        }
 333        return err;
 334}
 335
 336static void pci_frontend_disable_msi(struct pci_dev *dev)
 337{
 338        int err;
 339        struct xen_pci_op op = {
 340                .cmd    = XEN_PCI_OP_disable_msi,
 341                .domain = pci_domain_nr(dev->bus),
 342                .bus = dev->bus->number,
 343                .devfn = dev->devfn,
 344        };
 345        struct pcifront_sd *sd = dev->bus->sysdata;
 346        struct pcifront_device *pdev = pcifront_get_pdev(sd);
 347
 348        err = do_pci_op(pdev, &op);
 349        if (err == XEN_PCI_ERR_dev_not_found) {
 350                /* XXX No response from backend, what shall we do? */
 351                printk(KERN_DEBUG "get no response from backend for disable MSI\n");
 352                return;
 353        }
 354        if (err)
 355                /* how can pciback notify us fail? */
 356                printk(KERN_DEBUG "get fake response frombackend\n");
 357}
 358
 359static struct xen_pci_frontend_ops pci_frontend_ops = {
 360        .enable_msi = pci_frontend_enable_msi,
 361        .disable_msi = pci_frontend_disable_msi,
 362        .enable_msix = pci_frontend_enable_msix,
 363        .disable_msix = pci_frontend_disable_msix,
 364};
 365
 366static void pci_frontend_registrar(int enable)
 367{
 368        if (enable)
 369                xen_pci_frontend = &pci_frontend_ops;
 370        else
 371                xen_pci_frontend = NULL;
 372};
 373#else
 374static inline void pci_frontend_registrar(int enable) { };
 375#endif /* CONFIG_PCI_MSI */
 376
 377/* Claim resources for the PCI frontend as-is, backend won't allow changes */
 378static int pcifront_claim_resource(struct pci_dev *dev, void *data)
 379{
 380        struct pcifront_device *pdev = data;
 381        int i;
 382        struct resource *r;
 383
 384        for (i = 0; i < PCI_NUM_RESOURCES; i++) {
 385                r = &dev->resource[i];
 386
 387                if (!r->parent && r->start && r->flags) {
 388                        dev_info(&pdev->xdev->dev, "claiming resource %s/%d\n",
 389                                pci_name(dev), i);
 390                        if (pci_claim_resource(dev, i)) {
 391                                dev_err(&pdev->xdev->dev, "Could not claim "
 392                                        "resource %s/%d! Device offline. Try "
 393                                        "giving less than 4GB to domain.\n",
 394                                        pci_name(dev), i);
 395                        }
 396                }
 397        }
 398
 399        return 0;
 400}
 401
 402static int __devinit pcifront_scan_bus(struct pcifront_device *pdev,
 403                                unsigned int domain, unsigned int bus,
 404                                struct pci_bus *b)
 405{
 406        struct pci_dev *d;
 407        unsigned int devfn;
 408
 409        /* Scan the bus for functions and add.
 410         * We omit handling of PCI bridge attachment because pciback prevents
 411         * bridges from being exported.
 412         */
 413        for (devfn = 0; devfn < 0x100; devfn++) {
 414                d = pci_get_slot(b, devfn);
 415                if (d) {
 416                        /* Device is already known. */
 417                        pci_dev_put(d);
 418                        continue;
 419                }
 420
 421                d = pci_scan_single_device(b, devfn);
 422                if (d)
 423                        dev_info(&pdev->xdev->dev, "New device on "
 424                                 "%04x:%02x:%02x.%02x found.\n", domain, bus,
 425                                 PCI_SLOT(devfn), PCI_FUNC(devfn));
 426        }
 427
 428        return 0;
 429}
 430
 431static int __devinit pcifront_scan_root(struct pcifront_device *pdev,
 432                                 unsigned int domain, unsigned int bus)
 433{
 434        struct pci_bus *b;
 435        struct pcifront_sd *sd = NULL;
 436        struct pci_bus_entry *bus_entry = NULL;
 437        int err = 0;
 438
 439#ifndef CONFIG_PCI_DOMAINS
 440        if (domain != 0) {
 441                dev_err(&pdev->xdev->dev,
 442                        "PCI Root in non-zero PCI Domain! domain=%d\n", domain);
 443                dev_err(&pdev->xdev->dev,
 444                        "Please compile with CONFIG_PCI_DOMAINS\n");
 445                err = -EINVAL;
 446                goto err_out;
 447        }
 448#endif
 449
 450        dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n",
 451                 domain, bus);
 452
 453        bus_entry = kmalloc(sizeof(*bus_entry), GFP_KERNEL);
 454        sd = kmalloc(sizeof(*sd), GFP_KERNEL);
 455        if (!bus_entry || !sd) {
 456                err = -ENOMEM;
 457                goto err_out;
 458        }
 459        pcifront_init_sd(sd, domain, bus, pdev);
 460
 461        b = pci_scan_bus_parented(&pdev->xdev->dev, bus,
 462                                  &pcifront_bus_ops, sd);
 463        if (!b) {
 464                dev_err(&pdev->xdev->dev,
 465                        "Error creating PCI Frontend Bus!\n");
 466                err = -ENOMEM;
 467                goto err_out;
 468        }
 469
 470        bus_entry->bus = b;
 471
 472        list_add(&bus_entry->list, &pdev->root_buses);
 473
 474        /* pci_scan_bus_parented skips devices which do not have a have
 475        * devfn==0. The pcifront_scan_bus enumerates all devfn. */
 476        err = pcifront_scan_bus(pdev, domain, bus, b);
 477
 478        /* Claim resources before going "live" with our devices */
 479        pci_walk_bus(b, pcifront_claim_resource, pdev);
 480
 481        /* Create SysFS and notify udev of the devices. Aka: "going live" */
 482        pci_bus_add_devices(b);
 483
 484        return err;
 485
 486err_out:
 487        kfree(bus_entry);
 488        kfree(sd);
 489
 490        return err;
 491}
 492
 493static int __devinit pcifront_rescan_root(struct pcifront_device *pdev,
 494                                   unsigned int domain, unsigned int bus)
 495{
 496        int err;
 497        struct pci_bus *b;
 498
 499#ifndef CONFIG_PCI_DOMAINS
 500        if (domain != 0) {
 501                dev_err(&pdev->xdev->dev,
 502                        "PCI Root in non-zero PCI Domain! domain=%d\n", domain);
 503                dev_err(&pdev->xdev->dev,
 504                        "Please compile with CONFIG_PCI_DOMAINS\n");
 505                return -EINVAL;
 506        }
 507#endif
 508
 509        dev_info(&pdev->xdev->dev, "Rescanning PCI Frontend Bus %04x:%02x\n",
 510                 domain, bus);
 511
 512        b = pci_find_bus(domain, bus);
 513        if (!b)
 514                /* If the bus is unknown, create it. */
 515                return pcifront_scan_root(pdev, domain, bus);
 516
 517        err = pcifront_scan_bus(pdev, domain, bus, b);
 518
 519        /* Claim resources before going "live" with our devices */
 520        pci_walk_bus(b, pcifront_claim_resource, pdev);
 521
 522        /* Create SysFS and notify udev of the devices. Aka: "going live" */
 523        pci_bus_add_devices(b);
 524
 525        return err;
 526}
 527
 528static void free_root_bus_devs(struct pci_bus *bus)
 529{
 530        struct pci_dev *dev;
 531
 532        while (!list_empty(&bus->devices)) {
 533                dev = container_of(bus->devices.next, struct pci_dev,
 534                                   bus_list);
 535                dev_dbg(&dev->dev, "removing device\n");
 536                pci_remove_bus_device(dev);
 537        }
 538}
 539
 540static void pcifront_free_roots(struct pcifront_device *pdev)
 541{
 542        struct pci_bus_entry *bus_entry, *t;
 543
 544        dev_dbg(&pdev->xdev->dev, "cleaning up root buses\n");
 545
 546        list_for_each_entry_safe(bus_entry, t, &pdev->root_buses, list) {
 547                list_del(&bus_entry->list);
 548
 549                free_root_bus_devs(bus_entry->bus);
 550
 551                kfree(bus_entry->bus->sysdata);
 552
 553                device_unregister(bus_entry->bus->bridge);
 554                pci_remove_bus(bus_entry->bus);
 555
 556                kfree(bus_entry);
 557        }
 558}
 559
 560static pci_ers_result_t pcifront_common_process(int cmd,
 561                                                struct pcifront_device *pdev,
 562                                                pci_channel_state_t state)
 563{
 564        pci_ers_result_t result;
 565        struct pci_driver *pdrv;
 566        int bus = pdev->sh_info->aer_op.bus;
 567        int devfn = pdev->sh_info->aer_op.devfn;
 568        struct pci_dev *pcidev;
 569        int flag = 0;
 570
 571        dev_dbg(&pdev->xdev->dev,
 572                "pcifront AER process: cmd %x (bus:%x, devfn%x)",
 573                cmd, bus, devfn);
 574        result = PCI_ERS_RESULT_NONE;
 575
 576        pcidev = pci_get_bus_and_slot(bus, devfn);
 577        if (!pcidev || !pcidev->driver) {
 578                dev_err(&pdev->xdev->dev, "device or AER driver is NULL\n");
 579                if (pcidev)
 580                        pci_dev_put(pcidev);
 581                return result;
 582        }
 583        pdrv = pcidev->driver;
 584
 585        if (get_driver(&pdrv->driver)) {
 586                if (pdrv->err_handler && pdrv->err_handler->error_detected) {
 587                        dev_dbg(&pcidev->dev,
 588                                "trying to call AER service\n");
 589                        if (pcidev) {
 590                                flag = 1;
 591                                switch (cmd) {
 592                                case XEN_PCI_OP_aer_detected:
 593                                        result = pdrv->err_handler->
 594                                                 error_detected(pcidev, state);
 595                                        break;
 596                                case XEN_PCI_OP_aer_mmio:
 597                                        result = pdrv->err_handler->
 598                                                 mmio_enabled(pcidev);
 599                                        break;
 600                                case XEN_PCI_OP_aer_slotreset:
 601                                        result = pdrv->err_handler->
 602                                                 slot_reset(pcidev);
 603                                        break;
 604                                case XEN_PCI_OP_aer_resume:
 605                                        pdrv->err_handler->resume(pcidev);
 606                                        break;
 607                                default:
 608                                        dev_err(&pdev->xdev->dev,
 609                                                "bad request in aer recovery "
 610                                                "operation!\n");
 611
 612                                }
 613                        }
 614                }
 615                put_driver(&pdrv->driver);
 616        }
 617        if (!flag)
 618                result = PCI_ERS_RESULT_NONE;
 619
 620        return result;
 621}
 622
 623
 624static void pcifront_do_aer(struct work_struct *data)
 625{
 626        struct pcifront_device *pdev =
 627                container_of(data, struct pcifront_device, op_work);
 628        int cmd = pdev->sh_info->aer_op.cmd;
 629        pci_channel_state_t state =
 630                (pci_channel_state_t)pdev->sh_info->aer_op.err;
 631
 632        /*If a pci_conf op is in progress,
 633                we have to wait until it is done before service aer op*/
 634        dev_dbg(&pdev->xdev->dev,
 635                "pcifront service aer bus %x devfn %x\n",
 636                pdev->sh_info->aer_op.bus, pdev->sh_info->aer_op.devfn);
 637
 638        pdev->sh_info->aer_op.err = pcifront_common_process(cmd, pdev, state);
 639
 640        /* Post the operation to the guest. */
 641        wmb();
 642        clear_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags);
 643        notify_remote_via_evtchn(pdev->evtchn);
 644
 645        /*in case of we lost an aer request in four lines time_window*/
 646        smp_mb__before_clear_bit();
 647        clear_bit(_PDEVB_op_active, &pdev->flags);
 648        smp_mb__after_clear_bit();
 649
 650        schedule_pcifront_aer_op(pdev);
 651
 652}
 653
 654static irqreturn_t pcifront_handler_aer(int irq, void *dev)
 655{
 656        struct pcifront_device *pdev = dev;
 657        schedule_pcifront_aer_op(pdev);
 658        return IRQ_HANDLED;
 659}
 660static int pcifront_connect(struct pcifront_device *pdev)
 661{
 662        int err = 0;
 663
 664        spin_lock(&pcifront_dev_lock);
 665
 666        if (!pcifront_dev) {
 667                dev_info(&pdev->xdev->dev, "Installing PCI frontend\n");
 668                pcifront_dev = pdev;
 669        } else {
 670                dev_err(&pdev->xdev->dev, "PCI frontend already installed!\n");
 671                err = -EEXIST;
 672        }
 673
 674        spin_unlock(&pcifront_dev_lock);
 675
 676        return err;
 677}
 678
 679static void pcifront_disconnect(struct pcifront_device *pdev)
 680{
 681        spin_lock(&pcifront_dev_lock);
 682
 683        if (pdev == pcifront_dev) {
 684                dev_info(&pdev->xdev->dev,
 685                         "Disconnecting PCI Frontend Buses\n");
 686                pcifront_dev = NULL;
 687        }
 688
 689        spin_unlock(&pcifront_dev_lock);
 690}
 691static struct pcifront_device *alloc_pdev(struct xenbus_device *xdev)
 692{
 693        struct pcifront_device *pdev;
 694
 695        pdev = kzalloc(sizeof(struct pcifront_device), GFP_KERNEL);
 696        if (pdev == NULL)
 697                goto out;
 698
 699        pdev->sh_info =
 700            (struct xen_pci_sharedinfo *)__get_free_page(GFP_KERNEL);
 701        if (pdev->sh_info == NULL) {
 702                kfree(pdev);
 703                pdev = NULL;
 704                goto out;
 705        }
 706        pdev->sh_info->flags = 0;
 707
 708        /*Flag for registering PV AER handler*/
 709        set_bit(_XEN_PCIB_AERHANDLER, (void *)&pdev->sh_info->flags);
 710
 711        dev_set_drvdata(&xdev->dev, pdev);
 712        pdev->xdev = xdev;
 713
 714        INIT_LIST_HEAD(&pdev->root_buses);
 715
 716        spin_lock_init(&pdev->sh_info_lock);
 717
 718        pdev->evtchn = INVALID_EVTCHN;
 719        pdev->gnt_ref = INVALID_GRANT_REF;
 720        pdev->irq = -1;
 721
 722        INIT_WORK(&pdev->op_work, pcifront_do_aer);
 723
 724        dev_dbg(&xdev->dev, "Allocated pdev @ 0x%p pdev->sh_info @ 0x%p\n",
 725                pdev, pdev->sh_info);
 726out:
 727        return pdev;
 728}
 729
 730static void free_pdev(struct pcifront_device *pdev)
 731{
 732        dev_dbg(&pdev->xdev->dev, "freeing pdev @ 0x%p\n", pdev);
 733
 734        pcifront_free_roots(pdev);
 735
 736        /*For PCIE_AER error handling job*/
 737        flush_scheduled_work();
 738
 739        if (pdev->irq >= 0)
 740                unbind_from_irqhandler(pdev->irq, pdev);
 741
 742        if (pdev->evtchn != INVALID_EVTCHN)
 743                xenbus_free_evtchn(pdev->xdev, pdev->evtchn);
 744
 745        if (pdev->gnt_ref != INVALID_GRANT_REF)
 746                gnttab_end_foreign_access(pdev->gnt_ref, 0 /* r/w page */,
 747                                          (unsigned long)pdev->sh_info);
 748        else
 749                free_page((unsigned long)pdev->sh_info);
 750
 751        dev_set_drvdata(&pdev->xdev->dev, NULL);
 752
 753        kfree(pdev);
 754}
 755
 756static int pcifront_publish_info(struct pcifront_device *pdev)
 757{
 758        int err = 0;
 759        struct xenbus_transaction trans;
 760
 761        err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info));
 762        if (err < 0)
 763                goto out;
 764
 765        pdev->gnt_ref = err;
 766
 767        err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
 768        if (err)
 769                goto out;
 770
 771        err = bind_evtchn_to_irqhandler(pdev->evtchn, pcifront_handler_aer,
 772                0, "pcifront", pdev);
 773
 774        if (err < 0)
 775                return err;
 776
 777        pdev->irq = err;
 778
 779do_publish:
 780        err = xenbus_transaction_start(&trans);
 781        if (err) {
 782                xenbus_dev_fatal(pdev->xdev, err,
 783                                 "Error writing configuration for backend "
 784                                 "(start transaction)");
 785                goto out;
 786        }
 787
 788        err = xenbus_printf(trans, pdev->xdev->nodename,
 789                            "pci-op-ref", "%u", pdev->gnt_ref);
 790        if (!err)
 791                err = xenbus_printf(trans, pdev->xdev->nodename,
 792                                    "event-channel", "%u", pdev->evtchn);
 793        if (!err)
 794                err = xenbus_printf(trans, pdev->xdev->nodename,
 795                                    "magic", XEN_PCI_MAGIC);
 796
 797        if (err) {
 798                xenbus_transaction_end(trans, 1);
 799                xenbus_dev_fatal(pdev->xdev, err,
 800                                 "Error writing configuration for backend");
 801                goto out;
 802        } else {
 803                err = xenbus_transaction_end(trans, 0);
 804                if (err == -EAGAIN)
 805                        goto do_publish;
 806                else if (err) {
 807                        xenbus_dev_fatal(pdev->xdev, err,
 808                                         "Error completing transaction "
 809                                         "for backend");
 810                        goto out;
 811                }
 812        }
 813
 814        xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
 815
 816        dev_dbg(&pdev->xdev->dev, "publishing successful!\n");
 817
 818out:
 819        return err;
 820}
 821
 822static int __devinit pcifront_try_connect(struct pcifront_device *pdev)
 823{
 824        int err = -EFAULT;
 825        int i, num_roots, len;
 826        char str[64];
 827        unsigned int domain, bus;
 828
 829
 830        /* Only connect once */
 831        if (xenbus_read_driver_state(pdev->xdev->nodename) !=
 832            XenbusStateInitialised)
 833                goto out;
 834
 835        err = pcifront_connect(pdev);
 836        if (err) {
 837                xenbus_dev_fatal(pdev->xdev, err,
 838                                 "Error connecting PCI Frontend");
 839                goto out;
 840        }
 841
 842        err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
 843                           "root_num", "%d", &num_roots);
 844        if (err == -ENOENT) {
 845                xenbus_dev_error(pdev->xdev, err,
 846                                 "No PCI Roots found, trying 0000:00");
 847                err = pcifront_scan_root(pdev, 0, 0);
 848                num_roots = 0;
 849        } else if (err != 1) {
 850                if (err == 0)
 851                        err = -EINVAL;
 852                xenbus_dev_fatal(pdev->xdev, err,
 853                                 "Error reading number of PCI roots");
 854                goto out;
 855        }
 856
 857        for (i = 0; i < num_roots; i++) {
 858                len = snprintf(str, sizeof(str), "root-%d", i);
 859                if (unlikely(len >= (sizeof(str) - 1))) {
 860                        err = -ENOMEM;
 861                        goto out;
 862                }
 863
 864                err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
 865                                   "%x:%x", &domain, &bus);
 866                if (err != 2) {
 867                        if (err >= 0)
 868                                err = -EINVAL;
 869                        xenbus_dev_fatal(pdev->xdev, err,
 870                                         "Error reading PCI root %d", i);
 871                        goto out;
 872                }
 873
 874                err = pcifront_scan_root(pdev, domain, bus);
 875                if (err) {
 876                        xenbus_dev_fatal(pdev->xdev, err,
 877                                         "Error scanning PCI root %04x:%02x",
 878                                         domain, bus);
 879                        goto out;
 880                }
 881        }
 882
 883        err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
 884
 885out:
 886        return err;
 887}
 888
 889static int pcifront_try_disconnect(struct pcifront_device *pdev)
 890{
 891        int err = 0;
 892        enum xenbus_state prev_state;
 893
 894
 895        prev_state = xenbus_read_driver_state(pdev->xdev->nodename);
 896
 897        if (prev_state >= XenbusStateClosing)
 898                goto out;
 899
 900        if (prev_state == XenbusStateConnected) {
 901                pcifront_free_roots(pdev);
 902                pcifront_disconnect(pdev);
 903        }
 904
 905        err = xenbus_switch_state(pdev->xdev, XenbusStateClosed);
 906
 907out:
 908
 909        return err;
 910}
 911
 912static int __devinit pcifront_attach_devices(struct pcifront_device *pdev)
 913{
 914        int err = -EFAULT;
 915        int i, num_roots, len;
 916        unsigned int domain, bus;
 917        char str[64];
 918
 919        if (xenbus_read_driver_state(pdev->xdev->nodename) !=
 920            XenbusStateReconfiguring)
 921                goto out;
 922
 923        err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
 924                           "root_num", "%d", &num_roots);
 925        if (err == -ENOENT) {
 926                xenbus_dev_error(pdev->xdev, err,
 927                                 "No PCI Roots found, trying 0000:00");
 928                err = pcifront_rescan_root(pdev, 0, 0);
 929                num_roots = 0;
 930        } else if (err != 1) {
 931                if (err == 0)
 932                        err = -EINVAL;
 933                xenbus_dev_fatal(pdev->xdev, err,
 934                                 "Error reading number of PCI roots");
 935                goto out;
 936        }
 937
 938        for (i = 0; i < num_roots; i++) {
 939                len = snprintf(str, sizeof(str), "root-%d", i);
 940                if (unlikely(len >= (sizeof(str) - 1))) {
 941                        err = -ENOMEM;
 942                        goto out;
 943                }
 944
 945                err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
 946                                   "%x:%x", &domain, &bus);
 947                if (err != 2) {
 948                        if (err >= 0)
 949                                err = -EINVAL;
 950                        xenbus_dev_fatal(pdev->xdev, err,
 951                                         "Error reading PCI root %d", i);
 952                        goto out;
 953                }
 954
 955                err = pcifront_rescan_root(pdev, domain, bus);
 956                if (err) {
 957                        xenbus_dev_fatal(pdev->xdev, err,
 958                                         "Error scanning PCI root %04x:%02x",
 959                                         domain, bus);
 960                        goto out;
 961                }
 962        }
 963
 964        xenbus_switch_state(pdev->xdev, XenbusStateConnected);
 965
 966out:
 967        return err;
 968}
 969
 970static int pcifront_detach_devices(struct pcifront_device *pdev)
 971{
 972        int err = 0;
 973        int i, num_devs;
 974        unsigned int domain, bus, slot, func;
 975        struct pci_bus *pci_bus;
 976        struct pci_dev *pci_dev;
 977        char str[64];
 978
 979        if (xenbus_read_driver_state(pdev->xdev->nodename) !=
 980            XenbusStateConnected)
 981                goto out;
 982
 983        err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, "num_devs", "%d",
 984                           &num_devs);
 985        if (err != 1) {
 986                if (err >= 0)
 987                        err = -EINVAL;
 988                xenbus_dev_fatal(pdev->xdev, err,
 989                                 "Error reading number of PCI devices");
 990                goto out;
 991        }
 992
 993        /* Find devices being detached and remove them. */
 994        for (i = 0; i < num_devs; i++) {
 995                int l, state;
 996                l = snprintf(str, sizeof(str), "state-%d", i);
 997                if (unlikely(l >= (sizeof(str) - 1))) {
 998                        err = -ENOMEM;
 999                        goto out;
1000                }
1001                err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, "%d",
1002                                   &state);
1003                if (err != 1)
1004                        state = XenbusStateUnknown;
1005
1006                if (state != XenbusStateClosing)
1007                        continue;
1008
1009                /* Remove device. */
1010                l = snprintf(str, sizeof(str), "vdev-%d", i);
1011                if (unlikely(l >= (sizeof(str) - 1))) {
1012                        err = -ENOMEM;
1013                        goto out;
1014                }
1015                err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
1016                                   "%x:%x:%x.%x", &domain, &bus, &slot, &func);
1017                if (err != 4) {
1018                        if (err >= 0)
1019                                err = -EINVAL;
1020                        xenbus_dev_fatal(pdev->xdev, err,
1021                                         "Error reading PCI device %d", i);
1022                        goto out;
1023                }
1024
1025                pci_bus = pci_find_bus(domain, bus);
1026                if (!pci_bus) {
1027                        dev_dbg(&pdev->xdev->dev, "Cannot get bus %04x:%02x\n",
1028                                domain, bus);
1029                        continue;
1030                }
1031                pci_dev = pci_get_slot(pci_bus, PCI_DEVFN(slot, func));
1032                if (!pci_dev) {
1033                        dev_dbg(&pdev->xdev->dev,
1034                                "Cannot get PCI device %04x:%02x:%02x.%02x\n",
1035                                domain, bus, slot, func);
1036                        continue;
1037                }
1038                pci_remove_bus_device(pci_dev);
1039                pci_dev_put(pci_dev);
1040
1041                dev_dbg(&pdev->xdev->dev,
1042                        "PCI device %04x:%02x:%02x.%02x removed.\n",
1043                        domain, bus, slot, func);
1044        }
1045
1046        err = xenbus_switch_state(pdev->xdev, XenbusStateReconfiguring);
1047
1048out:
1049        return err;
1050}
1051
1052static void __init_refok pcifront_backend_changed(struct xenbus_device *xdev,
1053                                                  enum xenbus_state be_state)
1054{
1055        struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev);
1056
1057        switch (be_state) {
1058        case XenbusStateUnknown:
1059        case XenbusStateInitialising:
1060        case XenbusStateInitWait:
1061        case XenbusStateInitialised:
1062        case XenbusStateClosed:
1063                break;
1064
1065        case XenbusStateConnected:
1066                pcifront_try_connect(pdev);
1067                break;
1068
1069        case XenbusStateClosing:
1070                dev_warn(&xdev->dev, "backend going away!\n");
1071                pcifront_try_disconnect(pdev);
1072                break;
1073
1074        case XenbusStateReconfiguring:
1075                pcifront_detach_devices(pdev);
1076                break;
1077
1078        case XenbusStateReconfigured:
1079                pcifront_attach_devices(pdev);
1080                break;
1081        }
1082}
1083
1084static int pcifront_xenbus_probe(struct xenbus_device *xdev,
1085                                 const struct xenbus_device_id *id)
1086{
1087        int err = 0;
1088        struct pcifront_device *pdev = alloc_pdev(xdev);
1089
1090        if (pdev == NULL) {
1091                err = -ENOMEM;
1092                xenbus_dev_fatal(xdev, err,
1093                                 "Error allocating pcifront_device struct");
1094                goto out;
1095        }
1096
1097        err = pcifront_publish_info(pdev);
1098        if (err)
1099                free_pdev(pdev);
1100
1101out:
1102        return err;
1103}
1104
1105static int pcifront_xenbus_remove(struct xenbus_device *xdev)
1106{
1107        struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev);
1108        if (pdev)
1109                free_pdev(pdev);
1110
1111        return 0;
1112}
1113
/*
 * Xenbus device IDs this driver binds to.
 * NOTE(review): the trailing empty-string entry presumably terminates the
 * table - confirm against the xenbus core's matching code.
 */
static const struct xenbus_device_id xenpci_ids[] = {
        {"pci"},
        {""},
};
1118
/*
 * Driver descriptor registered with xenbus by pcifront_init() and
 * unregistered by pcifront_cleanup(); wires up the probe, remove and
 * backend state-change callbacks defined above.
 */
static struct xenbus_driver xenbus_pcifront_driver = {
        .name                   = "pcifront",
        .owner                  = THIS_MODULE,
        .ids                    = xenpci_ids,
        .probe                  = pcifront_xenbus_probe,
        .remove                 = pcifront_xenbus_remove,
        .otherend_changed       = pcifront_backend_changed,
};
1127
1128static int __init pcifront_init(void)
1129{
1130        if (!xen_pv_domain() || xen_initial_domain())
1131                return -ENODEV;
1132
1133        pci_frontend_registrar(1 /* enable */);
1134
1135        return xenbus_register_frontend(&xenbus_pcifront_driver);
1136}
1137
/*
 * Module exit point: undo pcifront_init() in reverse order - unregister
 * the xenbus driver first, then disable the PCI frontend hooks.
 */
static void __exit pcifront_cleanup(void)
{
        xenbus_unregister_driver(&xenbus_pcifront_driver);
        pci_frontend_registrar(0 /* disable */);
}
/* Hook the init/exit routines into the module loader and declare metadata. */
module_init(pcifront_init);
module_exit(pcifront_cleanup);

MODULE_DESCRIPTION("Xen PCI passthrough frontend.");
MODULE_LICENSE("GPL");
MODULE_ALIAS("xen:pci");
1149