/*
 * drivers/pci/iov.c
 *
 * Copyright (C) 2009 Intel Corporation, Yu Zhao <yu.zhao@intel.com>
 *
 * PCI Express I/O Virtualization (IOV) support.
 *   Single Root IOV 1.0
 *   Address Translation Service 1.0
 */

#include <linux/pci.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/delay.h>
#include "pci.h"

#define VIRTFN_ID_LEN   16

static inline u8 virtfn_bus(struct pci_dev *dev, int id)
{
        return dev->bus->number + ((dev->devfn + dev->sriov->offset +
                                    dev->sriov->stride * id) >> 8);
}

static inline u8 virtfn_devfn(struct pci_dev *dev, int id)
{
        return (dev->devfn + dev->sriov->offset +
                dev->sriov->stride * id) & 0xff;
}
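
/*
 * Note on the helpers above: per the SR-IOV model, the Routing ID of the
 * id-th Virtual Function is derived from the Physical Function's Routing
 * ID plus First VF Offset plus VF Stride * id.  The low 8 bits select the
 * devfn and any carry selects a higher bus number.  Illustrative example
 * (values are hypothetical): a PF at 01:00.0 with offset 0x80 and stride
 * 0x2 places VF id 3 at devfn 0x86, i.e. 01:10.6.
 */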

static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
{
        int rc;
        struct pci_bus *child;

        if (bus->number == busnr)
                return bus;

        child = pci_find_bus(pci_domain_nr(bus), busnr);
        if (child)
                return child;

        child = pci_add_new_bus(bus, NULL, busnr);
        if (!child)
                return NULL;

        child->subordinate = busnr;
        child->dev.parent = bus->bridge;
        rc = pci_bus_add_child(child);
        if (rc) {
                pci_remove_bus(child);
                return NULL;
        }

        return child;
}

static void virtfn_remove_bus(struct pci_bus *bus, int busnr)
{
        struct pci_bus *child;

        if (bus->number == busnr)
                return;

        child = pci_find_bus(pci_domain_nr(bus), busnr);
        BUG_ON(!child);

        if (list_empty(&child->devices))
                pci_remove_bus(child);
}

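/*
 * virtfn_add() instantiates a single Virtual Function: it (re)uses the
 * bus computed by virtfn_bus(), fills in the VF's config-derived fields,
 * and carves the VF's BARs out of the PF's IOV resources: each VF BAR is
 * resource_size(PF IOV BAR) / TotalVFs bytes, placed at index 'id'.
 */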
static int virtfn_add(struct pci_dev *dev, int id, int reset)
{
        int i;
        int rc;
        u64 size;
        char buf[VIRTFN_ID_LEN];
        struct pci_dev *virtfn;
        struct resource *res;
        struct pci_sriov *iov = dev->sriov;

        virtfn = alloc_pci_dev();
        if (!virtfn)
                return -ENOMEM;

        mutex_lock(&iov->dev->sriov->lock);
        virtfn->bus = virtfn_add_bus(dev->bus, virtfn_bus(dev, id));
        if (!virtfn->bus) {
                kfree(virtfn);
                mutex_unlock(&iov->dev->sriov->lock);
                return -ENOMEM;
        }
        virtfn->devfn = virtfn_devfn(dev, id);
        virtfn->vendor = dev->vendor;
        pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_DID, &virtfn->device);
        pci_setup_device(virtfn);
        virtfn->dev.parent = dev->dev.parent;

        for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
                res = dev->resource + PCI_IOV_RESOURCES + i;
                if (!res->parent)
                        continue;
                virtfn->resource[i].name = pci_name(virtfn);
                virtfn->resource[i].flags = res->flags;
                size = resource_size(res);
                do_div(size, iov->total);
                virtfn->resource[i].start = res->start + size * id;
                virtfn->resource[i].end = virtfn->resource[i].start + size - 1;
                rc = request_resource(res, &virtfn->resource[i]);
                BUG_ON(rc);
        }

        if (reset)
                __pci_reset_function(virtfn);

        pci_device_add(virtfn, virtfn->bus);
        mutex_unlock(&iov->dev->sriov->lock);

        virtfn->physfn = pci_dev_get(dev);
        virtfn->is_virtfn = 1;

        rc = pci_bus_add_device(virtfn);
        if (rc)
                goto failed1;
        sprintf(buf, "virtfn%u", id);
        rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf);
        if (rc)
                goto failed1;
        rc = sysfs_create_link(&virtfn->dev.kobj, &dev->dev.kobj, "physfn");
        if (rc)
                goto failed2;

        kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE);

        return 0;

failed2:
        sysfs_remove_link(&dev->dev.kobj, buf);
failed1:
        pci_dev_put(dev);
        mutex_lock(&iov->dev->sriov->lock);
        pci_remove_bus_device(virtfn);
        virtfn_remove_bus(dev->bus, virtfn_bus(dev, id));
        mutex_unlock(&iov->dev->sriov->lock);

        return rc;
}

static void virtfn_remove(struct pci_dev *dev, int id, int reset)
{
        char buf[VIRTFN_ID_LEN];
        struct pci_bus *bus;
        struct pci_dev *virtfn;
        struct pci_sriov *iov = dev->sriov;

        bus = pci_find_bus(pci_domain_nr(dev->bus), virtfn_bus(dev, id));
        if (!bus)
                return;

        virtfn = pci_get_slot(bus, virtfn_devfn(dev, id));
        if (!virtfn)
                return;

        pci_dev_put(virtfn);

        if (reset) {
                device_release_driver(&virtfn->dev);
                __pci_reset_function(virtfn);
        }

        sprintf(buf, "virtfn%u", id);
        sysfs_remove_link(&dev->dev.kobj, buf);
        sysfs_remove_link(&virtfn->dev.kobj, "physfn");

        mutex_lock(&iov->dev->sriov->lock);
        pci_remove_bus_device(virtfn);
        virtfn_remove_bus(dev->bus, virtfn_bus(dev, id));
        mutex_unlock(&iov->dev->sriov->lock);

        pci_dev_put(dev);
}

static int sriov_migration(struct pci_dev *dev)
{
        u16 status;
        struct pci_sriov *iov = dev->sriov;

        if (!iov->nr_virtfn)
                return 0;

        if (!(iov->cap & PCI_SRIOV_CAP_VFM))
                return 0;

        pci_read_config_word(dev, iov->pos + PCI_SRIOV_STATUS, &status);
        if (!(status & PCI_SRIOV_STATUS_VFM))
                return 0;

        schedule_work(&iov->mtask);

        return 1;
}

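/*
 * Walk the VF Migration State Array and act on each entry.  Assuming the
 * standard SR-IOV VF Migration encodings (UA = Inactive.Unavailable,
 * MI = Dormant.MigrateIn, MO = Active.MigrateOut, AV = Active.Available):
 * a VF offered for migrate-in is accepted by writing AV and then added,
 * and a VF being migrated out is removed and acknowledged by writing UA.
 */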
static void sriov_migration_task(struct work_struct *work)
{
        int i;
        u8 state;
        u16 status;
        struct pci_sriov *iov = container_of(work, struct pci_sriov, mtask);

        for (i = iov->initial; i < iov->nr_virtfn; i++) {
                state = readb(iov->mstate + i);
                if (state == PCI_SRIOV_VFM_MI) {
                        writeb(PCI_SRIOV_VFM_AV, iov->mstate + i);
                        state = readb(iov->mstate + i);
                        if (state == PCI_SRIOV_VFM_AV)
                                virtfn_add(iov->self, i, 1);
                } else if (state == PCI_SRIOV_VFM_MO) {
                        virtfn_remove(iov->self, i, 1);
                        writeb(PCI_SRIOV_VFM_UA, iov->mstate + i);
                        state = readb(iov->mstate + i);
                        if (state == PCI_SRIOV_VFM_AV)
                                virtfn_add(iov->self, i, 0);
                }
        }

        pci_read_config_word(iov->self, iov->pos + PCI_SRIOV_STATUS, &status);
        status &= ~PCI_SRIOV_STATUS_VFM;
        pci_write_config_word(iov->self, iov->pos + PCI_SRIOV_STATUS, status);
}

static int sriov_enable_migration(struct pci_dev *dev, int nr_virtfn)
{
        int bir;
        u32 table;
        resource_size_t pa;
        struct pci_sriov *iov = dev->sriov;

        if (nr_virtfn <= iov->initial)
                return 0;

        pci_read_config_dword(dev, iov->pos + PCI_SRIOV_VFM, &table);
        bir = PCI_SRIOV_VFM_BIR(table);
        if (bir > PCI_STD_RESOURCE_END)
                return -EIO;

        table = PCI_SRIOV_VFM_OFFSET(table);
        if (table + nr_virtfn > pci_resource_len(dev, bir))
                return -EIO;

        pa = pci_resource_start(dev, bir) + table;
        iov->mstate = ioremap(pa, nr_virtfn);
        if (!iov->mstate)
                return -ENOMEM;

        INIT_WORK(&iov->mtask, sriov_migration_task);

        iov->ctrl |= PCI_SRIOV_CTRL_VFM | PCI_SRIOV_CTRL_INTR;
        pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);

        return 0;
}

static void sriov_disable_migration(struct pci_dev *dev)
{
        struct pci_sriov *iov = dev->sriov;

        iov->ctrl &= ~(PCI_SRIOV_CTRL_VFM | PCI_SRIOV_CTRL_INTR);
        pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);

        cancel_work_sync(&iov->mtask);
        iounmap(iov->mstate);
}

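/*
 * sriov_enable() performs the actual VF enable sequence: program NumVFs,
 * read back First VF Offset and VF Stride, verify that the IOV BARs and
 * bus numbers can accommodate the request, set VF Enable and VF MSE, and
 * then wait 100 ms for the new Virtual Functions to become ready before
 * touching them.
 */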
static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
{
        int rc;
        int i, j;
        int nres;
        u16 offset, stride, initial;
        struct resource *res;
        struct pci_dev *pdev;
        struct pci_sriov *iov = dev->sriov;

        if (!nr_virtfn)
                return 0;

        if (iov->nr_virtfn)
                return -EINVAL;

        pci_read_config_word(dev, iov->pos + PCI_SRIOV_INITIAL_VF, &initial);
        if (initial > iov->total ||
            (!(iov->cap & PCI_SRIOV_CAP_VFM) && (initial != iov->total)))
                return -EIO;

        if (nr_virtfn < 0 || nr_virtfn > iov->total ||
            (!(iov->cap & PCI_SRIOV_CAP_VFM) && (nr_virtfn > initial)))
                return -EINVAL;

        pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
        pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &offset);
        pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &stride);
        if (!offset || (nr_virtfn > 1 && !stride))
                return -EIO;

        nres = 0;
        for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
                res = dev->resource + PCI_IOV_RESOURCES + i;
                if (res->parent)
                        nres++;
        }
        if (nres != iov->nres) {
                dev_err(&dev->dev, "not enough MMIO resources for SR-IOV\n");
                return -ENOMEM;
        }

        iov->offset = offset;
        iov->stride = stride;

        if (virtfn_bus(dev, nr_virtfn - 1) > dev->bus->subordinate) {
                dev_err(&dev->dev, "SR-IOV: bus number out of range\n");
                return -ENOMEM;
        }

        if (iov->link != dev->devfn) {
                pdev = pci_get_slot(dev->bus, iov->link);
                if (!pdev)
                        return -ENODEV;

                pci_dev_put(pdev);

                if (!pdev->is_physfn)
                        return -ENODEV;

                rc = sysfs_create_link(&dev->dev.kobj,
                                        &pdev->dev.kobj, "dep_link");
                if (rc)
                        return rc;
        }

        iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
        pci_block_user_cfg_access(dev);
        pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
        msleep(100);
        pci_unblock_user_cfg_access(dev);

        iov->initial = initial;
        if (nr_virtfn < initial)
                initial = nr_virtfn;

        for (i = 0; i < initial; i++) {
                rc = virtfn_add(dev, i, 0);
                if (rc)
                        goto failed;
        }

        if (iov->cap & PCI_SRIOV_CAP_VFM) {
                rc = sriov_enable_migration(dev, nr_virtfn);
                if (rc)
                        goto failed;
        }

        kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE);
        iov->nr_virtfn = nr_virtfn;

        return 0;

failed:
        for (j = 0; j < i; j++)
                virtfn_remove(dev, j, 0);

        iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
        pci_block_user_cfg_access(dev);
        pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
        ssleep(1);
        pci_unblock_user_cfg_access(dev);

        if (iov->link != dev->devfn)
                sysfs_remove_link(&dev->dev.kobj, "dep_link");

        return rc;
}

static void sriov_disable(struct pci_dev *dev)
{
        int i;
        struct pci_sriov *iov = dev->sriov;

        if (!iov->nr_virtfn)
                return;

        if (iov->cap & PCI_SRIOV_CAP_VFM)
                sriov_disable_migration(dev);

        for (i = 0; i < iov->nr_virtfn; i++)
                virtfn_remove(dev, i, 0);

        iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
        pci_block_user_cfg_access(dev);
        pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
        ssleep(1);
        pci_unblock_user_cfg_access(dev);

        if (iov->link != dev->devfn)
                sysfs_remove_link(&dev->dev.kobj, "dep_link");

        iov->nr_virtfn = 0;
}

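/*
 * sriov_init() parses the SR-IOV capability at config offset 'pos': it
 * quiesces a previously set VF Enable bit, records TotalVFs, offset,
 * stride and the supported system page size, and sizes each IOV BAR to
 * hold TotalVFs copies of the per-VF BAR.
 */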
static int sriov_init(struct pci_dev *dev, int pos)
{
        int i;
        int rc;
        int nres;
        u32 pgsz;
        u16 ctrl, total, offset, stride;
        struct pci_sriov *iov;
        struct resource *res;
        struct pci_dev *pdev;

        if (dev->pcie_type != PCI_EXP_TYPE_RC_END &&
            dev->pcie_type != PCI_EXP_TYPE_ENDPOINT)
                return -ENODEV;

        pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &ctrl);
        if (ctrl & PCI_SRIOV_CTRL_VFE) {
                pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, 0);
                ssleep(1);
        }

        pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total);
        if (!total)
                return 0;

        ctrl = 0;
        list_for_each_entry(pdev, &dev->bus->devices, bus_list)
                if (pdev->is_physfn)
                        goto found;

        pdev = NULL;
        if (pci_ari_enabled(dev->bus))
                ctrl |= PCI_SRIOV_CTRL_ARI;

found:
        pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, ctrl);
        pci_write_config_word(dev, pos + PCI_SRIOV_NUM_VF, total);
        pci_read_config_word(dev, pos + PCI_SRIOV_VF_OFFSET, &offset);
        pci_read_config_word(dev, pos + PCI_SRIOV_VF_STRIDE, &stride);
        if (!offset || (total > 1 && !stride))
                return -EIO;

        pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz);
        i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0;
        pgsz &= ~((1 << i) - 1);
        if (!pgsz)
                return -EIO;

        pgsz &= ~(pgsz - 1);
        pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz);

        nres = 0;
        for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
                res = dev->resource + PCI_IOV_RESOURCES + i;
                i += __pci_read_base(dev, pci_bar_unknown, res,
                                     pos + PCI_SRIOV_BAR + i * 4);
                if (!res->flags)
                        continue;
                if (resource_size(res) & (PAGE_SIZE - 1)) {
                        rc = -EIO;
                        goto failed;
                }
                res->end = res->start + resource_size(res) * total - 1;
                nres++;
        }

        iov = kzalloc(sizeof(*iov), GFP_KERNEL);
        if (!iov) {
                rc = -ENOMEM;
                goto failed;
        }

        iov->pos = pos;
        iov->nres = nres;
        iov->ctrl = ctrl;
        iov->total = total;
        iov->offset = offset;
        iov->stride = stride;
        iov->pgsz = pgsz;
        iov->self = dev;
        pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
        pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
        if (dev->pcie_type == PCI_EXP_TYPE_RC_END)
                iov->link = PCI_DEVFN(PCI_SLOT(dev->devfn), iov->link);

        if (pdev)
                iov->dev = pci_dev_get(pdev);
        else
                iov->dev = dev;

        mutex_init(&iov->lock);

        dev->sriov = iov;
        dev->is_physfn = 1;

        return 0;

failed:
        for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
                res = dev->resource + PCI_IOV_RESOURCES + i;
                res->flags = 0;
        }

        return rc;
}

static void sriov_release(struct pci_dev *dev)
{
        BUG_ON(dev->sriov->nr_virtfn);

        if (dev != dev->sriov->dev)
                pci_dev_put(dev->sriov->dev);

        mutex_destroy(&dev->sriov->lock);

        kfree(dev->sriov);
        dev->sriov = NULL;
}

static void sriov_restore_state(struct pci_dev *dev)
{
        int i;
        u16 ctrl;
        struct pci_sriov *iov = dev->sriov;

        pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &ctrl);
        if (ctrl & PCI_SRIOV_CTRL_VFE)
                return;

        for (i = PCI_IOV_RESOURCES; i <= PCI_IOV_RESOURCE_END; i++)
                pci_update_resource(dev, i);

        pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz);
        pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, iov->nr_virtfn);
        pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
        if (iov->ctrl & PCI_SRIOV_CTRL_VFE)
                msleep(100);
}

/**
 * pci_iov_init - initialize the IOV capability
 * @dev: the PCI device
 *
 * Returns 0 on success, or negative on failure.
 */
int pci_iov_init(struct pci_dev *dev)
{
        int pos;

        if (!dev->is_pcie)
                return -ENODEV;

        pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
        if (pos)
                return sriov_init(dev, pos);

        return -ENODEV;
}

/**
 * pci_iov_release - release resources used by the IOV capability
 * @dev: the PCI device
 */
void pci_iov_release(struct pci_dev *dev)
{
        if (dev->is_physfn)
                sriov_release(dev);
}

/**
 * pci_iov_resource_bar - get position of the SR-IOV BAR
 * @dev: the PCI device
 * @resno: the resource number
 * @type: the BAR type to be filled in
 *
 * Returns position of the BAR encapsulated in the SR-IOV capability.
 */
int pci_iov_resource_bar(struct pci_dev *dev, int resno,
                         enum pci_bar_type *type)
{
        if (resno < PCI_IOV_RESOURCES || resno > PCI_IOV_RESOURCE_END)
                return 0;

        BUG_ON(!dev->is_physfn);

        *type = pci_bar_unknown;

        return dev->sriov->pos + PCI_SRIOV_BAR +
                4 * (resno - PCI_IOV_RESOURCES);
}

/**
 * pci_sriov_resource_alignment - get resource alignment for VF BAR
 * @dev: the PCI device
 * @resno: the resource number
 *
 * Returns the alignment of the VF BAR found in the SR-IOV capability.
 * This is not the same as the resource size which is defined as
 * the VF BAR size multiplied by the number of VFs.  The alignment
 * is just the VF BAR size.
 */
int pci_sriov_resource_alignment(struct pci_dev *dev, int resno)
{
        struct resource tmp;
        enum pci_bar_type type;
        int reg = pci_iov_resource_bar(dev, resno, &type);

        if (!reg)
                return 0;

        __pci_read_base(dev, type, &tmp, reg);
        return resource_alignment(&tmp);
}

/**
 * pci_restore_iov_state - restore the state of the IOV capability
 * @dev: the PCI device
 */
void pci_restore_iov_state(struct pci_dev *dev)
{
        if (dev->is_physfn)
                sriov_restore_state(dev);
}

/**
 * pci_iov_bus_range - find bus range used by Virtual Functions
 * @bus: the PCI bus
 *
 * Returns the maximum number of buses (excluding the current one) used by
 * Virtual Functions.
 */
int pci_iov_bus_range(struct pci_bus *bus)
{
        int max = 0;
        u8 busnr;
        struct pci_dev *dev;

        list_for_each_entry(dev, &bus->devices, bus_list) {
                if (!dev->is_physfn)
                        continue;
                busnr = virtfn_bus(dev, dev->sriov->total - 1);
                if (busnr > max)
                        max = busnr;
        }

        return max ? max - bus->number : 0;
}

/**
 * pci_enable_sriov - enable the SR-IOV capability
 * @dev: the PCI device
 * @nr_virtfn: number of virtual functions to enable
 *
 * Returns 0 on success, or negative on failure.
 */
int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
{
        might_sleep();

        if (!dev->is_physfn)
                return -ENODEV;

        return sriov_enable(dev, nr_virtfn);
}
EXPORT_SYMBOL_GPL(pci_enable_sriov);
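
/*
 * Usage sketch (illustrative only; the driver name and VF count are
 * hypothetical): a Physical Function driver would typically call
 * pci_enable_sriov() once the device is up, e.g. from its probe routine:
 *
 *      static int foo_probe(struct pci_dev *pdev,
 *                           const struct pci_device_id *ent)
 *      {
 *              int err = pci_enable_device(pdev);
 *              if (err)
 *                      return err;
 *              err = pci_enable_sriov(pdev, 8);
 *              if (err)
 *                      dev_warn(&pdev->dev, "SR-IOV enable failed: %d\n", err);
 *              return 0;
 *      }
 */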

/**
 * pci_disable_sriov - disable the SR-IOV capability
 * @dev: the PCI device
 */
void pci_disable_sriov(struct pci_dev *dev)
{
        might_sleep();

        if (!dev->is_physfn)
                return;

        sriov_disable(dev);
}
EXPORT_SYMBOL_GPL(pci_disable_sriov);
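
/*
 * Usage sketch (illustrative only): the same hypothetical driver would
 * tear the VFs down from its remove routine before disabling the device:
 *
 *      static void foo_remove(struct pci_dev *pdev)
 *      {
 *              pci_disable_sriov(pdev);
 *              pci_disable_device(pdev);
 *      }
 */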

/**
 * pci_sriov_migration - notify SR-IOV core of Virtual Function Migration
 * @dev: the PCI device
 *
 * Returns IRQ_HANDLED if the IRQ is handled, or IRQ_NONE if not.
 *
 * The Physical Function driver is responsible for registering an IRQ
 * handler using the VF Migration Interrupt Message Number, and for
 * calling this function when the interrupt is generated by the hardware.
 */
irqreturn_t pci_sriov_migration(struct pci_dev *dev)
{
        if (!dev->is_physfn)
                return IRQ_NONE;

        return sriov_migration(dev) ? IRQ_HANDLED : IRQ_NONE;
}
EXPORT_SYMBOL_GPL(pci_sriov_migration);
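
/*
 * Usage sketch (illustrative only; foo_vfm_irq is hypothetical): the PF
 * driver's handler for the VF Migration interrupt can simply forward the
 * event to the SR-IOV core:
 *
 *      static irqreturn_t foo_vfm_irq(int irq, void *data)
 *      {
 *              struct pci_dev *pdev = data;
 *
 *              return pci_sriov_migration(pdev);
 *      }
 */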

static int ats_alloc_one(struct pci_dev *dev, int ps)
{
        int pos;
        u16 cap;
        struct pci_ats *ats;

        pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ATS);
        if (!pos)
                return -ENODEV;

        ats = kzalloc(sizeof(*ats), GFP_KERNEL);
        if (!ats)
                return -ENOMEM;

        ats->pos = pos;
        ats->stu = ps;
        pci_read_config_word(dev, pos + PCI_ATS_CAP, &cap);
        ats->qdep = PCI_ATS_CAP_QDEP(cap) ? PCI_ATS_CAP_QDEP(cap) :
                                            PCI_ATS_MAX_QDEP;
        dev->ats = ats;

        return 0;
}

static void ats_free_one(struct pci_dev *dev)
{
        kfree(dev->ats);
        dev->ats = NULL;
}

/**
 * pci_enable_ats - enable the ATS capability
 * @dev: the PCI device
 * @ps: the IOMMU page shift
 *
 * Returns 0 on success, or negative on failure.
 */
int pci_enable_ats(struct pci_dev *dev, int ps)
{
        int rc;
        u16 ctrl;

        BUG_ON(dev->ats && dev->ats->is_enabled);

        if (ps < PCI_ATS_MIN_STU)
                return -EINVAL;

        if (dev->is_physfn || dev->is_virtfn) {
                struct pci_dev *pdev = dev->is_physfn ? dev : dev->physfn;

                mutex_lock(&pdev->sriov->lock);
                if (pdev->ats)
                        rc = pdev->ats->stu == ps ? 0 : -EINVAL;
                else
                        rc = ats_alloc_one(pdev, ps);

                if (!rc)
                        pdev->ats->ref_cnt++;
                mutex_unlock(&pdev->sriov->lock);
                if (rc)
                        return rc;
        }

        if (!dev->is_physfn) {
                rc = ats_alloc_one(dev, ps);
                if (rc)
                        return rc;
        }

        ctrl = PCI_ATS_CTRL_ENABLE;
        if (!dev->is_virtfn)
                ctrl |= PCI_ATS_CTRL_STU(ps - PCI_ATS_MIN_STU);
        pci_write_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, ctrl);

        dev->ats->is_enabled = 1;

        return 0;
}
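
/*
 * Usage sketch (illustrative only; the 12-bit page shift is an assumed
 * IOMMU property): an IOMMU driver enables ATS on a device it is about
 * to put behind address translation, and checks the result:
 *
 *      if (pci_enable_ats(pdev, 12))
 *              dev_info(&pdev->dev, "ATS not enabled\n");
 */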

/**
 * pci_disable_ats - disable the ATS capability
 * @dev: the PCI device
 */
void pci_disable_ats(struct pci_dev *dev)
{
        u16 ctrl;

        BUG_ON(!dev->ats || !dev->ats->is_enabled);

        pci_read_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, &ctrl);
        ctrl &= ~PCI_ATS_CTRL_ENABLE;
        pci_write_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, ctrl);

        dev->ats->is_enabled = 0;

        if (dev->is_physfn || dev->is_virtfn) {
                struct pci_dev *pdev = dev->is_physfn ? dev : dev->physfn;

                mutex_lock(&pdev->sriov->lock);
                pdev->ats->ref_cnt--;
                if (!pdev->ats->ref_cnt)
                        ats_free_one(pdev);
                mutex_unlock(&pdev->sriov->lock);
        }

        if (!dev->is_physfn)
                ats_free_one(dev);
}

/**
 * pci_ats_queue_depth - query the ATS Invalidate Queue Depth
 * @dev: the PCI device
 *
 * Returns the queue depth on success, or negative on failure.
 *
 * The ATS spec uses 0 in the Invalidate Queue Depth field to
 * indicate that the function can accept 32 Invalidate Requests.
 * But here we use the `real' values (i.e. 1-32) for the Queue
 * Depth; and 0 indicates the function shares the Queue with
 * other functions (doesn't exclusively own a Queue).
 */
int pci_ats_queue_depth(struct pci_dev *dev)
{
        int pos;
        u16 cap;

        if (dev->is_virtfn)
                return 0;

        if (dev->ats)
                return dev->ats->qdep;

        pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ATS);
        if (!pos)
                return -ENODEV;

        pci_read_config_word(dev, pos + PCI_ATS_CAP, &cap);

        return PCI_ATS_CAP_QDEP(cap) ? PCI_ATS_CAP_QDEP(cap) :
                                       PCI_ATS_MAX_QDEP;
}

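/*
 * Usage sketch (illustrative only): an IOMMU driver can limit the number
 * of outstanding Invalidate Requests it issues to a device using the
 * depth reported here, e.g.:
 *
 *      int qdep = pci_ats_queue_depth(pdev);
 */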