linux/drivers/pci/iov.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * PCI Express I/O Virtualization (IOV) support
   4 *   Single Root IOV 1.0
   5 *   Address Translation Service 1.0
   6 *
   7 * Copyright (C) 2009 Intel Corporation, Yu Zhao <yu.zhao@intel.com>
   8 */
   9
  10#include <linux/pci.h>
  11#include <linux/slab.h>
  12#include <linux/export.h>
  13#include <linux/string.h>
  14#include <linux/delay.h>
  15#include "pci.h"
  16
  17#define VIRTFN_ID_LEN   16
  18
  19int pci_iov_virtfn_bus(struct pci_dev *dev, int vf_id)
  20{
  21        if (!dev->is_physfn)
  22                return -EINVAL;
  23        return dev->bus->number + ((dev->devfn + dev->sriov->offset +
  24                                    dev->sriov->stride * vf_id) >> 8);
  25}
  26
  27int pci_iov_virtfn_devfn(struct pci_dev *dev, int vf_id)
  28{
  29        if (!dev->is_physfn)
  30                return -EINVAL;
  31        return (dev->devfn + dev->sriov->offset +
  32                dev->sriov->stride * vf_id) & 0xff;
  33}
  34
  35/*
  36 * Per SR-IOV spec sec 3.3.10 and 3.3.11, First VF Offset and VF Stride may
  37 * change when NumVFs changes.
  38 *
  39 * Update iov->offset and iov->stride when NumVFs is written.
  40 */
  41static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn)
  42{
  43        struct pci_sriov *iov = dev->sriov;
  44
  45        pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
  46        pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset);
  47        pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride);
  48}
  49
  50/*
  51 * The PF consumes one bus number.  NumVFs, First VF Offset, and VF Stride
  52 * determine how many additional bus numbers will be consumed by VFs.
  53 *
  54 * Iterate over all valid NumVFs, validate offset and stride, and calculate
  55 * the maximum number of bus numbers that could ever be required.
  56 */
  57static int compute_max_vf_buses(struct pci_dev *dev)
  58{
  59        struct pci_sriov *iov = dev->sriov;
  60        int nr_virtfn, busnr, rc = 0;
  61
  62        for (nr_virtfn = iov->total_VFs; nr_virtfn; nr_virtfn--) {
  63                pci_iov_set_numvfs(dev, nr_virtfn);
  64                if (!iov->offset || (nr_virtfn > 1 && !iov->stride)) {
  65                        rc = -EIO;
  66                        goto out;
  67                }
  68
  69                busnr = pci_iov_virtfn_bus(dev, nr_virtfn - 1);
  70                if (busnr > iov->max_VF_buses)
  71                        iov->max_VF_buses = busnr;
  72        }
  73
  74out:
  75        pci_iov_set_numvfs(dev, 0);
  76        return rc;
  77}
  78
  79static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
  80{
  81        struct pci_bus *child;
  82
  83        if (bus->number == busnr)
  84                return bus;
  85
  86        child = pci_find_bus(pci_domain_nr(bus), busnr);
  87        if (child)
  88                return child;
  89
  90        child = pci_add_new_bus(bus, NULL, busnr);
  91        if (!child)
  92                return NULL;
  93
  94        pci_bus_insert_busn_res(child, busnr, busnr);
  95
  96        return child;
  97}
  98
  99static void virtfn_remove_bus(struct pci_bus *physbus, struct pci_bus *virtbus)
 100{
 101        if (physbus != virtbus && list_empty(&virtbus->devices))
 102                pci_remove_bus(virtbus);
 103}
 104
 105resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno)
 106{
 107        if (!dev->is_physfn)
 108                return 0;
 109
 110        return dev->sriov->barsz[resno - PCI_IOV_RESOURCES];
 111}
 112
 113static void pci_read_vf_config_common(struct pci_dev *virtfn)
 114{
 115        struct pci_dev *physfn = virtfn->physfn;
 116
 117        /*
 118         * Some config registers are the same across all associated VFs.
 119         * Read them once from VF0 so we can skip reading them from the
 120         * other VFs.
 121         *
 122         * PCIe r4.0, sec 9.3.4.1, technically doesn't require all VFs to
 123         * have the same Revision ID and Subsystem ID, but we assume they
 124         * do.
 125         */
 126        pci_read_config_dword(virtfn, PCI_CLASS_REVISION,
 127                              &physfn->sriov->class);
 128        pci_read_config_byte(virtfn, PCI_HEADER_TYPE,
 129                             &physfn->sriov->hdr_type);
 130        pci_read_config_word(virtfn, PCI_SUBSYSTEM_VENDOR_ID,
 131                             &physfn->sriov->subsystem_vendor);
 132        pci_read_config_word(virtfn, PCI_SUBSYSTEM_ID,
 133                             &physfn->sriov->subsystem_device);
 134}
 135
 136int pci_iov_add_virtfn(struct pci_dev *dev, int id)
 137{
 138        int i;
 139        int rc = -ENOMEM;
 140        u64 size;
 141        char buf[VIRTFN_ID_LEN];
 142        struct pci_dev *virtfn;
 143        struct resource *res;
 144        struct pci_sriov *iov = dev->sriov;
 145        struct pci_bus *bus;
 146
 147        bus = virtfn_add_bus(dev->bus, pci_iov_virtfn_bus(dev, id));
 148        if (!bus)
 149                goto failed;
 150
 151        virtfn = pci_alloc_dev(bus);
 152        if (!virtfn)
 153                goto failed0;
 154
 155        virtfn->devfn = pci_iov_virtfn_devfn(dev, id);
 156        virtfn->vendor = dev->vendor;
 157        virtfn->device = iov->vf_device;
 158        virtfn->is_virtfn = 1;
 159        virtfn->physfn = pci_dev_get(dev);
 160
 161        if (id == 0)
 162                pci_read_vf_config_common(virtfn);
 163
 164        rc = pci_setup_device(virtfn);
 165        if (rc)
 166                goto failed1;
 167
 168        virtfn->dev.parent = dev->dev.parent;
 169        virtfn->multifunction = 0;
 170
 171        for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
 172                res = &dev->resource[i + PCI_IOV_RESOURCES];
 173                if (!res->parent)
 174                        continue;
 175                virtfn->resource[i].name = pci_name(virtfn);
 176                virtfn->resource[i].flags = res->flags;
 177                size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
 178                virtfn->resource[i].start = res->start + size * id;
 179                virtfn->resource[i].end = virtfn->resource[i].start + size - 1;
 180                rc = request_resource(res, &virtfn->resource[i]);
 181                BUG_ON(rc);
 182        }
 183
 184        pci_device_add(virtfn, virtfn->bus);
 185
 186        sprintf(buf, "virtfn%u", id);
 187        rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf);
 188        if (rc)
 189                goto failed2;
 190        rc = sysfs_create_link(&virtfn->dev.kobj, &dev->dev.kobj, "physfn");
 191        if (rc)
 192                goto failed3;
 193
 194        kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE);
 195
 196        pci_bus_add_device(virtfn);
 197
 198        return 0;
 199
 200failed3:
 201        sysfs_remove_link(&dev->dev.kobj, buf);
 202failed2:
 203        pci_stop_and_remove_bus_device(virtfn);
 204failed1:
 205        pci_dev_put(dev);
 206failed0:
 207        virtfn_remove_bus(dev->bus, bus);
 208failed:
 209
 210        return rc;
 211}
 212
 213void pci_iov_remove_virtfn(struct pci_dev *dev, int id)
 214{
 215        char buf[VIRTFN_ID_LEN];
 216        struct pci_dev *virtfn;
 217
 218        virtfn = pci_get_domain_bus_and_slot(pci_domain_nr(dev->bus),
 219                                             pci_iov_virtfn_bus(dev, id),
 220                                             pci_iov_virtfn_devfn(dev, id));
 221        if (!virtfn)
 222                return;
 223
 224        sprintf(buf, "virtfn%u", id);
 225        sysfs_remove_link(&dev->dev.kobj, buf);
 226        /*
 227         * pci_stop_dev() could have been called for this virtfn already,
 228         * so the directory for the virtfn may have been removed before.
 229         * Double check to avoid spurious sysfs warnings.
 230         */
 231        if (virtfn->dev.kobj.sd)
 232                sysfs_remove_link(&virtfn->dev.kobj, "physfn");
 233
 234        pci_stop_and_remove_bus_device(virtfn);
 235        virtfn_remove_bus(dev->bus, virtfn->bus);
 236
 237        /* balance pci_get_domain_bus_and_slot() */
 238        pci_dev_put(virtfn);
 239        pci_dev_put(dev);
 240}
 241
 242static ssize_t sriov_totalvfs_show(struct device *dev,
 243                                   struct device_attribute *attr,
 244                                   char *buf)
 245{
 246        struct pci_dev *pdev = to_pci_dev(dev);
 247
 248        return sprintf(buf, "%u\n", pci_sriov_get_totalvfs(pdev));
 249}
 250
 251static ssize_t sriov_numvfs_show(struct device *dev,
 252                                 struct device_attribute *attr,
 253                                 char *buf)
 254{
 255        struct pci_dev *pdev = to_pci_dev(dev);
 256        u16 num_vfs;
 257
 258        /* Serialize vs sriov_numvfs_store() so readers see valid num_VFs */
 259        device_lock(&pdev->dev);
 260        num_vfs = pdev->sriov->num_VFs;
 261        device_unlock(&pdev->dev);
 262
 263        return sprintf(buf, "%u\n", num_vfs);
 264}
 265
 266/*
 267 * num_vfs > 0; number of VFs to enable
 268 * num_vfs = 0; disable all VFs
 269 *
 270 * Note: SRIOV spec does not allow partial VF
 271 *       disable, so it's all or none.
 272 */
 273static ssize_t sriov_numvfs_store(struct device *dev,
 274                                  struct device_attribute *attr,
 275                                  const char *buf, size_t count)
 276{
 277        struct pci_dev *pdev = to_pci_dev(dev);
 278        int ret;
 279        u16 num_vfs;
 280
 281        ret = kstrtou16(buf, 0, &num_vfs);
 282        if (ret < 0)
 283                return ret;
 284
 285        if (num_vfs > pci_sriov_get_totalvfs(pdev))
 286                return -ERANGE;
 287
 288        device_lock(&pdev->dev);
 289
 290        if (num_vfs == pdev->sriov->num_VFs)
 291                goto exit;
 292
 293        /* is PF driver loaded w/callback */
 294        if (!pdev->driver || !pdev->driver->sriov_configure) {
 295                pci_info(pdev, "Driver does not support SRIOV configuration via sysfs\n");
 296                ret = -ENOENT;
 297                goto exit;
 298        }
 299
 300        if (num_vfs == 0) {
 301                /* disable VFs */
 302                ret = pdev->driver->sriov_configure(pdev, 0);
 303                goto exit;
 304        }
 305
 306        /* enable VFs */
 307        if (pdev->sriov->num_VFs) {
 308                pci_warn(pdev, "%d VFs already enabled. Disable before enabling %d VFs\n",
 309                         pdev->sriov->num_VFs, num_vfs);
 310                ret = -EBUSY;
 311                goto exit;
 312        }
 313
 314        ret = pdev->driver->sriov_configure(pdev, num_vfs);
 315        if (ret < 0)
 316                goto exit;
 317
 318        if (ret != num_vfs)
 319                pci_warn(pdev, "%d VFs requested; only %d enabled\n",
 320                         num_vfs, ret);
 321
 322exit:
 323        device_unlock(&pdev->dev);
 324
 325        if (ret < 0)
 326                return ret;
 327
 328        return count;
 329}
 330
 331static ssize_t sriov_offset_show(struct device *dev,
 332                                 struct device_attribute *attr,
 333                                 char *buf)
 334{
 335        struct pci_dev *pdev = to_pci_dev(dev);
 336
 337        return sprintf(buf, "%u\n", pdev->sriov->offset);
 338}
 339
 340static ssize_t sriov_stride_show(struct device *dev,
 341                                 struct device_attribute *attr,
 342                                 char *buf)
 343{
 344        struct pci_dev *pdev = to_pci_dev(dev);
 345
 346        return sprintf(buf, "%u\n", pdev->sriov->stride);
 347}
 348
 349static ssize_t sriov_vf_device_show(struct device *dev,
 350                                    struct device_attribute *attr,
 351                                    char *buf)
 352{
 353        struct pci_dev *pdev = to_pci_dev(dev);
 354
 355        return sprintf(buf, "%x\n", pdev->sriov->vf_device);
 356}
 357
 358static ssize_t sriov_drivers_autoprobe_show(struct device *dev,
 359                                            struct device_attribute *attr,
 360                                            char *buf)
 361{
 362        struct pci_dev *pdev = to_pci_dev(dev);
 363
 364        return sprintf(buf, "%u\n", pdev->sriov->drivers_autoprobe);
 365}
 366
 367static ssize_t sriov_drivers_autoprobe_store(struct device *dev,
 368                                             struct device_attribute *attr,
 369                                             const char *buf, size_t count)
 370{
 371        struct pci_dev *pdev = to_pci_dev(dev);
 372        bool drivers_autoprobe;
 373
 374        if (kstrtobool(buf, &drivers_autoprobe) < 0)
 375                return -EINVAL;
 376
 377        pdev->sriov->drivers_autoprobe = drivers_autoprobe;
 378
 379        return count;
 380}
 381
 382static DEVICE_ATTR_RO(sriov_totalvfs);
 383static DEVICE_ATTR_RW(sriov_numvfs);
 384static DEVICE_ATTR_RO(sriov_offset);
 385static DEVICE_ATTR_RO(sriov_stride);
 386static DEVICE_ATTR_RO(sriov_vf_device);
 387static DEVICE_ATTR_RW(sriov_drivers_autoprobe);
 388
 389static struct attribute *sriov_dev_attrs[] = {
 390        &dev_attr_sriov_totalvfs.attr,
 391        &dev_attr_sriov_numvfs.attr,
 392        &dev_attr_sriov_offset.attr,
 393        &dev_attr_sriov_stride.attr,
 394        &dev_attr_sriov_vf_device.attr,
 395        &dev_attr_sriov_drivers_autoprobe.attr,
 396        NULL,
 397};
 398
 399static umode_t sriov_attrs_are_visible(struct kobject *kobj,
 400                                       struct attribute *a, int n)
 401{
 402        struct device *dev = kobj_to_dev(kobj);
 403
 404        if (!dev_is_pf(dev))
 405                return 0;
 406
 407        return a->mode;
 408}
 409
 410const struct attribute_group sriov_dev_attr_group = {
 411        .attrs = sriov_dev_attrs,
 412        .is_visible = sriov_attrs_are_visible,
 413};
 414
 415int __weak pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
 416{
 417        return 0;
 418}
 419
 420int __weak pcibios_sriov_disable(struct pci_dev *pdev)
 421{
 422        return 0;
 423}
 424
 425static int sriov_add_vfs(struct pci_dev *dev, u16 num_vfs)
 426{
 427        unsigned int i;
 428        int rc;
 429
 430        if (dev->no_vf_scan)
 431                return 0;
 432
 433        for (i = 0; i < num_vfs; i++) {
 434                rc = pci_iov_add_virtfn(dev, i);
 435                if (rc)
 436                        goto failed;
 437        }
 438        return 0;
 439failed:
 440        while (i--)
 441                pci_iov_remove_virtfn(dev, i);
 442
 443        return rc;
 444}
 445
 446static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
 447{
 448        int rc;
 449        int i;
 450        int nres;
 451        u16 initial;
 452        struct resource *res;
 453        struct pci_dev *pdev;
 454        struct pci_sriov *iov = dev->sriov;
 455        int bars = 0;
 456        int bus;
 457
 458        if (!nr_virtfn)
 459                return 0;
 460
 461        if (iov->num_VFs)
 462                return -EINVAL;
 463
 464        pci_read_config_word(dev, iov->pos + PCI_SRIOV_INITIAL_VF, &initial);
 465        if (initial > iov->total_VFs ||
 466            (!(iov->cap & PCI_SRIOV_CAP_VFM) && (initial != iov->total_VFs)))
 467                return -EIO;
 468
 469        if (nr_virtfn < 0 || nr_virtfn > iov->total_VFs ||
 470            (!(iov->cap & PCI_SRIOV_CAP_VFM) && (nr_virtfn > initial)))
 471                return -EINVAL;
 472
 473        nres = 0;
 474        for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
 475                bars |= (1 << (i + PCI_IOV_RESOURCES));
 476                res = &dev->resource[i + PCI_IOV_RESOURCES];
 477                if (res->parent)
 478                        nres++;
 479        }
 480        if (nres != iov->nres) {
 481                pci_err(dev, "not enough MMIO resources for SR-IOV\n");
 482                return -ENOMEM;
 483        }
 484
 485        bus = pci_iov_virtfn_bus(dev, nr_virtfn - 1);
 486        if (bus > dev->bus->busn_res.end) {
 487                pci_err(dev, "can't enable %d VFs (bus %02x out of range of %pR)\n",
 488                        nr_virtfn, bus, &dev->bus->busn_res);
 489                return -ENOMEM;
 490        }
 491
 492        if (pci_enable_resources(dev, bars)) {
 493                pci_err(dev, "SR-IOV: IOV BARS not allocated\n");
 494                return -ENOMEM;
 495        }
 496
 497        if (iov->link != dev->devfn) {
 498                pdev = pci_get_slot(dev->bus, iov->link);
 499                if (!pdev)
 500                        return -ENODEV;
 501
 502                if (!pdev->is_physfn) {
 503                        pci_dev_put(pdev);
 504                        return -ENOSYS;
 505                }
 506
 507                rc = sysfs_create_link(&dev->dev.kobj,
 508                                        &pdev->dev.kobj, "dep_link");
 509                pci_dev_put(pdev);
 510                if (rc)
 511                        return rc;
 512        }
 513
 514        iov->initial_VFs = initial;
 515        if (nr_virtfn < initial)
 516                initial = nr_virtfn;
 517
 518        rc = pcibios_sriov_enable(dev, initial);
 519        if (rc) {
 520                pci_err(dev, "failure %d from pcibios_sriov_enable()\n", rc);
 521                goto err_pcibios;
 522        }
 523
 524        pci_iov_set_numvfs(dev, nr_virtfn);
 525        iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
 526        pci_cfg_access_lock(dev);
 527        pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
 528        msleep(100);
 529        pci_cfg_access_unlock(dev);
 530
 531        rc = sriov_add_vfs(dev, initial);
 532        if (rc)
 533                goto err_pcibios;
 534
 535        kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE);
 536        iov->num_VFs = nr_virtfn;
 537
 538        return 0;
 539
 540err_pcibios:
 541        iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
 542        pci_cfg_access_lock(dev);
 543        pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
 544        ssleep(1);
 545        pci_cfg_access_unlock(dev);
 546
 547        pcibios_sriov_disable(dev);
 548
 549        if (iov->link != dev->devfn)
 550                sysfs_remove_link(&dev->dev.kobj, "dep_link");
 551
 552        pci_iov_set_numvfs(dev, 0);
 553        return rc;
 554}
 555
 556static void sriov_del_vfs(struct pci_dev *dev)
 557{
 558        struct pci_sriov *iov = dev->sriov;
 559        int i;
 560
 561        if (dev->no_vf_scan)
 562                return;
 563
 564        for (i = 0; i < iov->num_VFs; i++)
 565                pci_iov_remove_virtfn(dev, i);
 566}
 567
 568static void sriov_disable(struct pci_dev *dev)
 569{
 570        struct pci_sriov *iov = dev->sriov;
 571
 572        if (!iov->num_VFs)
 573                return;
 574
 575        sriov_del_vfs(dev);
 576        iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
 577        pci_cfg_access_lock(dev);
 578        pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
 579        ssleep(1);
 580        pci_cfg_access_unlock(dev);
 581
 582        pcibios_sriov_disable(dev);
 583
 584        if (iov->link != dev->devfn)
 585                sysfs_remove_link(&dev->dev.kobj, "dep_link");
 586
 587        iov->num_VFs = 0;
 588        pci_iov_set_numvfs(dev, 0);
 589}
 590
 591static int sriov_init(struct pci_dev *dev, int pos)
 592{
 593        int i, bar64;
 594        int rc;
 595        int nres;
 596        u32 pgsz;
 597        u16 ctrl, total;
 598        struct pci_sriov *iov;
 599        struct resource *res;
 600        struct pci_dev *pdev;
 601
 602        pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &ctrl);
 603        if (ctrl & PCI_SRIOV_CTRL_VFE) {
 604                pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, 0);
 605                ssleep(1);
 606        }
 607
 608        ctrl = 0;
 609        list_for_each_entry(pdev, &dev->bus->devices, bus_list)
 610                if (pdev->is_physfn)
 611                        goto found;
 612
 613        pdev = NULL;
 614        if (pci_ari_enabled(dev->bus))
 615                ctrl |= PCI_SRIOV_CTRL_ARI;
 616
 617found:
 618        pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, ctrl);
 619
 620        pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total);
 621        if (!total)
 622                return 0;
 623
 624        pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz);
 625        i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0;
 626        pgsz &= ~((1 << i) - 1);
 627        if (!pgsz)
 628                return -EIO;
 629
 630        pgsz &= ~(pgsz - 1);
 631        pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz);
 632
 633        iov = kzalloc(sizeof(*iov), GFP_KERNEL);
 634        if (!iov)
 635                return -ENOMEM;
 636
 637        nres = 0;
 638        for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
 639                res = &dev->resource[i + PCI_IOV_RESOURCES];
 640                /*
 641                 * If it is already FIXED, don't change it, something
 642                 * (perhaps EA or header fixups) wants it this way.
 643                 */
 644                if (res->flags & IORESOURCE_PCI_FIXED)
 645                        bar64 = (res->flags & IORESOURCE_MEM_64) ? 1 : 0;
 646                else
 647                        bar64 = __pci_read_base(dev, pci_bar_unknown, res,
 648                                                pos + PCI_SRIOV_BAR + i * 4);
 649                if (!res->flags)
 650                        continue;
 651                if (resource_size(res) & (PAGE_SIZE - 1)) {
 652                        rc = -EIO;
 653                        goto failed;
 654                }
 655                iov->barsz[i] = resource_size(res);
 656                res->end = res->start + resource_size(res) * total - 1;
 657                pci_info(dev, "VF(n) BAR%d space: %pR (contains BAR%d for %d VFs)\n",
 658                         i, res, i, total);
 659                i += bar64;
 660                nres++;
 661        }
 662
 663        iov->pos = pos;
 664        iov->nres = nres;
 665        iov->ctrl = ctrl;
 666        iov->total_VFs = total;
 667        iov->driver_max_VFs = total;
 668        pci_read_config_word(dev, pos + PCI_SRIOV_VF_DID, &iov->vf_device);
 669        iov->pgsz = pgsz;
 670        iov->self = dev;
 671        iov->drivers_autoprobe = true;
 672        pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
 673        pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
 674        if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END)
 675                iov->link = PCI_DEVFN(PCI_SLOT(dev->devfn), iov->link);
 676
 677        if (pdev)
 678                iov->dev = pci_dev_get(pdev);
 679        else
 680                iov->dev = dev;
 681
 682        dev->sriov = iov;
 683        dev->is_physfn = 1;
 684        rc = compute_max_vf_buses(dev);
 685        if (rc)
 686                goto fail_max_buses;
 687
 688        return 0;
 689
 690fail_max_buses:
 691        dev->sriov = NULL;
 692        dev->is_physfn = 0;
 693failed:
 694        for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
 695                res = &dev->resource[i + PCI_IOV_RESOURCES];
 696                res->flags = 0;
 697        }
 698
 699        kfree(iov);
 700        return rc;
 701}
 702
 703static void sriov_release(struct pci_dev *dev)
 704{
 705        BUG_ON(dev->sriov->num_VFs);
 706
 707        if (dev != dev->sriov->dev)
 708                pci_dev_put(dev->sriov->dev);
 709
 710        kfree(dev->sriov);
 711        dev->sriov = NULL;
 712}
 713
 714static void sriov_restore_state(struct pci_dev *dev)
 715{
 716        int i;
 717        u16 ctrl;
 718        struct pci_sriov *iov = dev->sriov;
 719
 720        pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &ctrl);
 721        if (ctrl & PCI_SRIOV_CTRL_VFE)
 722                return;
 723
 724        /*
 725         * Restore PCI_SRIOV_CTRL_ARI before pci_iov_set_numvfs() because
 726         * it reads offset & stride, which depend on PCI_SRIOV_CTRL_ARI.
 727         */
 728        ctrl &= ~PCI_SRIOV_CTRL_ARI;
 729        ctrl |= iov->ctrl & PCI_SRIOV_CTRL_ARI;
 730        pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, ctrl);
 731
 732        for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
 733                pci_update_resource(dev, i + PCI_IOV_RESOURCES);
 734
 735        pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz);
 736        pci_iov_set_numvfs(dev, iov->num_VFs);
 737        pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
 738        if (iov->ctrl & PCI_SRIOV_CTRL_VFE)
 739                msleep(100);
 740}
 741
 742/**
 743 * pci_iov_init - initialize the IOV capability
 744 * @dev: the PCI device
 745 *
 746 * Returns 0 on success, or negative on failure.
 747 */
 748int pci_iov_init(struct pci_dev *dev)
 749{
 750        int pos;
 751
 752        if (!pci_is_pcie(dev))
 753                return -ENODEV;
 754
 755        pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
 756        if (pos)
 757                return sriov_init(dev, pos);
 758
 759        return -ENODEV;
 760}
 761
 762/**
 763 * pci_iov_release - release resources used by the IOV capability
 764 * @dev: the PCI device
 765 */
 766void pci_iov_release(struct pci_dev *dev)
 767{
 768        if (dev->is_physfn)
 769                sriov_release(dev);
 770}
 771
 772/**
 773 * pci_iov_remove - clean up SR-IOV state after PF driver is detached
 774 * @dev: the PCI device
 775 */
 776void pci_iov_remove(struct pci_dev *dev)
 777{
 778        struct pci_sriov *iov = dev->sriov;
 779
 780        if (!dev->is_physfn)
 781                return;
 782
 783        iov->driver_max_VFs = iov->total_VFs;
 784        if (iov->num_VFs)
 785                pci_warn(dev, "driver left SR-IOV enabled after remove\n");
 786}
 787
 788/**
 789 * pci_iov_update_resource - update a VF BAR
 790 * @dev: the PCI device
 791 * @resno: the resource number
 792 *
 793 * Update a VF BAR in the SR-IOV capability of a PF.
 794 */
 795void pci_iov_update_resource(struct pci_dev *dev, int resno)
 796{
 797        struct pci_sriov *iov = dev->is_physfn ? dev->sriov : NULL;
 798        struct resource *res = dev->resource + resno;
 799        int vf_bar = resno - PCI_IOV_RESOURCES;
 800        struct pci_bus_region region;
 801        u16 cmd;
 802        u32 new;
 803        int reg;
 804
 805        /*
 806         * The generic pci_restore_bars() path calls this for all devices,
 807         * including VFs and non-SR-IOV devices.  If this is not a PF, we
 808         * have nothing to do.
 809         */
 810        if (!iov)
 811                return;
 812
 813        pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &cmd);
 814        if ((cmd & PCI_SRIOV_CTRL_VFE) && (cmd & PCI_SRIOV_CTRL_MSE)) {
 815                dev_WARN(&dev->dev, "can't update enabled VF BAR%d %pR\n",
 816                         vf_bar, res);
 817                return;
 818        }
 819
 820        /*
 821         * Ignore unimplemented BARs, unused resource slots for 64-bit
 822         * BARs, and non-movable resources, e.g., those described via
 823         * Enhanced Allocation.
 824         */
 825        if (!res->flags)
 826                return;
 827
 828        if (res->flags & IORESOURCE_UNSET)
 829                return;
 830
 831        if (res->flags & IORESOURCE_PCI_FIXED)
 832                return;
 833
 834        pcibios_resource_to_bus(dev->bus, &region, res);
 835        new = region.start;
 836        new |= res->flags & ~PCI_BASE_ADDRESS_MEM_MASK;
 837
 838        reg = iov->pos + PCI_SRIOV_BAR + 4 * vf_bar;
 839        pci_write_config_dword(dev, reg, new);
 840        if (res->flags & IORESOURCE_MEM_64) {
 841                new = region.start >> 16 >> 16;
 842                pci_write_config_dword(dev, reg + 4, new);
 843        }
 844}
 845
 846resource_size_t __weak pcibios_iov_resource_alignment(struct pci_dev *dev,
 847                                                      int resno)
 848{
 849        return pci_iov_resource_size(dev, resno);
 850}
 851
 852/**
 853 * pci_sriov_resource_alignment - get resource alignment for VF BAR
 854 * @dev: the PCI device
 855 * @resno: the resource number
 856 *
 857 * Returns the alignment of the VF BAR found in the SR-IOV capability.
 858 * This is not the same as the resource size which is defined as
 859 * the VF BAR size multiplied by the number of VFs.  The alignment
 860 * is just the VF BAR size.
 861 */
 862resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno)
 863{
 864        return pcibios_iov_resource_alignment(dev, resno);
 865}
 866
 867/**
 868 * pci_restore_iov_state - restore the state of the IOV capability
 869 * @dev: the PCI device
 870 */
 871void pci_restore_iov_state(struct pci_dev *dev)
 872{
 873        if (dev->is_physfn)
 874                sriov_restore_state(dev);
 875}
 876
 877/**
 878 * pci_vf_drivers_autoprobe - set PF property drivers_autoprobe for VFs
 879 * @dev: the PCI device
 880 * @auto_probe: set VF drivers auto probe flag
 881 */
 882void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool auto_probe)
 883{
 884        if (dev->is_physfn)
 885                dev->sriov->drivers_autoprobe = auto_probe;
 886}
 887
 888/**
 889 * pci_iov_bus_range - find bus range used by Virtual Function
 890 * @bus: the PCI bus
 891 *
 892 * Returns max number of buses (exclude current one) used by Virtual
 893 * Functions.
 894 */
 895int pci_iov_bus_range(struct pci_bus *bus)
 896{
 897        int max = 0;
 898        struct pci_dev *dev;
 899
 900        list_for_each_entry(dev, &bus->devices, bus_list) {
 901                if (!dev->is_physfn)
 902                        continue;
 903                if (dev->sriov->max_VF_buses > max)
 904                        max = dev->sriov->max_VF_buses;
 905        }
 906
 907        return max ? max - bus->number : 0;
 908}
 909
 910/**
 911 * pci_enable_sriov - enable the SR-IOV capability
 912 * @dev: the PCI device
 913 * @nr_virtfn: number of virtual functions to enable
 914 *
 915 * Returns 0 on success, or negative on failure.
 916 */
 917int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
 918{
 919        might_sleep();
 920
 921        if (!dev->is_physfn)
 922                return -ENOSYS;
 923
 924        return sriov_enable(dev, nr_virtfn);
 925}
 926EXPORT_SYMBOL_GPL(pci_enable_sriov);
 927
 928/**
 929 * pci_disable_sriov - disable the SR-IOV capability
 930 * @dev: the PCI device
 931 */
 932void pci_disable_sriov(struct pci_dev *dev)
 933{
 934        might_sleep();
 935
 936        if (!dev->is_physfn)
 937                return;
 938
 939        sriov_disable(dev);
 940}
 941EXPORT_SYMBOL_GPL(pci_disable_sriov);
 942
 943/**
 944 * pci_num_vf - return number of VFs associated with a PF device_release_driver
 945 * @dev: the PCI device
 946 *
 947 * Returns number of VFs, or 0 if SR-IOV is not enabled.
 948 */
 949int pci_num_vf(struct pci_dev *dev)
 950{
 951        if (!dev->is_physfn)
 952                return 0;
 953
 954        return dev->sriov->num_VFs;
 955}
 956EXPORT_SYMBOL_GPL(pci_num_vf);
 957
 958/**
 959 * pci_vfs_assigned - returns number of VFs are assigned to a guest
 960 * @dev: the PCI device
 961 *
 962 * Returns number of VFs belonging to this device that are assigned to a guest.
 963 * If device is not a physical function returns 0.
 964 */
 965int pci_vfs_assigned(struct pci_dev *dev)
 966{
 967        struct pci_dev *vfdev;
 968        unsigned int vfs_assigned = 0;
 969        unsigned short dev_id;
 970
 971        /* only search if we are a PF */
 972        if (!dev->is_physfn)
 973                return 0;
 974
 975        /*
 976         * determine the device ID for the VFs, the vendor ID will be the
 977         * same as the PF so there is no need to check for that one
 978         */
 979        dev_id = dev->sriov->vf_device;
 980
 981        /* loop through all the VFs to see if we own any that are assigned */
 982        vfdev = pci_get_device(dev->vendor, dev_id, NULL);
 983        while (vfdev) {
 984                /*
 985                 * It is considered assigned if it is a virtual function with
 986                 * our dev as the physical function and the assigned bit is set
 987                 */
 988                if (vfdev->is_virtfn && (vfdev->physfn == dev) &&
 989                        pci_is_dev_assigned(vfdev))
 990                        vfs_assigned++;
 991
 992                vfdev = pci_get_device(dev->vendor, dev_id, vfdev);
 993        }
 994
 995        return vfs_assigned;
 996}
 997EXPORT_SYMBOL_GPL(pci_vfs_assigned);
 998
 999/**
1000 * pci_sriov_set_totalvfs -- reduce the TotalVFs available
1001 * @dev: the PCI PF device
1002 * @numvfs: number that should be used for TotalVFs supported
1003 *
1004 * Should be called from PF driver's probe routine with
1005 * device's mutex held.
1006 *
1007 * Returns 0 if PF is an SRIOV-capable device and
1008 * value of numvfs valid. If not a PF return -ENOSYS;
1009 * if numvfs is invalid return -EINVAL;
1010 * if VFs already enabled, return -EBUSY.
1011 */
1012int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs)
1013{
1014        if (!dev->is_physfn)
1015                return -ENOSYS;
1016
1017        if (numvfs > dev->sriov->total_VFs)
1018                return -EINVAL;
1019
1020        /* Shouldn't change if VFs already enabled */
1021        if (dev->sriov->ctrl & PCI_SRIOV_CTRL_VFE)
1022                return -EBUSY;
1023
1024        dev->sriov->driver_max_VFs = numvfs;
1025        return 0;
1026}
1027EXPORT_SYMBOL_GPL(pci_sriov_set_totalvfs);
1028
1029/**
1030 * pci_sriov_get_totalvfs -- get total VFs supported on this device
1031 * @dev: the PCI PF device
1032 *
1033 * For a PCIe device with SRIOV support, return the PCIe
1034 * SRIOV capability value of TotalVFs or the value of driver_max_VFs
1035 * if the driver reduced it.  Otherwise 0.
1036 */
1037int pci_sriov_get_totalvfs(struct pci_dev *dev)
1038{
1039        if (!dev->is_physfn)
1040                return 0;
1041
1042        return dev->sriov->driver_max_VFs;
1043}
1044EXPORT_SYMBOL_GPL(pci_sriov_get_totalvfs);
1045
1046/**
1047 * pci_sriov_configure_simple - helper to configure SR-IOV
1048 * @dev: the PCI device
1049 * @nr_virtfn: number of virtual functions to enable, 0 to disable
1050 *
1051 * Enable or disable SR-IOV for devices that don't require any PF setup
1052 * before enabling SR-IOV.  Return value is negative on error, or number of
1053 * VFs allocated on success.
1054 */
1055int pci_sriov_configure_simple(struct pci_dev *dev, int nr_virtfn)
1056{
1057        int rc;
1058
1059        might_sleep();
1060
1061        if (!dev->is_physfn)
1062                return -ENODEV;
1063
1064        if (pci_vfs_assigned(dev)) {
1065                pci_warn(dev, "Cannot modify SR-IOV while VFs are assigned\n");
1066                return -EPERM;
1067        }
1068
1069        if (nr_virtfn == 0) {
1070                sriov_disable(dev);
1071                return 0;
1072        }
1073
1074        rc = sriov_enable(dev, nr_virtfn);
1075        if (rc < 0)
1076                return rc;
1077
1078        return nr_virtfn;
1079}
1080EXPORT_SYMBOL_GPL(pci_sriov_configure_simple);
1081