linux/drivers/infiniband/hw/qib/qib_pcie.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2010 - 2017 Intel Corporation.  All rights reserved.
   3 * Copyright (c) 2008, 2009 QLogic Corporation. All rights reserved.
   4 *
   5 * This software is available to you under a choice of one of two
   6 * licenses.  You may choose to be licensed under the terms of the GNU
   7 * General Public License (GPL) Version 2, available from the file
   8 * COPYING in the main directory of this source tree, or the
   9 * OpenIB.org BSD license below:
  10 *
  11 *     Redistribution and use in source and binary forms, with or
  12 *     without modification, are permitted provided that the following
  13 *     conditions are met:
  14 *
  15 *      - Redistributions of source code must retain the above
  16 *        copyright notice, this list of conditions and the following
  17 *        disclaimer.
  18 *
  19 *      - Redistributions in binary form must reproduce the above
  20 *        copyright notice, this list of conditions and the following
  21 *        disclaimer in the documentation and/or other materials
  22 *        provided with the distribution.
  23 *
  24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  31 * SOFTWARE.
  32 */
  33
  34#include <linux/pci.h>
  35#include <linux/io.h>
  36#include <linux/delay.h>
  37#include <linux/vmalloc.h>
  38#include <linux/aer.h>
  39#include <linux/module.h>
  40
  41#include "qib.h"
  42
  43/*
  44 * This file contains PCIe utility routines that are common to the
  45 * various QLogic InfiniPath adapters
  46 */
  47
  48/*
  49 * Code to adjust PCIe capabilities.
  50 * To minimize the change footprint, we call it
  51 * from qib_pcie_params, which every chip-specific
  52 * file calls, even though this violates some
  53 * expectations of harmlessness.
  54 */
  55static void qib_tune_pcie_caps(struct qib_devdata *);
  56static void qib_tune_pcie_coalesce(struct qib_devdata *);
  57
  58/*
  59 * Do all the common PCIe setup and initialization.
  60 * devdata is not yet allocated, and is not allocated until after this
  61 * routine returns success.  Therefore qib_dev_err() can't be used for error
  62 * printing.
  63 */
  64int qib_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
  65{
  66        int ret;
  67
  68        ret = pci_enable_device(pdev);
  69        if (ret) {
  70                /*
  71                 * This can happen (in theory) iff:
  72                 * We did a chip reset, and then failed to reprogram the
  73                 * BAR, or the chip reset due to an internal error.  We then
  74                 * unloaded the driver and reloaded it.
  75                 *
  76                 * Both reset cases set the BAR back to initial state.  For
  77                 * the latter case, the AER sticky error bit at offset 0x718
  78                 * should be set, but the Linux kernel doesn't yet know
  79                 * about that, it appears.  If the original BAR was retained
  80                 * in the kernel data structures, this may be OK.
  81                 */
  82                qib_early_err(&pdev->dev, "pci enable failed: error %d\n",
  83                              -ret);
  84                goto done;
  85        }
  86
  87        ret = pci_request_regions(pdev, QIB_DRV_NAME);
  88        if (ret) {
  89                qib_devinfo(pdev, "pci_request_regions fails: err %d\n", -ret);
  90                goto bail;
  91        }
  92
  93        ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
  94        if (ret) {
  95                /*
  96                 * If the 64 bit setup fails, try 32 bit.  Some systems
  97                 * do not setup 64 bit maps on systems with 2GB or less
  98                 * memory installed.
  99                 */
 100                ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
 101                if (ret) {
 102                        qib_devinfo(pdev, "Unable to set DMA mask: %d\n", ret);
 103                        goto bail;
 104                }
 105        }
 106
 107        pci_set_master(pdev);
 108        ret = pci_enable_pcie_error_reporting(pdev);
 109        if (ret) {
 110                qib_early_err(&pdev->dev,
 111                              "Unable to enable pcie error reporting: %d\n",
 112                              ret);
 113                ret = 0;
 114        }
 115        goto done;
 116
 117bail:
 118        pci_disable_device(pdev);
 119        pci_release_regions(pdev);
 120done:
 121        return ret;
 122}
 123
 124/*
 125 * Do remaining PCIe setup, once dd is allocated, and save away
 126 * fields required to re-initialize after a chip reset, or for
 127 * various other purposes
 128 */
 129int qib_pcie_ddinit(struct qib_devdata *dd, struct pci_dev *pdev,
 130                    const struct pci_device_id *ent)
 131{
 132        unsigned long len;
 133        resource_size_t addr;
 134
 135        dd->pcidev = pdev;
 136        pci_set_drvdata(pdev, dd);
 137
 138        addr = pci_resource_start(pdev, 0);
 139        len = pci_resource_len(pdev, 0);
 140
 141        dd->kregbase = ioremap(addr, len);
 142        if (!dd->kregbase)
 143                return -ENOMEM;
 144
 145        dd->kregend = (u64 __iomem *)((void __iomem *) dd->kregbase + len);
 146        dd->physaddr = addr;        /* used for io_remap, etc. */
 147
 148        /*
 149         * Save BARs to rewrite after device reset.  Save all 64 bits of
 150         * BAR, just in case.
 151         */
 152        dd->pcibar0 = addr;
 153        dd->pcibar1 = addr >> 32;
 154        dd->deviceid = ent->device; /* save for later use */
 155        dd->vendorid = ent->vendor;
 156
 157        return 0;
 158}
 159
 160/*
 161 * Do PCIe cleanup, after chip-specific cleanup, etc.  Just prior
 162 * to releasing the dd memory.
 163 * void because none of the core pcie cleanup returns are void
 164 */
 165void qib_pcie_ddcleanup(struct qib_devdata *dd)
 166{
 167        u64 __iomem *base = (void __iomem *) dd->kregbase;
 168
 169        dd->kregbase = NULL;
 170        iounmap(base);
 171        if (dd->piobase)
 172                iounmap(dd->piobase);
 173        if (dd->userbase)
 174                iounmap(dd->userbase);
 175        if (dd->piovl15base)
 176                iounmap(dd->piovl15base);
 177
 178        pci_disable_device(dd->pcidev);
 179        pci_release_regions(dd->pcidev);
 180
 181        pci_set_drvdata(dd->pcidev, NULL);
 182}
 183
 184/*
 185 * We save the msi lo and hi values, so we can restore them after
 186 * chip reset (the kernel PCI infrastructure doesn't yet handle that
 187 * correctly.
 188 */
 189static void qib_cache_msi_info(struct qib_devdata *dd, int pos)
 190{
 191        struct pci_dev *pdev = dd->pcidev;
 192        u16 control;
 193
 194        pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO, &dd->msi_lo);
 195        pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_HI, &dd->msi_hi);
 196        pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &control);
 197
 198        /* now save the data (vector) info */
 199        pci_read_config_word(pdev,
 200                             pos + ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8),
 201                             &dd->msi_data);
 202}
 203
 204int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent)
 205{
 206        u16 linkstat, speed;
 207        int nvec;
 208        int maxvec;
 209        unsigned int flags = PCI_IRQ_MSIX | PCI_IRQ_MSI;
 210
 211        if (!pci_is_pcie(dd->pcidev)) {
 212                qib_dev_err(dd, "Can't find PCI Express capability!\n");
 213                /* set up something... */
 214                dd->lbus_width = 1;
 215                dd->lbus_speed = 2500; /* Gen1, 2.5GHz */
 216                nvec = -1;
 217                goto bail;
 218        }
 219
 220        if (dd->flags & QIB_HAS_INTX)
 221                flags |= PCI_IRQ_LEGACY;
 222        maxvec = (nent && *nent) ? *nent : 1;
 223        nvec = pci_alloc_irq_vectors(dd->pcidev, 1, maxvec, flags);
 224        if (nvec < 0)
 225                goto bail;
 226
 227        /*
 228         * If nent exists, make sure to record how many vectors were allocated.
 229         * If msix_enabled is false, return 0 so the fallback code works
 230         * correctly.
 231         */
 232        if (nent)
 233                *nent = !dd->pcidev->msix_enabled ? 0 : nvec;
 234
 235        if (dd->pcidev->msi_enabled)
 236                qib_cache_msi_info(dd, dd->pcidev->msi_cap);
 237
 238        pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat);
 239        /*
 240         * speed is bits 0-3, linkwidth is bits 4-8
 241         * no defines for them in headers
 242         */
 243        speed = linkstat & 0xf;
 244        linkstat >>= 4;
 245        linkstat &= 0x1f;
 246        dd->lbus_width = linkstat;
 247
 248        switch (speed) {
 249        case 1:
 250                dd->lbus_speed = 2500; /* Gen1, 2.5GHz */
 251                break;
 252        case 2:
 253                dd->lbus_speed = 5000; /* Gen1, 5GHz */
 254                break;
 255        default: /* not defined, assume gen1 */
 256                dd->lbus_speed = 2500;
 257                break;
 258        }
 259
 260        /*
 261         * Check against expected pcie width and complain if "wrong"
 262         * on first initialization, not afterwards (i.e., reset).
 263         */
 264        if (minw && linkstat < minw)
 265                qib_dev_err(dd,
 266                            "PCIe width %u (x%u HCA), performance reduced\n",
 267                            linkstat, minw);
 268
 269        qib_tune_pcie_caps(dd);
 270
 271        qib_tune_pcie_coalesce(dd);
 272
 273bail:
 274        /* fill in string, even on errors */
 275        snprintf(dd->lbus_info, sizeof(dd->lbus_info),
 276                 "PCIe,%uMHz,x%u\n", dd->lbus_speed, dd->lbus_width);
 277        return nvec < 0 ? nvec : 0;
 278}
 279
 280/**
 281 * qib_free_irq - Cleanup INTx and MSI interrupts
 282 * @dd: valid pointer to qib dev data
 283 *
 284 * Since cleanup for INTx and MSI interrupts is trivial, have a common
 285 * routine.
 286 *
 287 */
 288void qib_free_irq(struct qib_devdata *dd)
 289{
 290        pci_free_irq(dd->pcidev, 0, dd);
 291        pci_free_irq_vectors(dd->pcidev);
 292}
 293
 294/*
 295 * Setup pcie interrupt stuff again after a reset.  I'd like to just call
 296 * pci_enable_msi() again for msi, but when I do that,
 297 * the MSI enable bit doesn't get set in the command word, and
 298 * we switch to to a different interrupt vector, which is confusing,
 299 * so I instead just do it all inline.  Perhaps somehow can tie this
 300 * into the PCIe hotplug support at some point
 301 */
 302int qib_reinit_intr(struct qib_devdata *dd)
 303{
 304        int pos;
 305        u16 control;
 306        int ret = 0;
 307
 308        /* If we aren't using MSI, don't restore it */
 309        if (!dd->msi_lo)
 310                goto bail;
 311
 312        pos = dd->pcidev->msi_cap;
 313        if (!pos) {
 314                qib_dev_err(dd,
 315                        "Can't find MSI capability, can't restore MSI settings\n");
 316                ret = 0;
 317                /* nothing special for MSIx, just MSI */
 318                goto bail;
 319        }
 320        pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_LO,
 321                               dd->msi_lo);
 322        pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_HI,
 323                               dd->msi_hi);
 324        pci_read_config_word(dd->pcidev, pos + PCI_MSI_FLAGS, &control);
 325        if (!(control & PCI_MSI_FLAGS_ENABLE)) {
 326                control |= PCI_MSI_FLAGS_ENABLE;
 327                pci_write_config_word(dd->pcidev, pos + PCI_MSI_FLAGS,
 328                                      control);
 329        }
 330        /* now rewrite the data (vector) info */
 331        pci_write_config_word(dd->pcidev, pos +
 332                              ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8),
 333                              dd->msi_data);
 334        ret = 1;
 335bail:
 336        qib_free_irq(dd);
 337
 338        if (!ret && (dd->flags & QIB_HAS_INTX))
 339                ret = 1;
 340
 341        /* and now set the pci master bit again */
 342        pci_set_master(dd->pcidev);
 343
 344        return ret;
 345}
 346
 347/*
 348 * These two routines are helper routines for the device reset code
 349 * to move all the pcie code out of the chip-specific driver code.
 350 */
 351void qib_pcie_getcmd(struct qib_devdata *dd, u16 *cmd, u8 *iline, u8 *cline)
 352{
 353        pci_read_config_word(dd->pcidev, PCI_COMMAND, cmd);
 354        pci_read_config_byte(dd->pcidev, PCI_INTERRUPT_LINE, iline);
 355        pci_read_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE, cline);
 356}
 357
 358void qib_pcie_reenable(struct qib_devdata *dd, u16 cmd, u8 iline, u8 cline)
 359{
 360        int r;
 361
 362        r = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0,
 363                                   dd->pcibar0);
 364        if (r)
 365                qib_dev_err(dd, "rewrite of BAR0 failed: %d\n", r);
 366        r = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1,
 367                                   dd->pcibar1);
 368        if (r)
 369                qib_dev_err(dd, "rewrite of BAR1 failed: %d\n", r);
 370        /* now re-enable memory access, and restore cosmetic settings */
 371        pci_write_config_word(dd->pcidev, PCI_COMMAND, cmd);
 372        pci_write_config_byte(dd->pcidev, PCI_INTERRUPT_LINE, iline);
 373        pci_write_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE, cline);
 374        r = pci_enable_device(dd->pcidev);
 375        if (r)
 376                qib_dev_err(dd,
 377                        "pci_enable_device failed after reset: %d\n", r);
 378}
 379
 380
 381static int qib_pcie_coalesce;
 382module_param_named(pcie_coalesce, qib_pcie_coalesce, int, S_IRUGO);
 383MODULE_PARM_DESC(pcie_coalesce, "tune PCIe coalescing on some Intel chipsets");
 384
 385/*
 386 * Enable PCIe completion and data coalescing, on Intel 5x00 and 7300
 387 * chipsets.   This is known to be unsafe for some revisions of some
 388 * of these chipsets, with some BIOS settings, and enabling it on those
 389 * systems may result in the system crashing, and/or data corruption.
 390 */
 391static void qib_tune_pcie_coalesce(struct qib_devdata *dd)
 392{
 393        struct pci_dev *parent;
 394        u16 devid;
 395        u32 mask, bits, val;
 396
 397        if (!qib_pcie_coalesce)
 398                return;
 399
 400        /* Find out supported and configured values for parent (root) */
 401        parent = dd->pcidev->bus->self;
 402        if (parent->bus->parent) {
 403                qib_devinfo(dd->pcidev, "Parent not root\n");
 404                return;
 405        }
 406        if (!pci_is_pcie(parent))
 407                return;
 408        if (parent->vendor != 0x8086)
 409                return;
 410
 411        /*
 412         *  - bit 12: Max_rdcmp_Imt_EN: need to set to 1
 413         *  - bit 11: COALESCE_FORCE: need to set to 0
 414         *  - bit 10: COALESCE_EN: need to set to 1
 415         *  (but limitations on some on some chipsets)
 416         *
 417         *  On the Intel 5000, 5100, and 7300 chipsets, there is
 418         *  also: - bit 25:24: COALESCE_MODE, need to set to 0
 419         */
 420        devid = parent->device;
 421        if (devid >= 0x25e2 && devid <= 0x25fa) {
 422                /* 5000 P/V/X/Z */
 423                if (parent->revision <= 0xb2)
 424                        bits = 1U << 10;
 425                else
 426                        bits = 7U << 10;
 427                mask = (3U << 24) | (7U << 10);
 428        } else if (devid >= 0x65e2 && devid <= 0x65fa) {
 429                /* 5100 */
 430                bits = 1U << 10;
 431                mask = (3U << 24) | (7U << 10);
 432        } else if (devid >= 0x4021 && devid <= 0x402e) {
 433                /* 5400 */
 434                bits = 7U << 10;
 435                mask = 7U << 10;
 436        } else if (devid >= 0x3604 && devid <= 0x360a) {
 437                /* 7300 */
 438                bits = 7U << 10;
 439                mask = (3U << 24) | (7U << 10);
 440        } else {
 441                /* not one of the chipsets that we know about */
 442                return;
 443        }
 444        pci_read_config_dword(parent, 0x48, &val);
 445        val &= ~mask;
 446        val |= bits;
 447        pci_write_config_dword(parent, 0x48, val);
 448}
 449
 450/*
 451 * BIOS may not set PCIe bus-utilization parameters for best performance.
 452 * Check and optionally adjust them to maximize our throughput.
 453 */
 454static int qib_pcie_caps;
 455module_param_named(pcie_caps, qib_pcie_caps, int, S_IRUGO);
 456MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)");
 457
 458static void qib_tune_pcie_caps(struct qib_devdata *dd)
 459{
 460        struct pci_dev *parent;
 461        u16 rc_mpss, rc_mps, ep_mpss, ep_mps;
 462        u16 rc_mrrs, ep_mrrs, max_mrrs;
 463
 464        /* Find out supported and configured values for parent (root) */
 465        parent = dd->pcidev->bus->self;
 466        if (!pci_is_root_bus(parent->bus)) {
 467                qib_devinfo(dd->pcidev, "Parent not root\n");
 468                return;
 469        }
 470
 471        if (!pci_is_pcie(parent) || !pci_is_pcie(dd->pcidev))
 472                return;
 473
 474        rc_mpss = parent->pcie_mpss;
 475        rc_mps = ffs(pcie_get_mps(parent)) - 8;
 476        /* Find out supported and configured values for endpoint (us) */
 477        ep_mpss = dd->pcidev->pcie_mpss;
 478        ep_mps = ffs(pcie_get_mps(dd->pcidev)) - 8;
 479
 480        /* Find max payload supported by root, endpoint */
 481        if (rc_mpss > ep_mpss)
 482                rc_mpss = ep_mpss;
 483
 484        /* If Supported greater than limit in module param, limit it */
 485        if (rc_mpss > (qib_pcie_caps & 7))
 486                rc_mpss = qib_pcie_caps & 7;
 487        /* If less than (allowed, supported), bump root payload */
 488        if (rc_mpss > rc_mps) {
 489                rc_mps = rc_mpss;
 490                pcie_set_mps(parent, 128 << rc_mps);
 491        }
 492        /* If less than (allowed, supported), bump endpoint payload */
 493        if (rc_mpss > ep_mps) {
 494                ep_mps = rc_mpss;
 495                pcie_set_mps(dd->pcidev, 128 << ep_mps);
 496        }
 497
 498        /*
 499         * Now the Read Request size.
 500         * No field for max supported, but PCIe spec limits it to 4096,
 501         * which is code '5' (log2(4096) - 7)
 502         */
 503        max_mrrs = 5;
 504        if (max_mrrs > ((qib_pcie_caps >> 4) & 7))
 505                max_mrrs = (qib_pcie_caps >> 4) & 7;
 506
 507        max_mrrs = 128 << max_mrrs;
 508        rc_mrrs = pcie_get_readrq(parent);
 509        ep_mrrs = pcie_get_readrq(dd->pcidev);
 510
 511        if (max_mrrs > rc_mrrs) {
 512                rc_mrrs = max_mrrs;
 513                pcie_set_readrq(parent, rc_mrrs);
 514        }
 515        if (max_mrrs > ep_mrrs) {
 516                ep_mrrs = max_mrrs;
 517                pcie_set_readrq(dd->pcidev, ep_mrrs);
 518        }
 519}
 520/* End of PCIe capability tuning */
 521
 522/*
 523 * From here through qib_pci_err_handler definition is invoked via
 524 * PCI error infrastructure, registered via pci
 525 */
 526static pci_ers_result_t
 527qib_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
 528{
 529        struct qib_devdata *dd = pci_get_drvdata(pdev);
 530        pci_ers_result_t ret = PCI_ERS_RESULT_RECOVERED;
 531
 532        switch (state) {
 533        case pci_channel_io_normal:
 534                qib_devinfo(pdev, "State Normal, ignoring\n");
 535                break;
 536
 537        case pci_channel_io_frozen:
 538                qib_devinfo(pdev, "State Frozen, requesting reset\n");
 539                pci_disable_device(pdev);
 540                ret = PCI_ERS_RESULT_NEED_RESET;
 541                break;
 542
 543        case pci_channel_io_perm_failure:
 544                qib_devinfo(pdev, "State Permanent Failure, disabling\n");
 545                if (dd) {
 546                        /* no more register accesses! */
 547                        dd->flags &= ~QIB_PRESENT;
 548                        qib_disable_after_error(dd);
 549                }
 550                 /* else early, or other problem */
 551                ret =  PCI_ERS_RESULT_DISCONNECT;
 552                break;
 553
 554        default: /* shouldn't happen */
 555                qib_devinfo(pdev, "QIB PCI errors detected (state %d)\n",
 556                        state);
 557                break;
 558        }
 559        return ret;
 560}
 561
 562static pci_ers_result_t
 563qib_pci_mmio_enabled(struct pci_dev *pdev)
 564{
 565        u64 words = 0U;
 566        struct qib_devdata *dd = pci_get_drvdata(pdev);
 567        pci_ers_result_t ret = PCI_ERS_RESULT_RECOVERED;
 568
 569        if (dd && dd->pport) {
 570                words = dd->f_portcntr(dd->pport, QIBPORTCNTR_WORDRCV);
 571                if (words == ~0ULL)
 572                        ret = PCI_ERS_RESULT_NEED_RESET;
 573        }
 574        qib_devinfo(pdev,
 575                "QIB mmio_enabled function called, read wordscntr %Lx, returning %d\n",
 576                words, ret);
 577        return  ret;
 578}
 579
 580static pci_ers_result_t
 581qib_pci_slot_reset(struct pci_dev *pdev)
 582{
 583        qib_devinfo(pdev, "QIB slot_reset function called, ignored\n");
 584        return PCI_ERS_RESULT_CAN_RECOVER;
 585}
 586
 587static void
 588qib_pci_resume(struct pci_dev *pdev)
 589{
 590        struct qib_devdata *dd = pci_get_drvdata(pdev);
 591
 592        qib_devinfo(pdev, "QIB resume function called\n");
 593        /*
 594         * Running jobs will fail, since it's asynchronous
 595         * unlike sysfs-requested reset.   Better than
 596         * doing nothing.
 597         */
 598        qib_init(dd, 1); /* same as re-init after reset */
 599}
 600
 601const struct pci_error_handlers qib_pci_err_handler = {
 602        .error_detected = qib_pci_error_detected,
 603        .mmio_enabled = qib_pci_mmio_enabled,
 604        .slot_reset = qib_pci_slot_reset,
 605        .resume = qib_pci_resume,
 606};
 607