linux/arch/powerpc/kernel/eeh.c
   1/*
   2 * Copyright IBM Corporation 2001, 2005, 2006
   3 * Copyright Dave Engebretsen & Todd Inglett 2001
   4 * Copyright Linas Vepstas 2005, 2006
   5 * Copyright 2001-2012 IBM Corporation.
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License as published by
   9 * the Free Software Foundation; either version 2 of the License, or
  10 * (at your option) any later version.
  11 *
  12 * This program is distributed in the hope that it will be useful,
  13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 * GNU General Public License for more details.
  16 *
  17 * You should have received a copy of the GNU General Public License
  18 * along with this program; if not, write to the Free Software
  19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  20 *
  21 * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
  22 */
  23
  24#include <linux/delay.h>
  25#include <linux/debugfs.h>
  26#include <linux/sched.h>
  27#include <linux/init.h>
  28#include <linux/list.h>
  29#include <linux/pci.h>
  30#include <linux/iommu.h>
  31#include <linux/proc_fs.h>
  32#include <linux/rbtree.h>
  33#include <linux/reboot.h>
  34#include <linux/seq_file.h>
  35#include <linux/spinlock.h>
  36#include <linux/export.h>
  37#include <linux/of.h>
  38
  39#include <linux/atomic.h>
  40#include <asm/eeh.h>
  41#include <asm/eeh_event.h>
  42#include <asm/io.h>
  43#include <asm/iommu.h>
  44#include <asm/machdep.h>
  45#include <asm/ppc-pci.h>
  46#include <asm/rtas.h>
  47
  48
  49/** Overview:
  50 *  EEH, or "Extended Error Handling" is a PCI bridge technology for
  51 *  dealing with PCI bus errors that can't be dealt with within the
  52 *  usual PCI framework, except by check-stopping the CPU.  Systems
  53 *  that are designed for high-availability/reliability cannot afford
  54 *  to crash due to a "mere" PCI error, thus the need for EEH.
  55 *  An EEH-capable bridge operates by converting a detected error
  56 *  into a "slot freeze", taking the PCI adapter off-line, making
  57 *  the slot behave, from the OS'es point of view, as if the slot
  58 *  were "empty": all reads return 0xff's and all writes are silently
  59 *  ignored.  EEH slot isolation events can be triggered by parity
  60 *  errors on the address or data busses (e.g. during posted writes),
  61 *  which in turn might be caused by low voltage on the bus, dust,
  62 *  vibration, humidity, radioactivity or plain-old failed hardware.
  63 *
  64 *  Note, however, that one of the leading causes of EEH slot
   65 *  freeze events is buggy device drivers, buggy device microcode,
  66 *  or buggy device hardware.  This is because any attempt by the
  67 *  device to bus-master data to a memory address that is not
  68 *  assigned to the device will trigger a slot freeze.   (The idea
  69 *  is to prevent devices-gone-wild from corrupting system memory).
  70 *  Buggy hardware/drivers will have a miserable time co-existing
  71 *  with EEH.
  72 *
  73 *  Ideally, a PCI device driver, when suspecting that an isolation
  74 *  event has occurred (e.g. by reading 0xff's), will then ask EEH
  75 *  whether this is the case, and then take appropriate steps to
  76 *  reset the PCI slot, the PCI device, and then resume operations.
  77 *  However, until that day,  the checking is done here, with the
  78 *  eeh_check_failure() routine embedded in the MMIO macros.  If
  79 *  the slot is found to be isolated, an "EEH Event" is synthesized
  80 *  and sent out for processing.
  81 */
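
/* A rough sketch of what the MMIO-macro based checking mentioned above
 * amounts to. This is illustrative only (the real wrappers live in
 * asm/eeh.h, and the exact helper names there may differ):
 *
 *      static inline u32 example_eeh_readl(const volatile void __iomem *addr)
 *      {
 *              u32 val = in_le32(addr);
 *
 *              if (val == (u32)~0)
 *                      eeh_check_failure(addr);
 *              return val;
 *      }
 *
 * That is: an all-ones result from a read is treated as a possible slot
 * freeze and triggers the (expensive) firmware state query implemented
 * further down in this file.
 */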
  82
  83/* If a device driver keeps reading an MMIO register in an interrupt
  84 * handler after a slot isolation event, it might be broken.
  85 * This sets the threshold for how many read attempts we allow
  86 * before printing an error message.
  87 */
  88#define EEH_MAX_FAILS   2100000
  89
  90/* Time to wait for a PCI slot to report status, in milliseconds */
  91#define PCI_BUS_RESET_WAIT_MSEC (5*60*1000)
  92
  93/*
  94 * EEH probe mode support, which is part of the flags,
  95 * is to support multiple platforms for EEH. Some platforms
   96 * like pSeries do PCI enumeration based on the device tree,
   97 * while other platforms like powernv probe PCI devices
   98 * from hardware. The flag is used to distinguish between them.
   99 * In addition, struct eeh_ops::probe is invoked on the
  100 * particular OF node or PCI device so that the corresponding
  101 * PE is created there.
 102 */
 103int eeh_subsystem_flags;
 104EXPORT_SYMBOL(eeh_subsystem_flags);
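
/*
 * Illustrative sketch of how the probe mode is selected and consumed.
 * The selection happens in the platform init code, not in this file,
 * and the exact call sites shown below are an assumption about it:
 *
 *      eeh_add_flag(EEH_PROBE_MODE_DEVTREE);   (pSeries-style, device tree)
 *      eeh_add_flag(EEH_PROBE_MODE_DEV);       (powernv-style, hardware)
 *
 *      if (eeh_has_flag(EEH_PROBE_MODE_DEV))
 *              eeh_ops->probe(pdn, NULL);
 */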
 105
 106/*
  107 * Maximum number of times a PE is allowed to be frozen. If one
  108 * particular PE's frozen count in the last hour exceeds this limit,
  109 * the PE will be forced offline permanently.
 110 */
 111int eeh_max_freezes = 5;
 112
 113/* Platform dependent EEH operations */
 114struct eeh_ops *eeh_ops = NULL;
 115
 116/* Lock to avoid races due to multiple reports of an error */
 117DEFINE_RAW_SPINLOCK(confirm_error_lock);
 118
 119/* Lock to protect passed flags */
 120static DEFINE_MUTEX(eeh_dev_mutex);
 121
  122/* Buffer for reporting pci register dumps. It's here in BSS, and
  123 * not dynamically allocated, so that it ends up in the RMO where RTAS
  124 * can access it.
 125 */
 126#define EEH_PCI_REGS_LOG_LEN 8192
 127static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
 128
 129/*
  130 * The struct is used to maintain global EEH statistic
  131 * information. In addition, the EEH global statistics are
  132 * exported to user space through procfs.
 133 */
 134struct eeh_stats {
 135        u64 no_device;          /* PCI device not found         */
 136        u64 no_dn;              /* OF node not found            */
 137        u64 no_cfg_addr;        /* Config address not found     */
 138        u64 ignored_check;      /* EEH check skipped            */
 139        u64 total_mmio_ffs;     /* Total EEH checks             */
 140        u64 false_positives;    /* Unnecessary EEH checks       */
 141        u64 slot_resets;        /* PE reset                     */
 142};
 143
 144static struct eeh_stats eeh_stats;
 145
 146static int __init eeh_setup(char *str)
 147{
 148        if (!strcmp(str, "off"))
 149                eeh_add_flag(EEH_FORCE_DISABLED);
 150        else if (!strcmp(str, "early_log"))
 151                eeh_add_flag(EEH_EARLY_DUMP_LOG);
 152
 153        return 1;
 154}
 155__setup("eeh=", eeh_setup);
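
/*
 * Boot-time usage of the "eeh=" parameter parsed above, for example:
 *
 *      eeh=off         set EEH_FORCE_DISABLED (disable EEH checking)
 *      eeh=early_log   set EEH_EARLY_DUMP_LOG (early dump of the error log)
 *
 * Any other value is silently ignored and leaves EEH in its default state.
 */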
 156
 157/*
 158 * This routine captures assorted PCI configuration space data
 159 * for the indicated PCI device, and puts them into a buffer
 160 * for RTAS error logging.
 161 */
 162static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
 163{
 164        struct pci_dn *pdn = eeh_dev_to_pdn(edev);
 165        u32 cfg;
 166        int cap, i;
 167        int n = 0, l = 0;
 168        char buffer[128];
 169
 170        n += scnprintf(buf+n, len-n, "%04x:%02x:%02x:%01x\n",
 171                       edev->phb->global_number, pdn->busno,
 172                       PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
 173        pr_warn("EEH: of node=%04x:%02x:%02x:%01x\n",
 174                edev->phb->global_number, pdn->busno,
 175                PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
 176
 177        eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
 178        n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
 179        pr_warn("EEH: PCI device/vendor: %08x\n", cfg);
 180
 181        eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg);
 182        n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
 183        pr_warn("EEH: PCI cmd/status register: %08x\n", cfg);
 184
 185        /* Gather bridge-specific registers */
 186        if (edev->mode & EEH_DEV_BRIDGE) {
 187                eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg);
 188                n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
 189                pr_warn("EEH: Bridge secondary status: %04x\n", cfg);
 190
 191                eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg);
 192                n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
 193                pr_warn("EEH: Bridge control: %04x\n", cfg);
 194        }
 195
 196        /* Dump out the PCI-X command and status regs */
 197        cap = edev->pcix_cap;
 198        if (cap) {
 199                eeh_ops->read_config(pdn, cap, 4, &cfg);
 200                n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
 201                pr_warn("EEH: PCI-X cmd: %08x\n", cfg);
 202
 203                eeh_ops->read_config(pdn, cap+4, 4, &cfg);
 204                n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
 205                pr_warn("EEH: PCI-X status: %08x\n", cfg);
 206        }
 207
 208        /* If PCI-E capable, dump PCI-E cap 10 */
 209        cap = edev->pcie_cap;
 210        if (cap) {
 211                n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
 212                pr_warn("EEH: PCI-E capabilities and status follow:\n");
 213
 214                for (i=0; i<=8; i++) {
 215                        eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
 216                        n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
 217
 218                        if ((i % 4) == 0) {
 219                                if (i != 0)
 220                                        pr_warn("%s\n", buffer);
 221
 222                                l = scnprintf(buffer, sizeof(buffer),
 223                                              "EEH: PCI-E %02x: %08x ",
 224                                              4*i, cfg);
 225                        } else {
 226                                l += scnprintf(buffer+l, sizeof(buffer)-l,
 227                                               "%08x ", cfg);
 228                        }
 229
 230                }
 231
 232                pr_warn("%s\n", buffer);
 233        }
 234
 235        /* If AER capable, dump it */
 236        cap = edev->aer_cap;
 237        if (cap) {
 238                n += scnprintf(buf+n, len-n, "pci-e AER:\n");
 239                pr_warn("EEH: PCI-E AER capability register set follows:\n");
 240
 241                for (i=0; i<=13; i++) {
 242                        eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
 243                        n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
 244
 245                        if ((i % 4) == 0) {
 246                                if (i != 0)
 247                                        pr_warn("%s\n", buffer);
 248
 249                                l = scnprintf(buffer, sizeof(buffer),
 250                                              "EEH: PCI-E AER %02x: %08x ",
 251                                              4*i, cfg);
 252                        } else {
 253                                l += scnprintf(buffer+l, sizeof(buffer)-l,
 254                                               "%08x ", cfg);
 255                        }
 256                }
 257
 258                pr_warn("%s\n", buffer);
 259        }
 260
 261        return n;
 262}
 263
 264static void *eeh_dump_pe_log(void *data, void *flag)
 265{
 266        struct eeh_pe *pe = data;
 267        struct eeh_dev *edev, *tmp;
 268        size_t *plen = flag;
 269
 270        /* If the PE's config space is blocked, 0xFF's will be
 271         * returned. It's pointless to collect the log in this
 272         * case.
 273         */
 274        if (pe->state & EEH_PE_CFG_BLOCKED)
 275                return NULL;
 276
 277        eeh_pe_for_each_dev(pe, edev, tmp)
 278                *plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen,
 279                                          EEH_PCI_REGS_LOG_LEN - *plen);
 280
 281        return NULL;
 282}
 283
 284/**
 285 * eeh_slot_error_detail - Generate combined log including driver log and error log
 286 * @pe: EEH PE
 287 * @severity: temporary or permanent error log
 288 *
 289 * This routine should be called to generate the combined log, which
  290 * comprises the driver log and the error log. The driver log is gathered
  291 * from the config space of the corresponding PCI device, while
  292 * the error log is fetched through a platform dependent function call.
 293 */
 294void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
 295{
 296        size_t loglen = 0;
 297
 298        /*
 299         * When the PHB is fenced or dead, it's pointless to collect
 300         * the data from PCI config space because it should return
 301         * 0xFF's. For ER, we still retrieve the data from the PCI
 302         * config space.
 303         *
 304         * For pHyp, we have to enable IO for log retrieval. Otherwise,
 305         * 0xFF's is always returned from PCI config space.
 306         */
 307        if (!(pe->type & EEH_PE_PHB)) {
 308                if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG))
 309                        eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
 310
 311                /*
 312                 * The config space of some PCI devices can't be accessed
 313                 * when their PEs are in frozen state. Otherwise, fenced
 314                 * PHB might be seen. Those PEs are identified with flag
 315                 * EEH_PE_CFG_RESTRICTED, indicating EEH_PE_CFG_BLOCKED
 316                 * is set automatically when the PE is put to EEH_PE_ISOLATED.
 317                 *
 318                 * Restoring BARs possibly triggers PCI config access in
 319                 * (OPAL) firmware and then causes fenced PHB. If the
 320                 * PCI config is blocked with flag EEH_PE_CFG_BLOCKED, it's
 321                 * pointless to restore BARs and dump config space.
 322                 */
 323                eeh_ops->configure_bridge(pe);
 324                if (!(pe->state & EEH_PE_CFG_BLOCKED)) {
 325                        eeh_pe_restore_bars(pe);
 326
 327                        pci_regs_buf[0] = 0;
 328                        eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen);
 329                }
 330        }
 331
 332        eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
 333}
 334
 335/**
 336 * eeh_token_to_phys - Convert EEH address token to phys address
 337 * @token: I/O token, should be address in the form 0xA....
 338 *
 339 * This routine should be called to convert virtual I/O address
 340 * to physical one.
 341 */
 342static inline unsigned long eeh_token_to_phys(unsigned long token)
 343{
 344        pte_t *ptep;
 345        unsigned long pa;
 346        int hugepage_shift;
 347
 348        /*
   349         * We won't find hugepages here (this is iomem). Hence we are not
 350         * worried about _PAGE_SPLITTING/collapse. Also we will not hit
 351         * page table free, because of init_mm.
 352         */
 353        ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token,
 354                                           NULL, &hugepage_shift);
 355        if (!ptep)
 356                return token;
 357        WARN_ON(hugepage_shift);
 358        pa = pte_pfn(*ptep) << PAGE_SHIFT;
 359
 360        return pa | (token & (PAGE_SIZE-1));
 361}
 362
 363/*
  364 * On the PowerNV platform, we might already have a fenced PHB there.
  365 * In that case, it's meaningless to recover the frozen PE. Instead,
  366 * we have to handle the fenced PHB first.
 367 */
 368static int eeh_phb_check_failure(struct eeh_pe *pe)
 369{
 370        struct eeh_pe *phb_pe;
 371        unsigned long flags;
 372        int ret;
 373
 374        if (!eeh_has_flag(EEH_PROBE_MODE_DEV))
 375                return -EPERM;
 376
 377        /* Find the PHB PE */
 378        phb_pe = eeh_phb_pe_get(pe->phb);
 379        if (!phb_pe) {
 380                pr_warn("%s Can't find PE for PHB#%d\n",
 381                        __func__, pe->phb->global_number);
 382                return -EEXIST;
 383        }
 384
 385        /* If the PHB has been in problematic state */
 386        eeh_serialize_lock(&flags);
 387        if (phb_pe->state & EEH_PE_ISOLATED) {
 388                ret = 0;
 389                goto out;
 390        }
 391
 392        /* Check PHB state */
 393        ret = eeh_ops->get_state(phb_pe, NULL);
 394        if ((ret < 0) ||
 395            (ret == EEH_STATE_NOT_SUPPORT) ||
 396            (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
 397            (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
 398                ret = 0;
 399                goto out;
 400        }
 401
 402        /* Isolate the PHB and send event */
 403        eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
 404        eeh_serialize_unlock(flags);
 405
 406        pr_err("EEH: PHB#%x failure detected, location: %s\n",
 407                phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
 408        dump_stack();
 409        eeh_send_failure_event(phb_pe);
 410
 411        return 1;
 412out:
 413        eeh_serialize_unlock(flags);
 414        return ret;
 415}
 416
 417/**
 418 * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
 419 * @edev: eeh device
 420 *
 421 * Check for an EEH failure for the given device node.  Call this
 422 * routine if the result of a read was all 0xff's and you want to
 423 * find out if this is due to an EEH slot freeze.  This routine
 424 * will query firmware for the EEH status.
 425 *
 426 * Returns 0 if there has not been an EEH error; otherwise returns
 427 * a non-zero value and queues up a slot isolation event notification.
 428 *
 429 * It is safe to call this routine in an interrupt context.
 430 */
 431int eeh_dev_check_failure(struct eeh_dev *edev)
 432{
 433        int ret;
 434        int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
 435        unsigned long flags;
 436        struct pci_dn *pdn;
 437        struct pci_dev *dev;
 438        struct eeh_pe *pe, *parent_pe, *phb_pe;
 439        int rc = 0;
 440        const char *location = NULL;
 441
 442        eeh_stats.total_mmio_ffs++;
 443
 444        if (!eeh_enabled())
 445                return 0;
 446
 447        if (!edev) {
 448                eeh_stats.no_dn++;
 449                return 0;
 450        }
 451        dev = eeh_dev_to_pci_dev(edev);
 452        pe = eeh_dev_to_pe(edev);
 453
 454        /* Access to IO BARs might get this far and still not want checking. */
 455        if (!pe) {
 456                eeh_stats.ignored_check++;
 457                pr_debug("EEH: Ignored check for %s\n",
 458                        eeh_pci_name(dev));
 459                return 0;
 460        }
 461
 462        if (!pe->addr && !pe->config_addr) {
 463                eeh_stats.no_cfg_addr++;
 464                return 0;
 465        }
 466
 467        /*
   468         * On the PowerNV platform, we might already have a fenced PHB
   469         * there, and we need to take care of that first.
 470         */
 471        ret = eeh_phb_check_failure(pe);
 472        if (ret > 0)
 473                return ret;
 474
 475        /*
 476         * If the PE isn't owned by us, we shouldn't check the
 477         * state. Instead, let the owner handle it if the PE has
 478         * been frozen.
 479         */
 480        if (eeh_pe_passed(pe))
 481                return 0;
 482
 483        /* If we already have a pending isolation event for this
 484         * slot, we know it's bad already, we don't need to check.
   485         * Do this checking under a lock, as multiple PCI devices
   486         * in one slot might report errors simultaneously and we
 487         * only want one error recovery routine running.
 488         */
 489        eeh_serialize_lock(&flags);
 490        rc = 1;
 491        if (pe->state & EEH_PE_ISOLATED) {
 492                pe->check_count++;
 493                if (pe->check_count % EEH_MAX_FAILS == 0) {
 494                        pdn = eeh_dev_to_pdn(edev);
 495                        if (pdn->node)
 496                                location = of_get_property(pdn->node, "ibm,loc-code", NULL);
 497                        printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
 498                                "location=%s driver=%s pci addr=%s\n",
 499                                pe->check_count,
 500                                location ? location : "unknown",
 501                                eeh_driver_name(dev), eeh_pci_name(dev));
 502                        printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
 503                                eeh_driver_name(dev));
 504                        dump_stack();
 505                }
 506                goto dn_unlock;
 507        }
 508
 509        /*
 510         * Now test for an EEH failure.  This is VERY expensive.
 511         * Note that the eeh_config_addr may be a parent device
 512         * in the case of a device behind a bridge, or it may be
 513         * function zero of a multi-function device.
 514         * In any case they must share a common PHB.
 515         */
 516        ret = eeh_ops->get_state(pe, NULL);
 517
 518        /* Note that config-io to empty slots may fail;
 519         * they are empty when they don't have children.
   520         * We will punt under the following conditions: failure to get
   521         * the PE's state, EEH not supported, permanently unavailable
   522         * state, or the PE is in a good state.
 523         */
 524        if ((ret < 0) ||
 525            (ret == EEH_STATE_NOT_SUPPORT) ||
 526            ((ret & active_flags) == active_flags)) {
 527                eeh_stats.false_positives++;
 528                pe->false_positives++;
 529                rc = 0;
 530                goto dn_unlock;
 531        }
 532
 533        /*
   534         * It's a corner case that the parent PE has been
   535         * put into the frozen state as well. We should take care
   536         * of that first.
 537         */
 538        parent_pe = pe->parent;
 539        while (parent_pe) {
 540                /* Hit the ceiling ? */
 541                if (parent_pe->type & EEH_PE_PHB)
 542                        break;
 543
 544                /* Frozen parent PE ? */
 545                ret = eeh_ops->get_state(parent_pe, NULL);
 546                if (ret > 0 &&
 547                    (ret & active_flags) != active_flags)
 548                        pe = parent_pe;
 549
 550                /* Next parent level */
 551                parent_pe = parent_pe->parent;
 552        }
 553
 554        eeh_stats.slot_resets++;
 555
 556        /* Avoid repeated reports of this failure, including problems
 557         * with other functions on this device, and functions under
 558         * bridges.
 559         */
 560        eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
 561        eeh_serialize_unlock(flags);
 562
 563        /* Most EEH events are due to device driver bugs.  Having
 564         * a stack trace will help the device-driver authors figure
 565         * out what happened.  So print that out.
 566         */
 567        phb_pe = eeh_phb_pe_get(pe->phb);
 568        pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
 569               pe->phb->global_number, pe->addr);
 570        pr_err("EEH: PE location: %s, PHB location: %s\n",
 571               eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
 572        dump_stack();
 573
 574        eeh_send_failure_event(pe);
 575
 576        return 1;
 577
 578dn_unlock:
 579        eeh_serialize_unlock(flags);
 580        return rc;
 581}
 582
 583EXPORT_SYMBOL_GPL(eeh_dev_check_failure);
 584
 585/**
 586 * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
 587 * @token: I/O address
 588 *
 589 * Check for an EEH failure at the given I/O address. Call this
 590 * routine if the result of a read was all 0xff's and you want to
 591 * find out if this is due to an EEH slot freeze event. This routine
 592 * will query firmware for the EEH status.
 593 *
 594 * Note this routine is safe to call in an interrupt context.
 595 */
 596int eeh_check_failure(const volatile void __iomem *token)
 597{
 598        unsigned long addr;
 599        struct eeh_dev *edev;
 600
 601        /* Finding the phys addr + pci device; this is pretty quick. */
 602        addr = eeh_token_to_phys((unsigned long __force) token);
 603        edev = eeh_addr_cache_get_dev(addr);
 604        if (!edev) {
 605                eeh_stats.no_device++;
 606                return 0;
 607        }
 608
 609        return eeh_dev_check_failure(edev);
 610}
 611EXPORT_SYMBOL(eeh_check_failure);
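
/*
 * A minimal, illustrative sketch of the explicit check described above,
 * as a hypothetical driver might use it (the register offset and the
 * surrounding driver context are made up for the example):
 *
 *      u32 status = readl(priv->mmio + STATUS_REG);
 *
 *      if (status == 0xffffffff &&
 *          eeh_check_failure(priv->mmio + STATUS_REG))
 *              return;
 *
 * On a non-zero return the slot is isolated and an EEH event has been
 * queued; the driver should stop touching the hardware and wait for
 * recovery to be driven through its pci_error_handlers callbacks.
 */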
 612
 613
 614/**
 615 * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
 616 * @pe: EEH PE
 617 *
 618 * This routine should be called to reenable frozen MMIO or DMA
  619 * so that it works correctly again. It's useful while doing
 620 * recovery or log collection on the indicated device.
 621 */
 622int eeh_pci_enable(struct eeh_pe *pe, int function)
 623{
 624        int active_flag, rc;
 625
 626        /*
   627         * pHyp doesn't allow enabling IO or DMA on an unfrozen PE.
   628         * It's also pointless to enable them on an unfrozen PE. So
 629         * we have to check before enabling IO or DMA.
 630         */
 631        switch (function) {
 632        case EEH_OPT_THAW_MMIO:
 633                active_flag = EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED;
 634                break;
 635        case EEH_OPT_THAW_DMA:
 636                active_flag = EEH_STATE_DMA_ACTIVE;
 637                break;
 638        case EEH_OPT_DISABLE:
 639        case EEH_OPT_ENABLE:
 640        case EEH_OPT_FREEZE_PE:
 641                active_flag = 0;
 642                break;
 643        default:
 644                pr_warn("%s: Invalid function %d\n",
 645                        __func__, function);
 646                return -EINVAL;
 647        }
 648
 649        /*
 650         * Check if IO or DMA has been enabled before
 651         * enabling them.
 652         */
 653        if (active_flag) {
 654                rc = eeh_ops->get_state(pe, NULL);
 655                if (rc < 0)
 656                        return rc;
 657
 658                /* Needn't enable it at all */
 659                if (rc == EEH_STATE_NOT_SUPPORT)
 660                        return 0;
 661
 662                /* It's already enabled */
 663                if (rc & active_flag)
 664                        return 0;
 665        }
 666
 667
 668        /* Issue the request */
 669        rc = eeh_ops->set_option(pe, function);
 670        if (rc)
 671                pr_warn("%s: Unexpected state change %d on "
 672                        "PHB#%d-PE#%x, err=%d\n",
 673                        __func__, function, pe->phb->global_number,
 674                        pe->addr, rc);
 675
 676        /* Check if the request is finished successfully */
 677        if (active_flag) {
 678                rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
 679                if (rc < 0)
 680                        return rc;
 681
 682                if (rc & active_flag)
 683                        return 0;
 684
 685                return -EIO;
 686        }
 687
 688        return rc;
 689}
 690
 691static void *eeh_disable_and_save_dev_state(void *data, void *userdata)
 692{
 693        struct eeh_dev *edev = data;
 694        struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
 695        struct pci_dev *dev = userdata;
 696
 697        /*
 698         * The caller should have disabled and saved the
 699         * state for the specified device
 700         */
 701        if (!pdev || pdev == dev)
 702                return NULL;
 703
 704        /* Ensure we have D0 power state */
 705        pci_set_power_state(pdev, PCI_D0);
 706
 707        /* Save device state */
 708        pci_save_state(pdev);
 709
 710        /*
 711         * Disable device to avoid any DMA traffic and
 712         * interrupt from the device
 713         */
 714        pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
 715
 716        return NULL;
 717}
 718
 719static void *eeh_restore_dev_state(void *data, void *userdata)
 720{
 721        struct eeh_dev *edev = data;
 722        struct pci_dn *pdn = eeh_dev_to_pdn(edev);
 723        struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
 724        struct pci_dev *dev = userdata;
 725
 726        if (!pdev)
 727                return NULL;
 728
 729        /* Apply customization from firmware */
 730        if (pdn && eeh_ops->restore_config)
 731                eeh_ops->restore_config(pdn);
 732
 733        /* The caller should restore state for the specified device */
 734        if (pdev != dev)
 735                pci_restore_state(pdev);
 736
 737        return NULL;
 738}
 739
 740/**
  741 * pcibios_set_pcie_reset_state - Set PCI-E reset state
 742 * @dev: pci device struct
 743 * @state: reset state to enter
 744 *
 745 * Return value:
 746 *      0 if success
 747 */
 748int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
 749{
 750        struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
 751        struct eeh_pe *pe = eeh_dev_to_pe(edev);
 752
 753        if (!pe) {
 754                pr_err("%s: No PE found on PCI device %s\n",
 755                        __func__, pci_name(dev));
 756                return -EINVAL;
 757        }
 758
 759        switch (state) {
 760        case pcie_deassert_reset:
 761                eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
 762                eeh_unfreeze_pe(pe, false);
 763                if (!(pe->type & EEH_PE_VF))
 764                        eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
 765                eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev);
 766                eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
 767                break;
 768        case pcie_hot_reset:
 769                eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
 770                eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
 771                eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
 772                if (!(pe->type & EEH_PE_VF))
 773                        eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
 774                eeh_ops->reset(pe, EEH_RESET_HOT);
 775                break;
 776        case pcie_warm_reset:
 777                eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
 778                eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
 779                eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
 780                if (!(pe->type & EEH_PE_VF))
 781                        eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
 782                eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
 783                break;
 784        default:
 785                eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED);
 786                return -EINVAL;
  787        }
 788
 789        return 0;
 790}
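
/*
 * Drivers reach this hook through the generic PCI core, e.g. (a rough,
 * hypothetical sequence; delays and error handling omitted):
 *
 *      pci_set_pcie_reset_state(pdev, pcie_hot_reset);
 *      ...
 *      pci_set_pcie_reset_state(pdev, pcie_deassert_reset);
 *
 * The deassert case above unfreezes the PE, restores the device state
 * saved by the hot/warm cases and clears the isolation flags.
 */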
 791
 792/**
  793 * eeh_set_dev_freset - Check the required reset for the indicated device
 794 * @data: EEH device
 795 * @flag: return value
 796 *
 797 * Each device might have its preferred reset type: fundamental or
  798 * hot reset. The routine is used to collect the information for
  799 * the indicated device and its children so that the group of
  800 * devices can be reset properly.
 801 */
 802static void *eeh_set_dev_freset(void *data, void *flag)
 803{
 804        struct pci_dev *dev;
 805        unsigned int *freset = (unsigned int *)flag;
 806        struct eeh_dev *edev = (struct eeh_dev *)data;
 807
 808        dev = eeh_dev_to_pci_dev(edev);
 809        if (dev)
 810                *freset |= dev->needs_freset;
 811
 812        return NULL;
 813}
 814
 815/**
 816 * eeh_pe_reset_full - Complete a full reset process on the indicated PE
 817 * @pe: EEH PE
 818 *
 819 * This function executes a full reset procedure on a PE, including setting
 820 * the appropriate flags, performing a fundamental or hot reset, and then
 821 * deactivating the reset status.  It is designed to be used within the EEH
 822 * subsystem, as opposed to eeh_pe_reset which is exported to drivers and
 823 * only performs a single operation at a time.
 824 *
 825 * This function will attempt to reset a PE three times before failing.
 826 */
 827int eeh_pe_reset_full(struct eeh_pe *pe)
 828{
 829        int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
 830        int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
 831        int type = EEH_RESET_HOT;
 832        unsigned int freset = 0;
 833        int i, state, ret;
 834
 835        /*
 836         * Determine the type of reset to perform - hot or fundamental.
 837         * Hot reset is the default operation, unless any device under the
 838         * PE requires a fundamental reset.
 839         */
 840        eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);
 841
 842        if (freset)
 843                type = EEH_RESET_FUNDAMENTAL;
 844
 845        /* Mark the PE as in reset state and block config space accesses */
 846        eeh_pe_state_mark(pe, reset_state);
 847
 848        /* Make three attempts at resetting the bus */
 849        for (i = 0; i < 3; i++) {
 850                ret = eeh_pe_reset(pe, type);
 851                if (ret)
 852                        break;
 853
 854                ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE);
 855                if (ret)
 856                        break;
 857
 858                /* Wait until the PE is in a functioning state */
 859                state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
 860                if ((state & active_flags) == active_flags)
 861                        break;
 862
 863                if (state < 0) {
 864                        pr_warn("%s: Unrecoverable slot failure on PHB#%d-PE#%x",
 865                                __func__, pe->phb->global_number, pe->addr);
 866                        ret = -ENOTRECOVERABLE;
 867                        break;
 868                }
 869
 870                /* Set error in case this is our last attempt */
 871                ret = -EIO;
 872                pr_warn("%s: Failure %d resetting PHB#%x-PE#%x\n (%d)\n",
 873                        __func__, state, pe->phb->global_number, pe->addr, (i + 1));
 874        }
 875
 876        eeh_pe_state_clear(pe, reset_state);
 877        return ret;
 878}
 879
 880/**
 881 * eeh_save_bars - Save device bars
 882 * @edev: PCI device associated EEH device
 883 *
 884 * Save the values of the device bars. Unlike the restore
 885 * routine, this routine is *not* recursive. This is because
 886 * PCI devices are added individually; but, for the restore,
 887 * an entire slot is reset at a time.
 888 */
 889void eeh_save_bars(struct eeh_dev *edev)
 890{
 891        struct pci_dn *pdn;
 892        int i;
 893
 894        pdn = eeh_dev_to_pdn(edev);
 895        if (!pdn)
 896                return;
 897
 898        for (i = 0; i < 16; i++)
 899                eeh_ops->read_config(pdn, i * 4, 4, &edev->config_space[i]);
 900
 901        /*
   902         * For PCI bridges, including the root port, we need to enable bus
   903         * mastering explicitly. Otherwise, they can't fetch IODA table
   904         * entries correctly. So we cache the bit in advance so that
   905         * we can restore it after a reset, whether PHB-wide or PE-wide.
 906         */
 907        if (edev->mode & EEH_DEV_BRIDGE)
 908                edev->config_space[1] |= PCI_COMMAND_MASTER;
 909}
 910
 911/**
 912 * eeh_ops_register - Register platform dependent EEH operations
 913 * @ops: platform dependent EEH operations
 914 *
 915 * Register the platform dependent EEH operation callback
 916 * functions. The platform should call this function before
 917 * any other EEH operations.
 918 */
 919int __init eeh_ops_register(struct eeh_ops *ops)
 920{
 921        if (!ops->name) {
 922                pr_warn("%s: Invalid EEH ops name for %p\n",
 923                        __func__, ops);
 924                return -EINVAL;
 925        }
 926
 927        if (eeh_ops && eeh_ops != ops) {
 928                pr_warn("%s: EEH ops of platform %s already existing (%s)\n",
 929                        __func__, eeh_ops->name, ops->name);
 930                return -EEXIST;
 931        }
 932
 933        eeh_ops = ops;
 934
 935        return 0;
 936}
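
/*
 * A minimal sketch of how a platform backend is expected to hook in.
 * The "foo" names below are placeholders, not the real pseries/powernv
 * implementations; only ops actually used in this file are shown:
 *
 *      static struct eeh_ops foo_eeh_ops = {
 *              .name           = "foo",
 *              .init           = foo_eeh_init,
 *              .probe          = foo_eeh_probe,
 *              .set_option     = foo_eeh_set_option,
 *              .get_state      = foo_eeh_get_state,
 *              .reset          = foo_eeh_reset,
 *      };
 *
 *      eeh_ops_register(&foo_eeh_ops);
 *
 * eeh_init() then calls eeh_ops->init() and walks every PHB's pci_dn
 * tree, invoking eeh_ops->probe() on each node.
 */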
 937
 938/**
  939 * eeh_ops_unregister - Unregister platform dependent EEH operations
 940 * @name: name of EEH platform operations
 941 *
 942 * Unregister the platform dependent EEH operation callback
 943 * functions.
 944 */
 945int __exit eeh_ops_unregister(const char *name)
 946{
 947        if (!name || !strlen(name)) {
 948                pr_warn("%s: Invalid EEH ops name\n",
 949                        __func__);
 950                return -EINVAL;
 951        }
 952
 953        if (eeh_ops && !strcmp(eeh_ops->name, name)) {
 954                eeh_ops = NULL;
 955                return 0;
 956        }
 957
 958        return -EEXIST;
 959}
 960
 961static int eeh_reboot_notifier(struct notifier_block *nb,
 962                               unsigned long action, void *unused)
 963{
 964        eeh_clear_flag(EEH_ENABLED);
 965        return NOTIFY_DONE;
 966}
 967
 968static struct notifier_block eeh_reboot_nb = {
 969        .notifier_call = eeh_reboot_notifier,
 970};
 971
 972/**
 973 * eeh_init - EEH initialization
 974 *
 975 * Initialize EEH by trying to enable it for all of the adapters in the system.
 976 * As a side effect we can determine here if eeh is supported at all.
 977 * Note that we leave EEH on so failed config cycles won't cause a machine
 978 * check.  If a user turns off EEH for a particular adapter they are really
 979 * telling Linux to ignore errors.  Some hardware (e.g. POWER5) won't
 980 * grant access to a slot if EEH isn't enabled, and so we always enable
 981 * EEH for all slots/all devices.
 982 *
 983 * The eeh-force-off option disables EEH checking globally, for all slots.
 984 * Even if force-off is set, the EEH hardware is still enabled, so that
 985 * newer systems can boot.
 986 */
 987int eeh_init(void)
 988{
 989        struct pci_controller *hose, *tmp;
 990        struct pci_dn *pdn;
 991        static int cnt = 0;
 992        int ret = 0;
 993
 994        /*
   995         * We have to delay the initialization on PowerNV until after
   996         * the PCI hierarchy tree has been built, because the PEs
   997         * are figured out based on PCI devices instead of device
   998         * tree nodes.
 999         */
1000        if (machine_is(powernv) && cnt++ <= 0)
1001                return ret;
1002
1003        /* Register reboot notifier */
1004        ret = register_reboot_notifier(&eeh_reboot_nb);
1005        if (ret) {
1006                pr_warn("%s: Failed to register notifier (%d)\n",
1007                        __func__, ret);
1008                return ret;
1009        }
1010
1011        /* call platform initialization function */
1012        if (!eeh_ops) {
1013                pr_warn("%s: Platform EEH operation not found\n",
1014                        __func__);
1015                return -EEXIST;
1016        } else if ((ret = eeh_ops->init()))
1017                return ret;
1018
1019        /* Initialize EEH event */
1020        ret = eeh_event_init();
1021        if (ret)
1022                return ret;
1023
1024        /* Enable EEH for all adapters */
1025        list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
1026                pdn = hose->pci_data;
1027                traverse_pci_dn(pdn, eeh_ops->probe, NULL);
1028        }
1029
1030        /*
  1031         * Call platform post-initialization. It's a good chance
  1032         * to inform the platform that EEH is ready to supply service
  1033         * once the I/O cache has been built up.
1034         */
1035        if (eeh_ops->post_init) {
1036                ret = eeh_ops->post_init();
1037                if (ret)
1038                        return ret;
1039        }
1040
1041        if (eeh_enabled())
1042                pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
1043        else
1044                pr_warn("EEH: No capable adapters found\n");
1045
1046        return ret;
1047}
1048
1049core_initcall_sync(eeh_init);
1050
1051/**
1052 * eeh_add_device_early - Enable EEH for the indicated device node
1053 * @pdn: PCI device node for which to set up EEH
1054 *
1055 * This routine must be used to perform EEH initialization for PCI
1056 * devices that were added after system boot (e.g. hotplug, dlpar).
1057 * This routine must be called before any i/o is performed to the
 1058 * adapter (including any config-space i/o).
1059 * Whether this actually enables EEH or not for this device depends
 1060 * on the CEC architecture, the type of the device, earlier boot
 1061 * command-line arguments, etc.
1062 */
1063void eeh_add_device_early(struct pci_dn *pdn)
1064{
1065        struct pci_controller *phb;
1066        struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
1067
1068        if (!edev)
1069                return;
1070
1071        if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE))
1072                return;
1073
1074        /* USB Bus children of PCI devices will not have BUID's */
1075        phb = edev->phb;
1076        if (NULL == phb ||
1077            (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid))
1078                return;
1079
1080        eeh_ops->probe(pdn, NULL);
1081}
1082
1083/**
1084 * eeh_add_device_tree_early - Enable EEH for the indicated device
1085 * @pdn: PCI device node
1086 *
1087 * This routine must be used to perform EEH initialization for the
1088 * indicated PCI device that was added after system boot (e.g.
1089 * hotplug, dlpar).
1090 */
1091void eeh_add_device_tree_early(struct pci_dn *pdn)
1092{
1093        struct pci_dn *n;
1094
1095        if (!pdn)
1096                return;
1097
1098        list_for_each_entry(n, &pdn->child_list, list)
1099                eeh_add_device_tree_early(n);
1100        eeh_add_device_early(pdn);
1101}
1102EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
1103
1104/**
1105 * eeh_add_device_late - Perform EEH initialization for the indicated pci device
1106 * @dev: pci device for which to set up EEH
1107 *
1108 * This routine must be used to complete EEH initialization for PCI
1109 * devices that were added after system boot (e.g. hotplug, dlpar).
1110 */
1111void eeh_add_device_late(struct pci_dev *dev)
1112{
1113        struct pci_dn *pdn;
1114        struct eeh_dev *edev;
1115
1116        if (!dev || !eeh_enabled())
1117                return;
1118
1119        pr_debug("EEH: Adding device %s\n", pci_name(dev));
1120
1121        pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
1122        edev = pdn_to_eeh_dev(pdn);
1123        if (edev->pdev == dev) {
1124                pr_debug("EEH: Already referenced !\n");
1125                return;
1126        }
1127
1128        /*
1129         * The EEH cache might not be removed correctly because of
1130         * unbalanced kref to the device during unplug time, which
1131         * relies on pcibios_release_device(). So we have to remove
1132         * that here explicitly.
1133         */
1134        if (edev->pdev) {
1135                eeh_rmv_from_parent_pe(edev);
1136                eeh_addr_cache_rmv_dev(edev->pdev);
1137                eeh_sysfs_remove_device(edev->pdev);
1138                edev->mode &= ~EEH_DEV_SYSFS;
1139
1140                /*
 1141                 * The PCI device should definitely have been removed,
 1142                 * even though that wasn't done correctly. So we needn't
 1143                 * call into the error handler afterwards.
1144                 */
1145                edev->mode |= EEH_DEV_NO_HANDLER;
1146
1147                edev->pdev = NULL;
1148                dev->dev.archdata.edev = NULL;
1149        }
1150
1151        if (eeh_has_flag(EEH_PROBE_MODE_DEV))
1152                eeh_ops->probe(pdn, NULL);
1153
1154        edev->pdev = dev;
1155        dev->dev.archdata.edev = edev;
1156
1157        eeh_addr_cache_insert_dev(dev);
1158}
1159
1160/**
1161 * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
1162 * @bus: PCI bus
1163 *
1164 * This routine must be used to perform EEH initialization for PCI
1165 * devices which are attached to the indicated PCI bus. The PCI bus
1166 * is added after system boot through hotplug or dlpar.
1167 */
1168void eeh_add_device_tree_late(struct pci_bus *bus)
1169{
1170        struct pci_dev *dev;
1171
1172        list_for_each_entry(dev, &bus->devices, bus_list) {
1173                eeh_add_device_late(dev);
1174                if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
1175                        struct pci_bus *subbus = dev->subordinate;
1176                        if (subbus)
1177                                eeh_add_device_tree_late(subbus);
1178                }
1179        }
1180}
1181EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
1182
1183/**
1184 * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus
1185 * @bus: PCI bus
1186 *
1187 * This routine must be used to add EEH sysfs files for PCI
1188 * devices which are attached to the indicated PCI bus. The PCI bus
1189 * is added after system boot through hotplug or dlpar.
1190 */
1191void eeh_add_sysfs_files(struct pci_bus *bus)
1192{
1193        struct pci_dev *dev;
1194
1195        list_for_each_entry(dev, &bus->devices, bus_list) {
1196                eeh_sysfs_add_device(dev);
1197                if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
1198                        struct pci_bus *subbus = dev->subordinate;
1199                        if (subbus)
1200                                eeh_add_sysfs_files(subbus);
1201                }
1202        }
1203}
1204EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
1205
1206/**
1207 * eeh_remove_device - Undo EEH setup for the indicated pci device
1208 * @dev: pci device to be removed
1209 *
1210 * This routine should be called when a device is removed from
1211 * a running system (e.g. by hotplug or dlpar).  It unregisters
1212 * the PCI device from the EEH subsystem.  I/O errors affecting
1213 * this device will no longer be detected after this call; thus,
1214 * i/o errors affecting this slot may leave this device unusable.
1215 */
1216void eeh_remove_device(struct pci_dev *dev)
1217{
1218        struct eeh_dev *edev;
1219
1220        if (!dev || !eeh_enabled())
1221                return;
1222        edev = pci_dev_to_eeh_dev(dev);
1223
1224        /* Unregister the device with the EEH/PCI address search system */
1225        pr_debug("EEH: Removing device %s\n", pci_name(dev));
1226
1227        if (!edev || !edev->pdev || !edev->pe) {
1228                pr_debug("EEH: Not referenced !\n");
1229                return;
1230        }
1231
1232        /*
 1233         * During hotplug for EEH error recovery, we need the EEH
 1234         * device to stay attached to the parent PE so that the BARs
 1235         * can be restored a bit later. So we keep it for BAR restore
 1236         * and remove it from the parent PE during the BAR restore.
1237         */
1238        edev->pdev = NULL;
1239
1240        /*
1241         * The flag "in_error" is used to trace EEH devices for VFs
1242         * in error state or not. It's set in eeh_report_error(). If
1243         * it's not set, eeh_report_{reset,resume}() won't be called
1244         * for the VF EEH device.
1245         */
1246        edev->in_error = false;
1247        dev->dev.archdata.edev = NULL;
1248        if (!(edev->pe->state & EEH_PE_KEEP))
1249                eeh_rmv_from_parent_pe(edev);
1250        else
1251                edev->mode |= EEH_DEV_DISCONNECTED;
1252
1253        /*
 1254         * We're removing the device from the PCI subsystem, which means
 1255         * the PCI device driver doesn't support EEH, or doesn't
 1256         * support it well. So we rely completely on hotplug to do
 1257         * recovery for the specific PCI device.
1258         */
1259        edev->mode |= EEH_DEV_NO_HANDLER;
1260
1261        eeh_addr_cache_rmv_dev(dev);
1262        eeh_sysfs_remove_device(dev);
1263        edev->mode &= ~EEH_DEV_SYSFS;
1264}
1265
1266int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state)
1267{
1268        int ret;
1269
1270        ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
1271        if (ret) {
1272                pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n",
1273                        __func__, ret, pe->phb->global_number, pe->addr);
1274                return ret;
1275        }
1276
1277        ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
1278        if (ret) {
1279                pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n",
1280                        __func__, ret, pe->phb->global_number, pe->addr);
1281                return ret;
1282        }
1283
1284        /* Clear software isolated state */
1285        if (sw_state && (pe->state & EEH_PE_ISOLATED))
1286                eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
1287
1288        return ret;
1289}
1290
1291
1292static struct pci_device_id eeh_reset_ids[] = {
1293        { PCI_DEVICE(0x19a2, 0x0710) }, /* Emulex, BE     */
1294        { PCI_DEVICE(0x10df, 0xe220) }, /* Emulex, Lancer */
1295        { PCI_DEVICE(0x14e4, 0x1657) }, /* Broadcom BCM5719 */
1296        { 0 }
1297};
1298
1299static int eeh_pe_change_owner(struct eeh_pe *pe)
1300{
1301        struct eeh_dev *edev, *tmp;
1302        struct pci_dev *pdev;
1303        struct pci_device_id *id;
1304        int flags, ret;
1305
1306        /* Check PE state */
1307        flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
1308        ret = eeh_ops->get_state(pe, NULL);
1309        if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT)
1310                return 0;
1311
1312        /* Unfrozen PE, nothing to do */
1313        if ((ret & flags) == flags)
1314                return 0;
1315
1316        /* Frozen PE, check if it needs PE level reset */
1317        eeh_pe_for_each_dev(pe, edev, tmp) {
1318                pdev = eeh_dev_to_pci_dev(edev);
1319                if (!pdev)
1320                        continue;
1321
1322                for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) {
1323                        if (id->vendor != PCI_ANY_ID &&
1324                            id->vendor != pdev->vendor)
1325                                continue;
1326                        if (id->device != PCI_ANY_ID &&
1327                            id->device != pdev->device)
1328                                continue;
1329                        if (id->subvendor != PCI_ANY_ID &&
1330                            id->subvendor != pdev->subsystem_vendor)
1331                                continue;
1332                        if (id->subdevice != PCI_ANY_ID &&
1333                            id->subdevice != pdev->subsystem_device)
1334                                continue;
1335
1336                        goto reset;
1337                }
1338        }
1339
1340        return eeh_unfreeze_pe(pe, true);
1341
1342reset:
1343        return eeh_pe_reset_and_recover(pe);
1344}
1345
1346/**
1347 * eeh_dev_open - Increase count of pass through devices for PE
1348 * @pdev: PCI device
1349 *
1350 * Increase count of passed through devices for the indicated
 1351 * PE. As a result, the EEH errors detected on the PE won't be
1352 * reported. The PE owner will be responsible for detection
1353 * and recovery.
1354 */
1355int eeh_dev_open(struct pci_dev *pdev)
1356{
1357        struct eeh_dev *edev;
1358        int ret = -ENODEV;
1359
1360        mutex_lock(&eeh_dev_mutex);
1361
1362        /* No PCI device ? */
1363        if (!pdev)
1364                goto out;
1365
1366        /* No EEH device or PE ? */
1367        edev = pci_dev_to_eeh_dev(pdev);
1368        if (!edev || !edev->pe)
1369                goto out;
1370
1371        /*
 1372         * The PE might have been put into the frozen state, but we
 1373         * haven't detected that yet. The passed-through PCI devices
 1374         * in a frozen PE won't work properly. Clear the frozen state
1375         * in advance.
1376         */
1377        ret = eeh_pe_change_owner(edev->pe);
1378        if (ret)
1379                goto out;
1380
1381        /* Increase PE's pass through count */
1382        atomic_inc(&edev->pe->pass_dev_cnt);
1383        mutex_unlock(&eeh_dev_mutex);
1384
1385        return 0;
1386out:
1387        mutex_unlock(&eeh_dev_mutex);
1388        return ret;
1389}
1390EXPORT_SYMBOL_GPL(eeh_dev_open);
1391
1392/**
1393 * eeh_dev_release - Decrease count of pass through devices for PE
1394 * @pdev: PCI device
1395 *
1396 * Decrease count of pass through devices for the indicated PE. If
1397 * there is no passed through device in PE, the EEH errors detected
1398 * on the PE will be reported and handled as usual.
1399 */
1400void eeh_dev_release(struct pci_dev *pdev)
1401{
1402        struct eeh_dev *edev;
1403
1404        mutex_lock(&eeh_dev_mutex);
1405
1406        /* No PCI device ? */
1407        if (!pdev)
1408                goto out;
1409
1410        /* No EEH device ? */
1411        edev = pci_dev_to_eeh_dev(pdev);
1412        if (!edev || !edev->pe || !eeh_pe_passed(edev->pe))
1413                goto out;
1414
1415        /* Decrease PE's pass through count */
1416        atomic_dec(&edev->pe->pass_dev_cnt);
1417        WARN_ON(atomic_read(&edev->pe->pass_dev_cnt) < 0);
1418        eeh_pe_change_owner(edev->pe);
1419out:
1420        mutex_unlock(&eeh_dev_mutex);
1421}
1422EXPORT_SYMBOL(eeh_dev_release);
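
/*
 * A rough sketch of how a pass-through user (e.g. VFIO on sPAPR) is
 * expected to bracket device ownership with the two helpers above;
 * the surrounding context is hypothetical and lives outside this file:
 *
 *      ret = eeh_dev_open(pdev);
 *      if (ret)
 *              return ret;
 *
 *      ... hand the device to the guest/userspace; EEH errors on the
 *      ... PE are now the owner's responsibility, not the host's
 *
 *      eeh_dev_release(pdev);
 */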
1423
1424#ifdef CONFIG_IOMMU_API
1425
1426static int dev_has_iommu_table(struct device *dev, void *data)
1427{
1428        struct pci_dev *pdev = to_pci_dev(dev);
1429        struct pci_dev **ppdev = data;
1430
1431        if (!dev)
1432                return 0;
1433
1434        if (dev->iommu_group) {
1435                *ppdev = pdev;
1436                return 1;
1437        }
1438
1439        return 0;
1440}
1441
1442/**
1443 * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE
1444 * @group: IOMMU group
1445 *
1446 * The routine is called to convert IOMMU group to EEH PE.
1447 */
1448struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group)
1449{
1450        struct pci_dev *pdev = NULL;
1451        struct eeh_dev *edev;
1452        int ret;
1453
1454        /* No IOMMU group ? */
1455        if (!group)
1456                return NULL;
1457
1458        ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table);
1459        if (!ret || !pdev)
1460                return NULL;
1461
1462        /* No EEH device or PE ? */
1463        edev = pci_dev_to_eeh_dev(pdev);
1464        if (!edev || !edev->pe)
1465                return NULL;
1466
1467        return edev->pe;
1468}
1469EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe);
1470
1471#endif /* CONFIG_IOMMU_API */
1472
1473/**
1474 * eeh_pe_set_option - Set options for the indicated PE
1475 * @pe: EEH PE
1476 * @option: requested option
1477 *
1478 * The routine is called to enable or disable EEH functionality
1479 * on the indicated PE, to enable IO or DMA for the frozen PE.
1480 */
1481int eeh_pe_set_option(struct eeh_pe *pe, int option)
1482{
1483        int ret = 0;
1484
1485        /* Invalid PE ? */
1486        if (!pe)
1487                return -ENODEV;
1488
1489        /*
 1490         * EEH functionality could possibly be disabled; just
 1491         * return an error in that case. The EEH functionality
 1492         * isn't expected to be disabled on one specific PE.
1493         */
1494        switch (option) {
1495        case EEH_OPT_ENABLE:
1496                if (eeh_enabled()) {
1497                        ret = eeh_pe_change_owner(pe);
1498                        break;
1499                }
1500                ret = -EIO;
1501                break;
1502        case EEH_OPT_DISABLE:
1503                break;
1504        case EEH_OPT_THAW_MMIO:
1505        case EEH_OPT_THAW_DMA:
1506                if (!eeh_ops || !eeh_ops->set_option) {
1507                        ret = -ENOENT;
1508                        break;
1509                }
1510
1511                ret = eeh_pci_enable(pe, option);
1512                break;
1513        default:
1514                pr_debug("%s: Option %d out of range (%d, %d)\n",
1515                        __func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA);
1516                ret = -EINVAL;
1517        }
1518
1519        return ret;
1520}
1521EXPORT_SYMBOL_GPL(eeh_pe_set_option);
1522
1523/**
1524 * eeh_pe_get_state - Retrieve PE's state
1525 * @pe: EEH PE
1526 *
1527 * Retrieve the PE's state, which includes 3 aspects: enabled
1528 * DMA, enabled IO and asserted reset.
1529 */
1530int eeh_pe_get_state(struct eeh_pe *pe)
1531{
1532        int result, ret = 0;
1533        bool rst_active, dma_en, mmio_en;
1534
1535        /* Existing PE ? */
1536        if (!pe)
1537                return -ENODEV;
1538
1539        if (!eeh_ops || !eeh_ops->get_state)
1540                return -ENOENT;
1541
1542        /*
1543         * If the parent PE is owned by the host kernel and is undergoing
1544         * error recovery, we should return the PE state as temporarily
1545         * unavailable so that the error recovery on the guest is suspended
1546         * until the recovery completes on the host.
1547         */
1548        if (pe->parent &&
1549            !(pe->state & EEH_PE_REMOVED) &&
1550            (pe->parent->state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING)))
1551                return EEH_PE_STATE_UNAVAIL;
1552
1553        result = eeh_ops->get_state(pe, NULL);
1554        rst_active = !!(result & EEH_STATE_RESET_ACTIVE);
1555        dma_en = !!(result & EEH_STATE_DMA_ENABLED);
1556        mmio_en = !!(result & EEH_STATE_MMIO_ENABLED);
1557
1558        if (rst_active)
1559                ret = EEH_PE_STATE_RESET;
1560        else if (dma_en && mmio_en)
1561                ret = EEH_PE_STATE_NORMAL;
1562        else if (!dma_en && !mmio_en)
1563                ret = EEH_PE_STATE_STOPPED_IO_DMA;
1564        else if (!dma_en && mmio_en)
1565                ret = EEH_PE_STATE_STOPPED_DMA;
1566        else
1567                ret = EEH_PE_STATE_UNAVAIL;
1568
1569        return ret;
1570}
1571EXPORT_SYMBOL_GPL(eeh_pe_get_state);
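
/*
 * A minimal usage sketch (hypothetical caller): query the PE state to
 * decide whether I/O may resume or recovery has to continue:
 *
 *	int state = eeh_pe_get_state(pe);
 *
 *	if (state == EEH_PE_STATE_NORMAL)
 *		return 0;		(MMIO and DMA are both enabled)
 *	if (state == EEH_PE_STATE_UNAVAIL)
 *		return -EAGAIN;		(host-side recovery still running)
 */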
1572
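/*
 * Restore the BARs of every device in the PE, re-enable each PCI device,
 * then thaw the PE itself, which is still frozen at this point.
 */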
1573static int eeh_pe_reenable_devices(struct eeh_pe *pe)
1574{
1575        struct eeh_dev *edev, *tmp;
1576        struct pci_dev *pdev;
1577        int ret = 0;
1578
1579        /* Restore config space */
1580        eeh_pe_restore_bars(pe);
1581
1582        /*
1583         * Re-enable the PCI devices, since passed-through devices
1584         * are always enabled before the reset.
1585         */
1586        eeh_pe_for_each_dev(pe, edev, tmp) {
1587                pdev = eeh_dev_to_pci_dev(edev);
1588                if (!pdev)
1589                        continue;
1590
1591                ret = pci_reenable_device(pdev);
1592                if (ret) {
1593                        pr_warn("%s: Failure %d reenabling %s\n",
1594                                __func__, ret, pci_name(pdev));
1595                        return ret;
1596                }
1597        }
1598
1599        /* The PE is still in frozen state */
1600        return eeh_unfreeze_pe(pe, true);
1601}
1602
1603
1604/**
1605 * eeh_pe_reset - Issue PE reset according to specified type
1606 * @pe: EEH PE
1607 * @option: reset type
1608 *
1609 * The routine is called to reset the specified PE with the indicated
1610 * type, either a fundamental reset or a hot reset.  The PE reset is
1611 * the central step of error recovery.
1612 */
1613int eeh_pe_reset(struct eeh_pe *pe, int option)
1614{
1615        int ret = 0;
1616
1617        /* Invalid PE ? */
1618        if (!pe)
1619                return -ENODEV;
1620
1621        if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset)
1622                return -ENOENT;
1623
1624        switch (option) {
1625        case EEH_RESET_DEACTIVATE:
1626                ret = eeh_ops->reset(pe, option);
1627                eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
1628                if (ret)
1629                        break;
1630
1631                ret = eeh_pe_reenable_devices(pe);
1632                break;
1633        case EEH_RESET_HOT:
1634        case EEH_RESET_FUNDAMENTAL:
1635                /*
1636                 * Proactively freeze the PE to drop all MMIO access
1637                 * during the reset: such access must be blocked, since
1638                 * it would otherwise trigger a recursive EEH error.
1639                 */
1640                eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
1641
1642                eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
1643                ret = eeh_ops->reset(pe, option);
1644                break;
1645        default:
1646                pr_debug("%s: Unsupported option %d\n",
1647                        __func__, option);
1648                ret = -EINVAL;
1649        }
1650
1651        return ret;
1652}
1653EXPORT_SYMBOL_GPL(eeh_pe_reset);
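
/*
 * A minimal recovery sketch (hypothetical caller): assert a hot reset
 * and then deactivate it; the deactivation path also re-enables the
 * devices in the PE via eeh_pe_reenable_devices():
 *
 *	ret = eeh_pe_reset(pe, EEH_RESET_HOT);
 *	if (!ret)
 *		ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE);
 */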
1654
1655/**
1656 * eeh_pe_configure - Configure PCI bridges after PE reset
1657 * @pe: EEH PE
1658 *
1659 * The routine is called to restore the PCI config space of the
1660 * PCI devices, especially the PCI bridges, affected by the PE
1661 * reset issued previously.
1662 */
1663int eeh_pe_configure(struct eeh_pe *pe)
1664{
1665        int ret = 0;
1666
1667        /* Invalid PE ? */
1668        if (!pe)
1669                return -ENODEV;
1670
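        /* Nothing to do beyond validating the PE at this point */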
1671        return ret;
1672}
1673EXPORT_SYMBOL_GPL(eeh_pe_configure);
1674
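/* Dump EEH statistics; backs the /proc/powerpc/eeh file created below */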
1675static int proc_eeh_show(struct seq_file *m, void *v)
1676{
1677        if (!eeh_enabled()) {
1678                seq_printf(m, "EEH Subsystem is globally disabled\n");
1679                seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
1680        } else {
1681                seq_printf(m, "EEH Subsystem is enabled\n");
1682                seq_printf(m,
1683                                "no device=%llu\n"
1684                                "no device node=%llu\n"
1685                                "no config address=%llu\n"
1686                                "check not wanted=%llu\n"
1687                                "eeh_total_mmio_ffs=%llu\n"
1688                                "eeh_false_positives=%llu\n"
1689                                "eeh_slot_resets=%llu\n",
1690                                eeh_stats.no_device,
1691                                eeh_stats.no_dn,
1692                                eeh_stats.no_cfg_addr,
1693                                eeh_stats.ignored_check,
1694                                eeh_stats.total_mmio_ffs,
1695                                eeh_stats.false_positives,
1696                                eeh_stats.slot_resets);
1697        }
1698
1699        return 0;
1700}
1701
1702static int proc_eeh_open(struct inode *inode, struct file *file)
1703{
1704        return single_open(file, proc_eeh_show, NULL);
1705}
1706
1707static const struct file_operations proc_eeh_operations = {
1708        .open      = proc_eeh_open,
1709        .read      = seq_read,
1710        .llseek    = seq_lseek,
1711        .release   = single_release,
1712};
1713
1714#ifdef CONFIG_DEBUG_FS
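/*
 * debugfs knob: a non-zero write clears EEH_FORCE_DISABLED, a zero write
 * sets it; in both cases the platform backend is notified through its
 * post_init() hook.
 */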
1715static int eeh_enable_dbgfs_set(void *data, u64 val)
1716{
1717        if (val)
1718                eeh_clear_flag(EEH_FORCE_DISABLED);
1719        else
1720                eeh_add_flag(EEH_FORCE_DISABLED);
1721
1722        /* Notify the backend */
1723        if (eeh_ops->post_init)
1724                eeh_ops->post_init();
1725
1726        return 0;
1727}
1728
1729static int eeh_enable_dbgfs_get(void *data, u64 *val)
1730{
1731        if (eeh_enabled())
1732                *val = 0x1ul;
1733        else
1734                *val = 0x0ul;
1735        return 0;
1736}
1737
1738static int eeh_freeze_dbgfs_set(void *data, u64 val)
1739{
1740        eeh_max_freezes = val;
1741        return 0;
1742}
1743
1744static int eeh_freeze_dbgfs_get(void *data, u64 *val)
1745{
1746        *val = eeh_max_freezes;
1747        return 0;
1748}
1749
1750DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
1751                        eeh_enable_dbgfs_set, "0x%llx\n");
1752DEFINE_SIMPLE_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get,
1753                        eeh_freeze_dbgfs_set, "0x%llx\n");
1754#endif
1755
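/*
 * Register the procfs and debugfs entries.  Assuming procfs is mounted
 * at /proc and debugfs at /sys/kernel/debug (typical, but not enforced
 * here), these show up as /proc/powerpc/eeh,
 * /sys/kernel/debug/powerpc/eeh_enable and
 * /sys/kernel/debug/powerpc/eeh_max_freezes.
 */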
1756static int __init eeh_init_proc(void)
1757{
1758        if (machine_is(pseries) || machine_is(powernv)) {
1759                proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
1760#ifdef CONFIG_DEBUG_FS
1761                debugfs_create_file("eeh_enable", 0600,
1762                                    powerpc_debugfs_root, NULL,
1763                                    &eeh_enable_dbgfs_ops);
1764                debugfs_create_file("eeh_max_freezes", 0600,
1765                                    powerpc_debugfs_root, NULL,
1766                                    &eeh_freeze_dbgfs_ops);
1767#endif
1768        }
1769
1770        return 0;
1771}
1772__initcall(eeh_init_proc);
1773