linux/drivers/pci/pcie/err.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * This file implements the error recovery as a core part of PCIe error
   4 * reporting. When a PCIe error is delivered, an error message will be
   5 * collected and printed to console, then, an error recovery procedure
   6 * will be executed by following the PCI error recovery rules.
   7 *
   8 * Copyright (C) 2006 Intel Corp.
   9 *      Tom Long Nguyen (tom.l.nguyen@intel.com)
  10 *      Zhang Yanmin (yanmin.zhang@intel.com)
  11 */
  12
  13#define dev_fmt(fmt) "AER: " fmt
  14
  15#include <linux/pci.h>
  16#include <linux/module.h>
  17#include <linux/kernel.h>
  18#include <linux/errno.h>
  19#include <linux/aer.h>
  20#include "portdrv.h"
  21#include "../pci.h"
  22
  23static pci_ers_result_t merge_result(enum pci_ers_result orig,
  24                                  enum pci_ers_result new)
  25{
  26        if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
  27                return PCI_ERS_RESULT_NO_AER_DRIVER;
  28
  29        if (new == PCI_ERS_RESULT_NONE)
  30                return orig;
  31
  32        switch (orig) {
  33        case PCI_ERS_RESULT_CAN_RECOVER:
  34        case PCI_ERS_RESULT_RECOVERED:
  35                orig = new;
  36                break;
  37        case PCI_ERS_RESULT_DISCONNECT:
  38                if (new == PCI_ERS_RESULT_NEED_RESET)
  39                        orig = PCI_ERS_RESULT_NEED_RESET;
  40                break;
  41        default:
  42                break;
  43        }
  44
  45        return orig;
  46}
  47
  48static int report_error_detected(struct pci_dev *dev,
  49                                 pci_channel_state_t state,
  50                                 enum pci_ers_result *result)
  51{
  52        pci_ers_result_t vote;
  53        const struct pci_error_handlers *err_handler;
  54
  55        device_lock(&dev->dev);
  56        if (!pci_dev_set_io_state(dev, state) ||
  57                !dev->driver ||
  58                !dev->driver->err_handler ||
  59                !dev->driver->err_handler->error_detected) {
  60                /*
  61                 * If any device in the subtree does not have an error_detected
  62                 * callback, PCI_ERS_RESULT_NO_AER_DRIVER prevents subsequent
  63                 * error callbacks of "any" device in the subtree, and will
  64                 * exit in the disconnected error state.
  65                 */
  66                if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
  67                        vote = PCI_ERS_RESULT_NO_AER_DRIVER;
  68                        pci_info(dev, "can't recover (no error_detected callback)\n");
  69                } else {
  70                        vote = PCI_ERS_RESULT_NONE;
  71                }
  72        } else {
  73                err_handler = dev->driver->err_handler;
  74                vote = err_handler->error_detected(dev, state);
  75        }
  76        pci_uevent_ers(dev, vote);
  77        *result = merge_result(*result, vote);
  78        device_unlock(&dev->dev);
  79        return 0;
  80}
  81
  82static int report_frozen_detected(struct pci_dev *dev, void *data)
  83{
  84        return report_error_detected(dev, pci_channel_io_frozen, data);
  85}
  86
  87static int report_normal_detected(struct pci_dev *dev, void *data)
  88{
  89        return report_error_detected(dev, pci_channel_io_normal, data);
  90}
  91
  92static int report_mmio_enabled(struct pci_dev *dev, void *data)
  93{
  94        pci_ers_result_t vote, *result = data;
  95        const struct pci_error_handlers *err_handler;
  96
  97        device_lock(&dev->dev);
  98        if (!dev->driver ||
  99                !dev->driver->err_handler ||
 100                !dev->driver->err_handler->mmio_enabled)
 101                goto out;
 102
 103        err_handler = dev->driver->err_handler;
 104        vote = err_handler->mmio_enabled(dev);
 105        *result = merge_result(*result, vote);
 106out:
 107        device_unlock(&dev->dev);
 108        return 0;
 109}
 110
 111static int report_slot_reset(struct pci_dev *dev, void *data)
 112{
 113        pci_ers_result_t vote, *result = data;
 114        const struct pci_error_handlers *err_handler;
 115
 116        device_lock(&dev->dev);
 117        if (!dev->driver ||
 118                !dev->driver->err_handler ||
 119                !dev->driver->err_handler->slot_reset)
 120                goto out;
 121
 122        err_handler = dev->driver->err_handler;
 123        vote = err_handler->slot_reset(dev);
 124        *result = merge_result(*result, vote);
 125out:
 126        device_unlock(&dev->dev);
 127        return 0;
 128}
 129
 130static int report_resume(struct pci_dev *dev, void *data)
 131{
 132        const struct pci_error_handlers *err_handler;
 133
 134        device_lock(&dev->dev);
 135        if (!pci_dev_set_io_state(dev, pci_channel_io_normal) ||
 136                !dev->driver ||
 137                !dev->driver->err_handler ||
 138                !dev->driver->err_handler->resume)
 139                goto out;
 140
 141        err_handler = dev->driver->err_handler;
 142        err_handler->resume(dev);
 143out:
 144        pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
 145        device_unlock(&dev->dev);
 146        return 0;
 147}
 148
 149/**
 150 * pci_walk_bridge - walk bridges potentially AER affected
 151 * @bridge:     bridge which may be a Port, an RCEC, or an RCiEP
 152 * @cb:         callback to be called for each device found
 153 * @userdata:   arbitrary pointer to be passed to callback
 154 *
 155 * If the device provided is a bridge, walk the subordinate bus, including
 156 * any bridged devices on buses under this bus.  Call the provided callback
 157 * on each device found.
 158 *
 159 * If the device provided has no subordinate bus, e.g., an RCEC or RCiEP,
 160 * call the callback on the device itself.
 161 */
 162static void pci_walk_bridge(struct pci_dev *bridge,
 163                            int (*cb)(struct pci_dev *, void *),
 164                            void *userdata)
 165{
 166        if (bridge->subordinate)
 167                pci_walk_bus(bridge->subordinate, cb, userdata);
 168        else
 169                cb(bridge, userdata);
 170}
 171
 172pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
 173                pci_channel_state_t state,
 174                pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev))
 175{
 176        int type = pci_pcie_type(dev);
 177        struct pci_dev *bridge;
 178        pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
 179        struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
 180
 181        /*
 182         * If the error was detected by a Root Port, Downstream Port, RCEC,
 183         * or RCiEP, recovery runs on the device itself.  For Ports, that
 184         * also includes any subordinate devices.
 185         *
 186         * If it was detected by another device (Endpoint, etc), recovery
 187         * runs on the device and anything else under the same Port, i.e.,
 188         * everything under "bridge".
 189         */
 190        if (type == PCI_EXP_TYPE_ROOT_PORT ||
 191            type == PCI_EXP_TYPE_DOWNSTREAM ||
 192            type == PCI_EXP_TYPE_RC_EC ||
 193            type == PCI_EXP_TYPE_RC_END)
 194                bridge = dev;
 195        else
 196                bridge = pci_upstream_bridge(dev);
 197
 198        pci_dbg(bridge, "broadcast error_detected message\n");
 199        if (state == pci_channel_io_frozen) {
 200                pci_walk_bridge(bridge, report_frozen_detected, &status);
 201                if (reset_subordinates(bridge) != PCI_ERS_RESULT_RECOVERED) {
 202                        pci_warn(bridge, "subordinate device reset failed\n");
 203                        goto failed;
 204                }
 205        } else {
 206                pci_walk_bridge(bridge, report_normal_detected, &status);
 207        }
 208
 209        if (status == PCI_ERS_RESULT_CAN_RECOVER) {
 210                status = PCI_ERS_RESULT_RECOVERED;
 211                pci_dbg(bridge, "broadcast mmio_enabled message\n");
 212                pci_walk_bridge(bridge, report_mmio_enabled, &status);
 213        }
 214
 215        if (status == PCI_ERS_RESULT_NEED_RESET) {
 216                /*
 217                 * TODO: Should call platform-specific
 218                 * functions to reset slot before calling
 219                 * drivers' slot_reset callbacks?
 220                 */
 221                status = PCI_ERS_RESULT_RECOVERED;
 222                pci_dbg(bridge, "broadcast slot_reset message\n");
 223                pci_walk_bridge(bridge, report_slot_reset, &status);
 224        }
 225
 226        if (status != PCI_ERS_RESULT_RECOVERED)
 227                goto failed;
 228
 229        pci_dbg(bridge, "broadcast resume message\n");
 230        pci_walk_bridge(bridge, report_resume, &status);
 231
 232        /*
 233         * If we have native control of AER, clear error status in the device
 234         * that detected the error.  If the platform retained control of AER,
 235         * it is responsible for clearing this status.  In that case, the
 236         * signaling device may not even be visible to the OS.
 237         */
 238        if (host->native_aer || pcie_ports_native) {
 239                pcie_clear_device_status(dev);
 240                pci_aer_clear_nonfatal_status(dev);
 241        }
 242        pci_info(bridge, "device recovery successful\n");
 243        return status;
 244
 245failed:
 246        pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT);
 247
 248        /* TODO: Should kernel panic here? */
 249        pci_info(bridge, "device recovery failed\n");
 250
 251        return status;
 252}
 253