linux/drivers/crypto/qat/qat_common/adf_aer.c
<<
>>
Prefs
   1// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
   2/* Copyright(c) 2014 - 2020 Intel Corporation */
   3#include <linux/kernel.h>
   4#include <linux/pci.h>
   5#include <linux/aer.h>
   6#include <linux/completion.h>
   7#include <linux/workqueue.h>
   8#include <linux/delay.h>
   9#include "adf_accel_devices.h"
  10#include "adf_common_drv.h"
  11
  /* Workqueue on which device reset work items are queued. */
  static struct workqueue_struct *device_reset_wq;
  13
  14static pci_ers_result_t adf_error_detected(struct pci_dev *pdev,
  15                                           pci_channel_state_t state)
  16{
  17        struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev);
  18
  19        dev_info(&pdev->dev, "Acceleration driver hardware error detected.\n");
  20        if (!accel_dev) {
  21                dev_err(&pdev->dev, "Can't find acceleration device\n");
  22                return PCI_ERS_RESULT_DISCONNECT;
  23        }
  24
  25        if (state == pci_channel_io_perm_failure) {
  26                dev_err(&pdev->dev, "Can't recover from device error\n");
  27                return PCI_ERS_RESULT_DISCONNECT;
  28        }
  29
  30        return PCI_ERS_RESULT_NEED_RESET;
  31}
  32
  33/* reset dev data */
  34struct adf_reset_dev_data {
  35        int mode;
  36        struct adf_accel_dev *accel_dev;
  37        struct completion compl;
  38        struct work_struct reset_work;
  39};
  40
  41void adf_reset_sbr(struct adf_accel_dev *accel_dev)
  42{
  43        struct pci_dev *pdev = accel_to_pci_dev(accel_dev);
  44        struct pci_dev *parent = pdev->bus->self;
  45        u16 bridge_ctl = 0;
  46
  47        if (!parent)
  48                parent = pdev;
  49
  50        if (!pci_wait_for_pending_transaction(pdev))
  51                dev_info(&GET_DEV(accel_dev),
  52                         "Transaction still in progress. Proceeding\n");
  53
  54        dev_info(&GET_DEV(accel_dev), "Secondary bus reset\n");
  55
  56        pci_read_config_word(parent, PCI_BRIDGE_CONTROL, &bridge_ctl);
  57        bridge_ctl |= PCI_BRIDGE_CTL_BUS_RESET;
  58        pci_write_config_word(parent, PCI_BRIDGE_CONTROL, bridge_ctl);
  59        msleep(100);
  60        bridge_ctl &= ~PCI_BRIDGE_CTL_BUS_RESET;
  61        pci_write_config_word(parent, PCI_BRIDGE_CONTROL, bridge_ctl);
  62        msleep(100);
  63}
  64EXPORT_SYMBOL_GPL(adf_reset_sbr);
  65
  /* Reset the device by calling pcie_flr() on its PCI device. */
  void adf_reset_flr(struct adf_accel_dev *accel_dev)
  {
          struct pci_dev *pdev = accel_to_pci_dev(accel_dev);

          pcie_flr(pdev);
  }
  EXPORT_SYMBOL_GPL(adf_reset_flr);
  71
  72void adf_dev_restore(struct adf_accel_dev *accel_dev)
  73{
  74        struct adf_hw_device_data *hw_device = accel_dev->hw_device;
  75        struct pci_dev *pdev = accel_to_pci_dev(accel_dev);
  76
  77        if (hw_device->reset_device) {
  78                dev_info(&GET_DEV(accel_dev), "Resetting device qat_dev%d\n",
  79                         accel_dev->accel_id);
  80                hw_device->reset_device(accel_dev);
  81                pci_restore_state(pdev);
  82                pci_save_state(pdev);
  83        }
  84}
  85
  86static void adf_device_reset_worker(struct work_struct *work)
  87{
  88        struct adf_reset_dev_data *reset_data =
  89                  container_of(work, struct adf_reset_dev_data, reset_work);
  90        struct adf_accel_dev *accel_dev = reset_data->accel_dev;
  91
  92        adf_dev_restarting_notify(accel_dev);
  93        adf_dev_stop(accel_dev);
  94        adf_dev_shutdown(accel_dev);
  95        if (adf_dev_init(accel_dev) || adf_dev_start(accel_dev)) {
  96                /* The device hanged and we can't restart it so stop here */
  97                dev_err(&GET_DEV(accel_dev), "Restart device failed\n");
  98                kfree(reset_data);
  99                WARN(1, "QAT: device restart failed. Device is unusable\n");
 100                return;
 101        }
 102        adf_dev_restarted_notify(accel_dev);
 103        clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status);
 104
 105        /* The dev is back alive. Notify the caller if in sync mode */
 106        if (reset_data->mode == ADF_DEV_RESET_SYNC)
 107                complete(&reset_data->compl);
 108        else
 109                kfree(reset_data);
 110}
 111
 112static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev,
 113                                      enum adf_dev_reset_mode mode)
 114{
 115        struct adf_reset_dev_data *reset_data;
 116
 117        if (!adf_dev_started(accel_dev) ||
 118            test_bit(ADF_STATUS_RESTARTING, &accel_dev->status))
 119                return 0;
 120
 121        set_bit(ADF_STATUS_RESTARTING, &accel_dev->status);
 122        reset_data = kzalloc(sizeof(*reset_data), GFP_KERNEL);
 123        if (!reset_data)
 124                return -ENOMEM;
 125        reset_data->accel_dev = accel_dev;
 126        init_completion(&reset_data->compl);
 127        reset_data->mode = mode;
 128        INIT_WORK(&reset_data->reset_work, adf_device_reset_worker);
 129        queue_work(device_reset_wq, &reset_data->reset_work);
 130
 131        /* If in sync mode wait for the result */
 132        if (mode == ADF_DEV_RESET_SYNC) {
 133                int ret = 0;
 134                /* Maximum device reset time is 10 seconds */
 135                unsigned long wait_jiffies = msecs_to_jiffies(10000);
 136                unsigned long timeout = wait_for_completion_timeout(
 137                                   &reset_data->compl, wait_jiffies);
 138                if (!timeout) {
 139                        dev_err(&GET_DEV(accel_dev),
 140                                "Reset device timeout expired\n");
 141                        ret = -EFAULT;
 142                }
 143                kfree(reset_data);
 144                return ret;
 145        }
 146        return 0;
 147}
 148
 149static pci_ers_result_t adf_slot_reset(struct pci_dev *pdev)
 150{
 151        struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev);
 152
 153        if (!accel_dev) {
 154                pr_err("QAT: Can't find acceleration device\n");
 155                return PCI_ERS_RESULT_DISCONNECT;
 156        }
 157        if (adf_dev_aer_schedule_reset(accel_dev, ADF_DEV_RESET_SYNC))
 158                return PCI_ERS_RESULT_DISCONNECT;
 159
 160        return PCI_ERS_RESULT_RECOVERED;
 161}
 162
 163static void adf_resume(struct pci_dev *pdev)
 164{
 165        dev_info(&pdev->dev, "Acceleration driver reset completed\n");
 166        dev_info(&pdev->dev, "Device is up and running\n");
 167}
 168
 169static const struct pci_error_handlers adf_err_handler = {
 170        .error_detected = adf_error_detected,
 171        .slot_reset = adf_slot_reset,
 172        .resume = adf_resume,
 173};
 174
 175/**
 176 * adf_enable_aer() - Enable Advance Error Reporting for acceleration device
 177 * @accel_dev:  Pointer to acceleration device.
 178 *
 179 * Function enables PCI Advance Error Reporting for the
 180 * QAT acceleration device accel_dev.
 181 * To be used by QAT device specific drivers.
 182 *
 183 * Return: 0 on success, error code otherwise.
 184 */
 185int adf_enable_aer(struct adf_accel_dev *accel_dev)
 186{
 187        struct pci_dev *pdev = accel_to_pci_dev(accel_dev);
 188        struct pci_driver *pdrv = pdev->driver;
 189
 190        pdrv->err_handler = &adf_err_handler;
 191        pci_enable_pcie_error_reporting(pdev);
 192        return 0;
 193}
 194EXPORT_SYMBOL_GPL(adf_enable_aer);
 195
 /**
  * adf_disable_aer() - Disable Advanced Error Reporting for acceleration device
  * @accel_dev:  Pointer to acceleration device.
  *
  * Disables PCIe error reporting for the PCI device behind accel_dev.
  * To be used by QAT device specific drivers.
  *
  * Return: void
  */
 void adf_disable_aer(struct adf_accel_dev *accel_dev)
 {
         pci_disable_pcie_error_reporting(accel_to_pci_dev(accel_dev));
 }
 EXPORT_SYMBOL_GPL(adf_disable_aer);
 213
 214int adf_init_aer(void)
 215{
 216        device_reset_wq = alloc_workqueue("qat_device_reset_wq",
 217                                          WQ_MEM_RECLAIM, 0);
 218        return !device_reset_wq ? -EFAULT : 0;
 219}
 220
 221void adf_exit_aer(void)
 222{
 223        if (device_reset_wq)
 224                destroy_workqueue(device_reset_wq);
 225        device_reset_wq = NULL;
 226}
 227