1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47#include <linux/kernel.h>
48#include <linux/pci.h>
49#include <linux/aer.h>
50#include <linux/completion.h>
51#include <linux/workqueue.h>
52#include <linux/delay.h>
53#include "adf_accel_devices.h"
54#include "adf_common_drv.h"
55
/*
 * Workqueue on which device reset work items are queued. It is created by
 * adf_init_aer() and destroyed (and reset to NULL) by adf_exit_aer().
 */
static struct workqueue_struct *device_reset_wq;
57
58static pci_ers_result_t adf_error_detected(struct pci_dev *pdev,
59 pci_channel_state_t state)
60{
61 struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev);
62
63 dev_info(&pdev->dev, "Acceleration driver hardware error detected.\n");
64 if (!accel_dev) {
65 dev_err(&pdev->dev, "Can't find acceleration device\n");
66 return PCI_ERS_RESULT_DISCONNECT;
67 }
68
69 if (state == pci_channel_io_perm_failure) {
70 dev_err(&pdev->dev, "Can't recover from device error\n");
71 return PCI_ERS_RESULT_DISCONNECT;
72 }
73
74 return PCI_ERS_RESULT_NEED_RESET;
75}
76
77
78struct adf_reset_dev_data {
79 int mode;
80 struct adf_accel_dev *accel_dev;
81 struct completion compl;
82 struct work_struct reset_work;
83};
84
85void adf_dev_restore(struct adf_accel_dev *accel_dev)
86{
87 struct pci_dev *pdev = accel_to_pci_dev(accel_dev);
88 struct pci_dev *parent = pdev->bus->self;
89 uint16_t bridge_ctl = 0;
90
91 if (accel_dev->is_vf)
92 return;
93
94 dev_info(&GET_DEV(accel_dev), "Resetting device qat_dev%d\n",
95 accel_dev->accel_id);
96
97 if (!parent)
98 parent = pdev;
99
100 if (!pci_wait_for_pending_transaction(pdev))
101 dev_info(&GET_DEV(accel_dev),
102 "Transaction still in progress. Proceeding\n");
103
104 pci_read_config_word(parent, PCI_BRIDGE_CONTROL, &bridge_ctl);
105 bridge_ctl |= PCI_BRIDGE_CTL_BUS_RESET;
106 pci_write_config_word(parent, PCI_BRIDGE_CONTROL, bridge_ctl);
107 msleep(100);
108 bridge_ctl &= ~PCI_BRIDGE_CTL_BUS_RESET;
109 pci_write_config_word(parent, PCI_BRIDGE_CONTROL, bridge_ctl);
110 msleep(100);
111 pci_restore_state(pdev);
112 pci_save_state(pdev);
113}
114
115static void adf_device_reset_worker(struct work_struct *work)
116{
117 struct adf_reset_dev_data *reset_data =
118 container_of(work, struct adf_reset_dev_data, reset_work);
119 struct adf_accel_dev *accel_dev = reset_data->accel_dev;
120
121 adf_dev_restarting_notify(accel_dev);
122 adf_dev_stop(accel_dev);
123 adf_dev_shutdown(accel_dev);
124 if (adf_dev_init(accel_dev) || adf_dev_start(accel_dev)) {
125
126 dev_err(&GET_DEV(accel_dev), "Restart device failed\n");
127 kfree(reset_data);
128 WARN(1, "QAT: device restart failed. Device is unusable\n");
129 return;
130 }
131 adf_dev_restarted_notify(accel_dev);
132 clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status);
133
134
135 if (reset_data->mode == ADF_DEV_RESET_SYNC)
136 complete(&reset_data->compl);
137 else
138 kfree(reset_data);
139}
140
141static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev,
142 enum adf_dev_reset_mode mode)
143{
144 struct adf_reset_dev_data *reset_data;
145
146 if (!adf_dev_started(accel_dev) ||
147 test_bit(ADF_STATUS_RESTARTING, &accel_dev->status))
148 return 0;
149
150 set_bit(ADF_STATUS_RESTARTING, &accel_dev->status);
151 reset_data = kzalloc(sizeof(*reset_data), GFP_ATOMIC);
152 if (!reset_data)
153 return -ENOMEM;
154 reset_data->accel_dev = accel_dev;
155 init_completion(&reset_data->compl);
156 reset_data->mode = mode;
157 INIT_WORK(&reset_data->reset_work, adf_device_reset_worker);
158 queue_work(device_reset_wq, &reset_data->reset_work);
159
160
161 if (mode == ADF_DEV_RESET_SYNC) {
162 int ret = 0;
163
164 unsigned long wait_jiffies = msecs_to_jiffies(10000);
165 unsigned long timeout = wait_for_completion_timeout(
166 &reset_data->compl, wait_jiffies);
167 if (!timeout) {
168 dev_err(&GET_DEV(accel_dev),
169 "Reset device timeout expired\n");
170 ret = -EFAULT;
171 }
172 kfree(reset_data);
173 return ret;
174 }
175 return 0;
176}
177
178static pci_ers_result_t adf_slot_reset(struct pci_dev *pdev)
179{
180 struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev);
181
182 if (!accel_dev) {
183 pr_err("QAT: Can't find acceleration device\n");
184 return PCI_ERS_RESULT_DISCONNECT;
185 }
186 pci_cleanup_aer_uncorrect_error_status(pdev);
187 if (adf_dev_aer_schedule_reset(accel_dev, ADF_DEV_RESET_SYNC))
188 return PCI_ERS_RESULT_DISCONNECT;
189
190 return PCI_ERS_RESULT_RECOVERED;
191}
192
193static void adf_resume(struct pci_dev *pdev)
194{
195 dev_info(&pdev->dev, "Acceleration driver reset completed\n");
196 dev_info(&pdev->dev, "Device is up and runnig\n");
197}
198
199static const struct pci_error_handlers adf_err_handler = {
200 .error_detected = adf_error_detected,
201 .slot_reset = adf_slot_reset,
202 .resume = adf_resume,
203};
204
205
206
207
208
209
210
211
212
213
214
215
216int adf_enable_aer(struct adf_accel_dev *accel_dev, struct pci_driver *adf)
217{
218 struct pci_dev *pdev = accel_to_pci_dev(accel_dev);
219
220 adf->err_handler = &adf_err_handler;
221 pci_enable_pcie_error_reporting(pdev);
222 return 0;
223}
224EXPORT_SYMBOL_GPL(adf_enable_aer);
225
226
227
228
229
230
231
232
233
234
235
/**
 * adf_disable_aer() - Disable PCIe error reporting for an acceleration device
 * @accel_dev: Acceleration device whose PCI device gets error reporting
 *             disabled.
 *
 * The result of the disable call is not checked.
 */
void adf_disable_aer(struct adf_accel_dev *accel_dev)
{
	pci_disable_pcie_error_reporting(accel_to_pci_dev(accel_dev));
}
EXPORT_SYMBOL_GPL(adf_disable_aer);
243
244int adf_init_aer(void)
245{
246 device_reset_wq = create_workqueue("qat_device_reset_wq");
247 return !device_reset_wq ? -EFAULT : 0;
248}
249
250void adf_exit_aer(void)
251{
252 if (device_reset_wq)
253 destroy_workqueue(device_reset_wq);
254 device_reset_wq = NULL;
255}
256