1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47#include <linux/kernel.h>
48#include <linux/pci.h>
49#include <linux/aer.h>
50#include <linux/completion.h>
51#include <linux/workqueue.h>
52#include <linux/delay.h>
53#include "adf_accel_devices.h"
54#include "adf_common_drv.h"
55
/* Workqueue on which device reset jobs are run, outside AER callback context. */
static struct workqueue_struct *device_reset_wq;
57
58static pci_ers_result_t adf_error_detected(struct pci_dev *pdev,
59 pci_channel_state_t state)
60{
61 struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev);
62
63 dev_info(&pdev->dev, "Acceleration driver hardware error detected.\n");
64 if (!accel_dev) {
65 dev_err(&pdev->dev, "Can't find acceleration device\n");
66 return PCI_ERS_RESULT_DISCONNECT;
67 }
68
69 if (state == pci_channel_io_perm_failure) {
70 dev_err(&pdev->dev, "Can't recover from device error\n");
71 return PCI_ERS_RESULT_DISCONNECT;
72 }
73
74 return PCI_ERS_RESULT_NEED_RESET;
75}
76
77
/* Per-reset bookkeeping passed from the scheduler to the reset worker. */
struct adf_reset_dev_data {
	int mode;			/* adf_dev_reset_mode: sync or async */
	struct adf_accel_dev *accel_dev;	/* device being reset */
	struct completion compl;	/* signalled to the sync-mode waiter */
	struct work_struct reset_work;	/* queued on device_reset_wq */
};
84
85void adf_reset_sbr(struct adf_accel_dev *accel_dev)
86{
87 struct pci_dev *pdev = accel_to_pci_dev(accel_dev);
88 struct pci_dev *parent = pdev->bus->self;
89 uint16_t bridge_ctl = 0;
90
91 if (!parent)
92 parent = pdev;
93
94 if (!pci_wait_for_pending_transaction(pdev))
95 dev_info(&GET_DEV(accel_dev),
96 "Transaction still in progress. Proceeding\n");
97
98 dev_info(&GET_DEV(accel_dev), "Secondary bus reset\n");
99
100 pci_read_config_word(parent, PCI_BRIDGE_CONTROL, &bridge_ctl);
101 bridge_ctl |= PCI_BRIDGE_CTL_BUS_RESET;
102 pci_write_config_word(parent, PCI_BRIDGE_CONTROL, bridge_ctl);
103 msleep(100);
104 bridge_ctl &= ~PCI_BRIDGE_CTL_BUS_RESET;
105 pci_write_config_word(parent, PCI_BRIDGE_CONTROL, bridge_ctl);
106 msleep(100);
107}
108EXPORT_SYMBOL_GPL(adf_reset_sbr);
109
/* Reset the device with a PCIe Function Level Reset. */
void adf_reset_flr(struct adf_accel_dev *accel_dev)
{
	struct pci_dev *pdev = accel_to_pci_dev(accel_dev);

	pcie_flr(pdev);
}
EXPORT_SYMBOL_GPL(adf_reset_flr);
115
116void adf_dev_restore(struct adf_accel_dev *accel_dev)
117{
118 struct adf_hw_device_data *hw_device = accel_dev->hw_device;
119 struct pci_dev *pdev = accel_to_pci_dev(accel_dev);
120
121 if (hw_device->reset_device) {
122 dev_info(&GET_DEV(accel_dev), "Resetting device qat_dev%d\n",
123 accel_dev->accel_id);
124 hw_device->reset_device(accel_dev);
125 pci_restore_state(pdev);
126 pci_save_state(pdev);
127 }
128}
129
130static void adf_device_reset_worker(struct work_struct *work)
131{
132 struct adf_reset_dev_data *reset_data =
133 container_of(work, struct adf_reset_dev_data, reset_work);
134 struct adf_accel_dev *accel_dev = reset_data->accel_dev;
135
136 adf_dev_restarting_notify(accel_dev);
137 adf_dev_stop(accel_dev);
138 adf_dev_shutdown(accel_dev);
139 if (adf_dev_init(accel_dev) || adf_dev_start(accel_dev)) {
140
141 dev_err(&GET_DEV(accel_dev), "Restart device failed\n");
142 kfree(reset_data);
143 WARN(1, "QAT: device restart failed. Device is unusable\n");
144 return;
145 }
146 adf_dev_restarted_notify(accel_dev);
147 clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status);
148
149
150 if (reset_data->mode == ADF_DEV_RESET_SYNC)
151 complete(&reset_data->compl);
152 else
153 kfree(reset_data);
154}
155
156static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev,
157 enum adf_dev_reset_mode mode)
158{
159 struct adf_reset_dev_data *reset_data;
160
161 if (!adf_dev_started(accel_dev) ||
162 test_bit(ADF_STATUS_RESTARTING, &accel_dev->status))
163 return 0;
164
165 set_bit(ADF_STATUS_RESTARTING, &accel_dev->status);
166 reset_data = kzalloc(sizeof(*reset_data), GFP_ATOMIC);
167 if (!reset_data)
168 return -ENOMEM;
169 reset_data->accel_dev = accel_dev;
170 init_completion(&reset_data->compl);
171 reset_data->mode = mode;
172 INIT_WORK(&reset_data->reset_work, adf_device_reset_worker);
173 queue_work(device_reset_wq, &reset_data->reset_work);
174
175
176 if (mode == ADF_DEV_RESET_SYNC) {
177 int ret = 0;
178
179 unsigned long wait_jiffies = msecs_to_jiffies(10000);
180 unsigned long timeout = wait_for_completion_timeout(
181 &reset_data->compl, wait_jiffies);
182 if (!timeout) {
183 dev_err(&GET_DEV(accel_dev),
184 "Reset device timeout expired\n");
185 ret = -EFAULT;
186 }
187 kfree(reset_data);
188 return ret;
189 }
190 return 0;
191}
192
193static pci_ers_result_t adf_slot_reset(struct pci_dev *pdev)
194{
195 struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev);
196
197 if (!accel_dev) {
198 pr_err("QAT: Can't find acceleration device\n");
199 return PCI_ERS_RESULT_DISCONNECT;
200 }
201 pci_cleanup_aer_uncorrect_error_status(pdev);
202 if (adf_dev_aer_schedule_reset(accel_dev, ADF_DEV_RESET_SYNC))
203 return PCI_ERS_RESULT_DISCONNECT;
204
205 return PCI_ERS_RESULT_RECOVERED;
206}
207
208static void adf_resume(struct pci_dev *pdev)
209{
210 dev_info(&pdev->dev, "Acceleration driver reset completed\n");
211 dev_info(&pdev->dev, "Device is up and running\n");
212}
213
/* PCI error callbacks handed to the PCI core via adf_enable_aer(). */
static const struct pci_error_handlers adf_err_handler = {
	.error_detected = adf_error_detected,
	.slot_reset = adf_slot_reset,
	.resume = adf_resume,
};
219
220
221
222
223
224
225
226
227
228
229
230
/**
 * adf_enable_aer() - Enable Advanced Error Reporting for acceleration device
 * @accel_dev:  Pointer to acceleration device.
 * @adf:        PCI device driver owning the given acceleration device.
 *
 * Installs the AER error handlers on @adf and enables PCIe error reporting
 * for the QAT acceleration device @accel_dev.
 * To be used by QAT device specific drivers.
 *
 * Return: 0 on success (this implementation cannot fail).
 */
int adf_enable_aer(struct adf_accel_dev *accel_dev, struct pci_driver *adf)
{
	struct pci_dev *pdev = accel_to_pci_dev(accel_dev);

	adf->err_handler = &adf_err_handler;
	pci_enable_pcie_error_reporting(pdev);
	return 0;
}
EXPORT_SYMBOL_GPL(adf_enable_aer);
240
241
242
243
244
245
246
247
248
249
250
/**
 * adf_disable_aer() - Disable Advanced Error Reporting for acceleration device
 * @accel_dev:  Pointer to acceleration device.
 *
 * Disables PCIe error reporting for the QAT acceleration device @accel_dev.
 * To be used by QAT device specific drivers.
 */
void adf_disable_aer(struct adf_accel_dev *accel_dev)
{
	struct pci_dev *pdev = accel_to_pci_dev(accel_dev);

	pci_disable_pcie_error_reporting(pdev);
}
EXPORT_SYMBOL_GPL(adf_disable_aer);
258
259int adf_init_aer(void)
260{
261 device_reset_wq = alloc_workqueue("qat_device_reset_wq",
262 WQ_MEM_RECLAIM, 0);
263 return !device_reset_wq ? -EFAULT : 0;
264}
265
266void adf_exit_aer(void)
267{
268 if (device_reset_wq)
269 destroy_workqueue(device_reset_wq);
270 device_reset_wq = NULL;
271}
272