1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33#include <linux/module.h>
34#include <linux/etherdevice.h>
35#include <linux/mlx5/driver.h>
36
37#include "mlx5_core.h"
38#include "lib/mlx5.h"
39#include "lib/eq.h"
40#include "fpga/core.h"
41#include "fpga/conn.h"
42
/*
 * Human-readable names for FPGA error syndromes, indexed by the syndrome
 * value. Lookups must be bounds-checked against ARRAY_SIZE().
 */
static const char *const mlx5_fpga_error_strings[] = {
	[0] = "Null Syndrome",
	[1] = "Corrupted DDR",
	[2] = "Flash Timeout",
	[3] = "Internal Link Error",
	[4] = "Watchdog HW Failure",
	[5] = "I2C Failure",
	[6] = "Image Changed",
	[7] = "Temperature Critical",
};
53
/*
 * Human-readable names for FPGA QP error syndromes, indexed by the syndrome
 * value. Lookups must be bounds-checked against ARRAY_SIZE().
 */
static const char *const mlx5_fpga_qp_error_strings[] = {
	[0] = "Null Syndrome",
	[1] = "Retry Counter Expired",
	[2] = "RNR Expired",
};
59static struct mlx5_fpga_device *mlx5_fpga_device_alloc(void)
60{
61 struct mlx5_fpga_device *fdev = NULL;
62
63 fdev = kzalloc(sizeof(*fdev), GFP_KERNEL);
64 if (!fdev)
65 return NULL;
66
67 spin_lock_init(&fdev->state_lock);
68 fdev->state = MLX5_FPGA_STATUS_NONE;
69 return fdev;
70}
71
72static const char *mlx5_fpga_image_name(enum mlx5_fpga_image image)
73{
74 switch (image) {
75 case MLX5_FPGA_IMAGE_USER:
76 return "user";
77 case MLX5_FPGA_IMAGE_FACTORY:
78 return "factory";
79 default:
80 return "unknown";
81 }
82}
83
84static const char *mlx5_fpga_name(u32 fpga_id)
85{
86 static char ret[32];
87
88 switch (fpga_id) {
89 case MLX5_FPGA_NEWTON:
90 return "Newton";
91 case MLX5_FPGA_EDISON:
92 return "Edison";
93 case MLX5_FPGA_MORSE:
94 return "Morse";
95 case MLX5_FPGA_MORSEQ:
96 return "MorseQ";
97 }
98
99 snprintf(ret, sizeof(ret), "Unknown %d", fpga_id);
100 return ret;
101}
102
103static int mlx5_is_fpga_lookaside(u32 fpga_id)
104{
105 return fpga_id != MLX5_FPGA_NEWTON && fpga_id != MLX5_FPGA_EDISON;
106}
107
108static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev)
109{
110 struct mlx5_fpga_query query;
111 int err;
112
113 err = mlx5_fpga_query(fdev->mdev, &query);
114 if (err) {
115 mlx5_fpga_err(fdev, "Failed to query status: %d\n", err);
116 return err;
117 }
118
119 fdev->last_admin_image = query.admin_image;
120 fdev->last_oper_image = query.oper_image;
121
122 mlx5_fpga_info(fdev, "Status %u; Admin image %u; Oper image %u\n",
123 query.status, query.admin_image, query.oper_image);
124
125
126 if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id)))
127 return 0;
128
129 if (query.status != MLX5_FPGA_STATUS_SUCCESS) {
130 mlx5_fpga_err(fdev, "%s image failed to load; status %u\n",
131 mlx5_fpga_image_name(fdev->last_oper_image),
132 query.status);
133 return -EIO;
134 }
135
136 return 0;
137}
138
139static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
140{
141 int err;
142 struct mlx5_core_dev *mdev = fdev->mdev;
143
144 err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
145 if (err) {
146 mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", err);
147 return err;
148 }
149 err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX);
150 if (err) {
151 mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", err);
152 return err;
153 }
154 err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF);
155 if (err) {
156 mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", err);
157 return err;
158 }
159 return 0;
160}
161
162static int mlx5_fpga_event(struct mlx5_fpga_device *, unsigned long, void *);
163
164static int fpga_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
165{
166 struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_err_nb);
167
168 return mlx5_fpga_event(fdev, event, eqe);
169}
170
171static int fpga_qp_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
172{
173 struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_qp_err_nb);
174
175 return mlx5_fpga_event(fdev, event, eqe);
176}
177
178int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
179{
180 struct mlx5_fpga_device *fdev = mdev->fpga;
181 unsigned int max_num_qps;
182 unsigned long flags;
183 u32 fpga_id;
184 int err;
185
186 if (!fdev)
187 return 0;
188
189 err = mlx5_fpga_caps(fdev->mdev);
190 if (err)
191 goto out;
192
193 err = mlx5_fpga_device_load_check(fdev);
194 if (err)
195 goto out;
196
197 fpga_id = MLX5_CAP_FPGA(fdev->mdev, fpga_id);
198 mlx5_fpga_info(fdev, "FPGA card %s:%u\n", mlx5_fpga_name(fpga_id), fpga_id);
199
200
201 if (mlx5_is_fpga_lookaside(fpga_id))
202 goto out;
203
204 mlx5_fpga_info(fdev, "%s(%d): image, version %u; SBU %06x:%04x version %d\n",
205 mlx5_fpga_image_name(fdev->last_oper_image),
206 fdev->last_oper_image,
207 MLX5_CAP_FPGA(fdev->mdev, image_version),
208 MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id),
209 MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id),
210 MLX5_CAP_FPGA(fdev->mdev, sandbox_product_version));
211
212 max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
213 if (!max_num_qps) {
214 mlx5_fpga_err(fdev, "FPGA reports 0 QPs in SHELL_CAPS\n");
215 err = -ENOTSUPP;
216 goto out;
217 }
218
219 err = mlx5_core_reserve_gids(mdev, max_num_qps);
220 if (err)
221 goto out;
222
223 MLX5_NB_INIT(&fdev->fpga_err_nb, fpga_err_event, FPGA_ERROR);
224 MLX5_NB_INIT(&fdev->fpga_qp_err_nb, fpga_qp_err_event, FPGA_QP_ERROR);
225 mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_err_nb);
226 mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_qp_err_nb);
227
228 err = mlx5_fpga_conn_device_init(fdev);
229 if (err)
230 goto err_rsvd_gid;
231
232 if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
233 err = mlx5_fpga_device_brb(fdev);
234 if (err)
235 goto err_conn_init;
236 }
237
238 goto out;
239
240err_conn_init:
241 mlx5_fpga_conn_device_cleanup(fdev);
242
243err_rsvd_gid:
244 mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
245 mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
246 mlx5_core_unreserve_gids(mdev, max_num_qps);
247out:
248 spin_lock_irqsave(&fdev->state_lock, flags);
249 fdev->state = err ? MLX5_FPGA_STATUS_FAILURE : MLX5_FPGA_STATUS_SUCCESS;
250 spin_unlock_irqrestore(&fdev->state_lock, flags);
251 return err;
252}
253
254int mlx5_fpga_init(struct mlx5_core_dev *mdev)
255{
256 struct mlx5_fpga_device *fdev = NULL;
257
258 if (!MLX5_CAP_GEN(mdev, fpga)) {
259 mlx5_core_dbg(mdev, "FPGA capability not present\n");
260 return 0;
261 }
262
263 mlx5_core_dbg(mdev, "Initializing FPGA\n");
264
265 fdev = mlx5_fpga_device_alloc();
266 if (!fdev)
267 return -ENOMEM;
268
269 fdev->mdev = mdev;
270 mdev->fpga = fdev;
271
272 return 0;
273}
274
275void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
276{
277 struct mlx5_fpga_device *fdev = mdev->fpga;
278 unsigned int max_num_qps;
279 unsigned long flags;
280 int err;
281
282 if (!fdev)
283 return;
284
285 if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id)))
286 return;
287
288 spin_lock_irqsave(&fdev->state_lock, flags);
289 if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) {
290 spin_unlock_irqrestore(&fdev->state_lock, flags);
291 return;
292 }
293 fdev->state = MLX5_FPGA_STATUS_NONE;
294 spin_unlock_irqrestore(&fdev->state_lock, flags);
295
296 if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
297 err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
298 if (err)
299 mlx5_fpga_err(fdev, "Failed to re-set SBU bypass on: %d\n",
300 err);
301 }
302
303 mlx5_fpga_conn_device_cleanup(fdev);
304 mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
305 mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
306
307 max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
308 mlx5_core_unreserve_gids(mdev, max_num_qps);
309}
310
311void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev)
312{
313 struct mlx5_fpga_device *fdev = mdev->fpga;
314
315 mlx5_fpga_device_stop(mdev);
316 kfree(fdev);
317 mdev->fpga = NULL;
318}
319
320static const char *mlx5_fpga_syndrome_to_string(u8 syndrome)
321{
322 if (syndrome < ARRAY_SIZE(mlx5_fpga_error_strings))
323 return mlx5_fpga_error_strings[syndrome];
324 return "Unknown";
325}
326
327static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome)
328{
329 if (syndrome < ARRAY_SIZE(mlx5_fpga_qp_error_strings))
330 return mlx5_fpga_qp_error_strings[syndrome];
331 return "Unknown";
332}
333
334static int mlx5_fpga_event(struct mlx5_fpga_device *fdev,
335 unsigned long event, void *eqe)
336{
337 void *data = ((struct mlx5_eqe *)eqe)->data.raw;
338 const char *event_name;
339 bool teardown = false;
340 unsigned long flags;
341 u8 syndrome;
342
343 switch (event) {
344 case MLX5_EVENT_TYPE_FPGA_ERROR:
345 syndrome = MLX5_GET(fpga_error_event, data, syndrome);
346 event_name = mlx5_fpga_syndrome_to_string(syndrome);
347 break;
348 case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
349 syndrome = MLX5_GET(fpga_qp_error_event, data, syndrome);
350 event_name = mlx5_fpga_qp_syndrome_to_string(syndrome);
351 break;
352 default:
353 return NOTIFY_DONE;
354 }
355
356 spin_lock_irqsave(&fdev->state_lock, flags);
357 switch (fdev->state) {
358 case MLX5_FPGA_STATUS_SUCCESS:
359 mlx5_fpga_warn(fdev, "Error %u: %s\n", syndrome, event_name);
360 teardown = true;
361 break;
362 default:
363 mlx5_fpga_warn_ratelimited(fdev, "Unexpected error event %u: %s\n",
364 syndrome, event_name);
365 }
366 spin_unlock_irqrestore(&fdev->state_lock, flags);
367
368
369
370
371
372 if (teardown)
373 mlx5_trigger_health_work(fdev->mdev);
374
375 return NOTIFY_OK;
376}
377