1
2
3
4#include "health.h"
5#include "lib/eq.h"
6#include "lib/mlx5.h"
7
/*
 * Open a named nested object in @fmsg: start a pair nest labelled @name and,
 * if that succeeds, start an object nest inside it.
 *
 * Returns 0 on success, otherwise the first non-zero value returned by the
 * devlink fmsg helpers.
 */
int mlx5e_health_fmsg_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name)
{
	int ret = devlink_fmsg_pair_nest_start(fmsg, name);

	/* Only open the object nest once the enclosing pair nest is open. */
	return ret ? ret : devlink_fmsg_obj_nest_start(fmsg);
}
22
/*
 * Close a named nested object in @fmsg: end the object nest first and, if
 * that succeeds, end the surrounding pair nest.
 *
 * Returns 0 on success, otherwise the first non-zero value returned by the
 * devlink fmsg helpers.
 */
int mlx5e_health_fmsg_named_obj_nest_end(struct devlink_fmsg *fmsg)
{
	int ret = devlink_fmsg_obj_nest_end(fmsg);

	/* The pair nest is only closed when the inner object nest closed cleanly. */
	return ret ? ret : devlink_fmsg_pair_nest_end(fmsg);
}
37
38int mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
39{
40 u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {};
41 u8 hw_status;
42 void *cqc;
43 int err;
44
45 err = mlx5_core_query_cq(cq->mdev, &cq->mcq, out);
46 if (err)
47 return err;
48
49 cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context);
50 hw_status = MLX5_GET(cqc, cqc, status);
51
52 err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
53 if (err)
54 return err;
55
56 err = devlink_fmsg_u32_pair_put(fmsg, "cqn", cq->mcq.cqn);
57 if (err)
58 return err;
59
60 err = devlink_fmsg_u8_pair_put(fmsg, "HW status", hw_status);
61 if (err)
62 return err;
63
64 err = devlink_fmsg_u32_pair_put(fmsg, "ci", mlx5_cqwq_get_ci(&cq->wq));
65 if (err)
66 return err;
67
68 err = devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&cq->wq));
69 if (err)
70 return err;
71
72 err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
73 if (err)
74 return err;
75
76 return 0;
77}
78
79int mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
80{
81 u8 cq_log_stride;
82 u32 cq_sz;
83 int err;
84
85 cq_sz = mlx5_cqwq_get_size(&cq->wq);
86 cq_log_stride = mlx5_cqwq_get_log_stride_size(&cq->wq);
87
88 err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
89 if (err)
90 return err;
91
92 err = devlink_fmsg_u64_pair_put(fmsg, "stride size", BIT(cq_log_stride));
93 if (err)
94 return err;
95
96 err = devlink_fmsg_u32_pair_put(fmsg, "size", cq_sz);
97 if (err)
98 return err;
99
100 err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
101 if (err)
102 return err;
103
104 return 0;
105}
106
107int mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg)
108{
109 int err;
110
111 err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "EQ");
112 if (err)
113 return err;
114
115 err = devlink_fmsg_u8_pair_put(fmsg, "eqn", eq->core.eqn);
116 if (err)
117 return err;
118
119 err = devlink_fmsg_u32_pair_put(fmsg, "irqn", eq->core.irqn);
120 if (err)
121 return err;
122
123 err = devlink_fmsg_u32_pair_put(fmsg, "vecidx", eq->core.vecidx);
124 if (err)
125 return err;
126
127 err = devlink_fmsg_u32_pair_put(fmsg, "ci", eq->core.cons_index);
128 if (err)
129 return err;
130
131 err = devlink_fmsg_u32_pair_put(fmsg, "size", eq_get_size(&eq->core));
132 if (err)
133 return err;
134
135 return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
136}
137
/*
 * Create the health reporters for @priv: the TX reporter first, then the RX
 * reporter.
 */
void mlx5e_health_create_reporters(struct mlx5e_priv *priv)
{
	mlx5e_reporter_tx_create(priv);
	mlx5e_reporter_rx_create(priv);
}
143
/*
 * Destroy the health reporters for @priv: the RX reporter first, then the TX
 * reporter.
 */
void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv)
{
	mlx5e_reporter_rx_destroy(priv);
	mlx5e_reporter_tx_destroy(priv);
}
149
150void mlx5e_health_channels_update(struct mlx5e_priv *priv)
151{
152 if (priv->tx_reporter)
153 devlink_health_reporter_state_update(priv->tx_reporter,
154 DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
155 if (priv->rx_reporter)
156 devlink_health_reporter_state_update(priv->rx_reporter,
157 DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
158}
159
160int mlx5e_health_sq_to_ready(struct mlx5_core_dev *mdev, struct net_device *dev, u32 sqn)
161{
162 struct mlx5e_modify_sq_param msp = {};
163 int err;
164
165 msp.curr_state = MLX5_SQC_STATE_ERR;
166 msp.next_state = MLX5_SQC_STATE_RST;
167
168 err = mlx5e_modify_sq(mdev, sqn, &msp);
169 if (err) {
170 netdev_err(dev, "Failed to move sq 0x%x to reset\n", sqn);
171 return err;
172 }
173
174 memset(&msp, 0, sizeof(msp));
175 msp.curr_state = MLX5_SQC_STATE_RST;
176 msp.next_state = MLX5_SQC_STATE_RDY;
177
178 err = mlx5e_modify_sq(mdev, sqn, &msp);
179 if (err) {
180 netdev_err(dev, "Failed to move sq 0x%x to ready\n", sqn);
181 return err;
182 }
183
184 return 0;
185}
186
187int mlx5e_health_recover_channels(struct mlx5e_priv *priv)
188{
189 int err = 0;
190
191 rtnl_lock();
192 mutex_lock(&priv->state_lock);
193
194 if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
195 goto out;
196
197 err = mlx5e_safe_reopen_channels(priv);
198
199out:
200 mutex_unlock(&priv->state_lock);
201 rtnl_unlock();
202
203 return err;
204}
205
206int mlx5e_health_channel_eq_recover(struct net_device *dev, struct mlx5_eq_comp *eq,
207 struct mlx5e_ch_stats *stats)
208{
209 u32 eqe_count;
210
211 netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
212 eq->core.eqn, eq->core.cons_index, eq->core.irqn);
213
214 eqe_count = mlx5_eq_poll_irq_disabled(eq);
215 if (!eqe_count)
216 return -EIO;
217
218 netdev_err(dev, "Recovered %d eqes on EQ 0x%x\n",
219 eqe_count, eq->core.eqn);
220
221 stats->eq_rearm++;
222 return 0;
223}
224
225int mlx5e_health_report(struct mlx5e_priv *priv,
226 struct devlink_health_reporter *reporter, char *err_str,
227 struct mlx5e_err_ctx *err_ctx)
228{
229 netdev_err(priv->netdev, "%s\n", err_str);
230
231 if (!reporter)
232 return err_ctx->recover(err_ctx->ctx);
233
234 return devlink_health_report(reporter, err_str, err_ctx);
235}
236
237#define MLX5_HEALTH_DEVLINK_MAX_SIZE 1024
238static int mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg,
239 const void *value, u32 value_len)
240
241{
242 u32 data_size;
243 int err = 0;
244 u32 offset;
245
246 for (offset = 0; offset < value_len; offset += data_size) {
247 data_size = value_len - offset;
248 if (data_size > MLX5_HEALTH_DEVLINK_MAX_SIZE)
249 data_size = MLX5_HEALTH_DEVLINK_MAX_SIZE;
250 err = devlink_fmsg_binary_put(fmsg, value + offset, data_size);
251 if (err)
252 break;
253 }
254 return err;
255}
256
257int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key,
258 struct devlink_fmsg *fmsg)
259{
260 struct mlx5_core_dev *mdev = priv->mdev;
261 struct mlx5_rsc_dump_cmd *cmd;
262 struct page *page;
263 int cmd_err, err;
264 int end_err;
265 int size;
266
267 if (IS_ERR_OR_NULL(mdev->rsc_dump))
268 return -EOPNOTSUPP;
269
270 page = alloc_page(GFP_KERNEL);
271 if (!page)
272 return -ENOMEM;
273
274 err = devlink_fmsg_binary_pair_nest_start(fmsg, "data");
275 if (err)
276 goto free_page;
277
278 cmd = mlx5_rsc_dump_cmd_create(mdev, key);
279 if (IS_ERR(cmd)) {
280 err = PTR_ERR(cmd);
281 goto free_page;
282 }
283
284 do {
285 cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size);
286 if (cmd_err < 0) {
287 err = cmd_err;
288 goto destroy_cmd;
289 }
290
291 err = mlx5e_health_rsc_fmsg_binary(fmsg, page_address(page), size);
292 if (err)
293 goto destroy_cmd;
294
295 } while (cmd_err > 0);
296
297destroy_cmd:
298 mlx5_rsc_dump_cmd_destroy(cmd);
299 end_err = devlink_fmsg_binary_pair_nest_end(fmsg);
300 if (end_err)
301 err = end_err;
302free_page:
303 __free_page(page);
304 return err;
305}
306
307int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
308 int queue_idx, char *lbl)
309{
310 struct mlx5_rsc_key key = {};
311 int err;
312
313 key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
314 key.index1 = queue_idx;
315 key.size = PAGE_SIZE;
316 key.num_of_obj1 = 1;
317
318 err = devlink_fmsg_obj_nest_start(fmsg);
319 if (err)
320 return err;
321
322 err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, lbl);
323 if (err)
324 return err;
325
326 err = devlink_fmsg_u32_pair_put(fmsg, "index", queue_idx);
327 if (err)
328 return err;
329
330 err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
331 if (err)
332 return err;
333
334 err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
335 if (err)
336 return err;
337
338 return devlink_fmsg_obj_nest_end(fmsg);
339}
340