#include "mlx5_ib.h"

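/* Book-keeping entry for one outstanding send WR on the software GSI QP. */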
struct mlx5_ib_gsi_wr {
	struct ib_cqe cqe;
	struct ib_wc wc;
	bool completed:1;
};

static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev)
{
	return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn);
}

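/*
 * Push completions for finished send WRs to the GSI send CQ, in the order
 * they were posted, stopping at the first WR that has not completed yet.
 * Call with gsi->lock held.
 */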
static void generate_completions(struct mlx5_ib_qp *mqp)
{
	struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
	struct ib_cq *gsi_cq = mqp->ibqp.send_cq;
	struct mlx5_ib_gsi_wr *wr;
	u32 index;

	for (index = gsi->outstanding_ci; index != gsi->outstanding_pi;
	     index++) {
		wr = &gsi->outstanding_wrs[index % gsi->cap.max_send_wr];

		if (!wr->completed)
			break;

		WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc));
		wr->completed = false;
	}

	gsi->outstanding_ci = index;
}

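/*
 * Send completion handler for the TX QPs: copy the hardware completion into
 * the matching outstanding-WR slot (preserving the caller's wr_id and
 * reporting the GSI QP as the source) and flush in-order completions.
 */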
static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc)
{
	struct mlx5_ib_gsi_qp *gsi = cq->cq_context;
	struct mlx5_ib_gsi_wr *wr =
		container_of(wc->wr_cqe, struct mlx5_ib_gsi_wr, cqe);
	struct mlx5_ib_qp *mqp = container_of(gsi, struct mlx5_ib_qp, gsi);
	u64 wr_id;
	unsigned long flags;

	spin_lock_irqsave(&gsi->lock, flags);
	wr->completed = true;
	wr_id = wr->wc.wr_id;
	wr->wc = *wc;
	wr->wc.wr_id = wr_id;
	wr->wc.qp = &mqp->ibqp;

	generate_completions(mqp);
	spin_unlock_irqrestore(&gsi->lock, flags);
}

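/*
 * Create the software GSI QP: a hardware GSI QP used for receives, plus an
 * array of UD TX QPs (one per P_Key on IB ports, or one per port when LAG is
 * active) that are brought up lazily once the QP is moved to RTS.
 */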
int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp,
		       struct ib_qp_init_attr *attr)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_gsi_qp *gsi;
	struct ib_qp_init_attr hw_init_attr = *attr;
	const u8 port_num = attr->port_num;
	int num_qps = 0;
	int ret;

	if (mlx5_ib_deth_sqpn_cap(dev)) {
		if (MLX5_CAP_GEN(dev->mdev,
				 port_type) == MLX5_CAP_PORT_TYPE_IB)
			num_qps = pd->device->attrs.max_pkeys;
		else if (dev->lag_active)
			num_qps = MLX5_MAX_PORTS;
	}

	gsi = &mqp->gsi;
	gsi->tx_qps = kcalloc(num_qps, sizeof(*gsi->tx_qps), GFP_KERNEL);
	if (!gsi->tx_qps)
		return -ENOMEM;

	gsi->outstanding_wrs =
		kcalloc(attr->cap.max_send_wr, sizeof(*gsi->outstanding_wrs),
			GFP_KERNEL);
	if (!gsi->outstanding_wrs) {
		ret = -ENOMEM;
		goto err_free_tx;
	}

	if (dev->devr.ports[port_num - 1].gsi) {
		mlx5_ib_warn(dev, "GSI QP already exists on port %d\n",
			     port_num);
		ret = -EBUSY;
		goto err_free_wrs;
	}
	gsi->num_qps = num_qps;
	spin_lock_init(&gsi->lock);

	gsi->cap = attr->cap;
	gsi->port_num = port_num;

	gsi->cq = ib_alloc_cq(pd->device, gsi, attr->cap.max_send_wr, 0,
			      IB_POLL_SOFTIRQ);
	if (IS_ERR(gsi->cq)) {
		mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. error %ld\n",
			     PTR_ERR(gsi->cq));
		ret = PTR_ERR(gsi->cq);
		goto err_free_wrs;
	}

	hw_init_attr.qp_type = MLX5_IB_QPT_HW_GSI;
	hw_init_attr.send_cq = gsi->cq;
	if (num_qps) {
		hw_init_attr.cap.max_send_wr = 0;
		hw_init_attr.cap.max_send_sge = 0;
		hw_init_attr.cap.max_inline_data = 0;
	}

	gsi->rx_qp = ib_create_qp(pd, &hw_init_attr);
	if (IS_ERR(gsi->rx_qp)) {
		mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n",
			     PTR_ERR(gsi->rx_qp));
		ret = PTR_ERR(gsi->rx_qp);
		goto err_destroy_cq;
	}

	dev->devr.ports[attr->port_num - 1].gsi = gsi;
	return 0;

err_destroy_cq:
	ib_free_cq(gsi->cq);
err_free_wrs:
	kfree(gsi->outstanding_wrs);
err_free_tx:
	kfree(gsi->tx_qps);
	return ret;
}

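/* Tear down the software GSI QP: hardware RX QP, all TX QPs, the send CQ and
 * the book-keeping arrays.
 */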
int mlx5_ib_destroy_gsi(struct mlx5_ib_qp *mqp)
{
	struct mlx5_ib_dev *dev = to_mdev(mqp->ibqp.device);
	struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
	const int port_num = gsi->port_num;
	int qp_index;
	int ret;

	ret = ib_destroy_qp(gsi->rx_qp);
	if (ret) {
		mlx5_ib_warn(dev, "unable to destroy hardware GSI QP. error %d\n",
			     ret);
		return ret;
	}
	dev->devr.ports[port_num - 1].gsi = NULL;
	gsi->rx_qp = NULL;

	for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) {
		if (!gsi->tx_qps[qp_index])
			continue;
		WARN_ON_ONCE(ib_destroy_qp(gsi->tx_qps[qp_index]));
		gsi->tx_qps[qp_index] = NULL;
	}

	ib_free_cq(gsi->cq);

	kfree(gsi->outstanding_wrs);
	kfree(gsi->tx_qps);
	return 0;
}

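/* Create one UD TX QP that transmits using the DETH source QP number of QP1. */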
static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi)
{
	struct ib_pd *pd = gsi->rx_qp->pd;
	struct ib_qp_init_attr init_attr = {
		.event_handler = gsi->rx_qp->event_handler,
		.qp_context = gsi->rx_qp->qp_context,
		.send_cq = gsi->cq,
		.recv_cq = gsi->rx_qp->recv_cq,
		.cap = {
			.max_send_wr = gsi->cap.max_send_wr,
			.max_send_sge = gsi->cap.max_send_sge,
			.max_inline_data = gsi->cap.max_inline_data,
		},
		.qp_type = IB_QPT_UD,
		.create_flags = MLX5_IB_QP_CREATE_SQPN_QP1,
	};

	return ib_create_qp(pd, &init_attr);
}

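/* Walk a freshly created TX QP through INIT -> RTR -> RTS. */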
static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp,
			 u16 pkey_index)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->device);
	struct ib_qp_attr attr;
	int mask;
	int ret;

	mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY | IB_QP_PORT;
	attr.qp_state = IB_QPS_INIT;
	attr.pkey_index = pkey_index;
	attr.qkey = IB_QP1_QKEY;
	attr.port_num = gsi->port_num;
	ret = ib_modify_qp(qp, &attr, mask);
	if (ret) {
		mlx5_ib_err(dev, "could not change QP%d state to INIT: %d\n",
			    qp->qp_num, ret);
		return ret;
	}

	attr.qp_state = IB_QPS_RTR;
	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
	if (ret) {
		mlx5_ib_err(dev, "could not change QP%d state to RTR: %d\n",
			    qp->qp_num, ret);
		return ret;
	}

	attr.qp_state = IB_QPS_RTS;
	attr.sq_psn = 0;
	ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
	if (ret) {
		mlx5_ib_err(dev, "could not change QP%d state to RTS: %d\n",
			    qp->qp_num, ret);
		return ret;
	}

	return 0;
}

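/*
 * Create and activate the TX QP for the given index if its P_Key is valid
 * and no QP exists there yet. On failure the slot stays empty, and sends
 * targeting it are completed as silent drops by mlx5_ib_gsi_post_send().
 */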
static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index)
{
	struct ib_device *device = gsi->rx_qp->device;
	struct mlx5_ib_dev *dev = to_mdev(device);
	int pkey_index = qp_index;
	struct mlx5_ib_qp *mqp;
	struct ib_qp *qp;
	unsigned long flags;
	u16 pkey;
	int ret;

	if (MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_IB)
		pkey_index = 0;

	ret = ib_query_pkey(device, gsi->port_num, pkey_index, &pkey);
	if (ret) {
		mlx5_ib_warn(dev, "unable to read P_Key at port %d, index %d\n",
			     gsi->port_num, qp_index);
		return;
	}

	if (!pkey) {
		mlx5_ib_dbg(dev, "invalid P_Key at port %d, index %d. Skipping.\n",
			    gsi->port_num, qp_index);
		return;
	}

	spin_lock_irqsave(&gsi->lock, flags);
	qp = gsi->tx_qps[qp_index];
	spin_unlock_irqrestore(&gsi->lock, flags);
	if (qp) {
		mlx5_ib_dbg(dev, "already existing GSI TX QP at port %d, index %d. Skipping\n",
			    gsi->port_num, qp_index);
		return;
	}

	qp = create_gsi_ud_qp(gsi);
	if (IS_ERR(qp)) {
		mlx5_ib_warn(dev, "unable to create hardware UD QP for GSI: %ld\n",
			     PTR_ERR(qp));
		return;
	}

	mqp = to_mqp(qp);
	if (dev->lag_active)
		mqp->gsi_lag_port = qp_index + 1;
	ret = modify_to_rts(gsi, qp, pkey_index);
	if (ret)
		goto err_destroy_qp;

	spin_lock_irqsave(&gsi->lock, flags);
	WARN_ON_ONCE(gsi->tx_qps[qp_index]);
	gsi->tx_qps[qp_index] = qp;
	spin_unlock_irqrestore(&gsi->lock, flags);

	return;

err_destroy_qp:
	WARN_ON_ONCE(qp);
}

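/*
 * Modify the underlying hardware GSI QP; once it reaches RTS, bring up the
 * per-index TX QPs as well.
 */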
int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
			  int attr_mask)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->device);
	struct mlx5_ib_qp *mqp = to_mqp(qp);
	struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
	u16 qp_index;
	int ret;

	mlx5_ib_dbg(dev, "modifying GSI QP to state %d\n", attr->qp_state);

	ret = ib_modify_qp(gsi->rx_qp, attr, attr_mask);
	if (ret) {
		mlx5_ib_warn(dev, "unable to modify GSI rx QP: %d\n", ret);
		return ret;
	}

	if (to_mqp(gsi->rx_qp)->state != IB_QPS_RTS)
		return 0;

	for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index)
		setup_qp(gsi, qp_index);
	return 0;
}

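/* Query the hardware GSI QP, but report the caps the user originally requested. */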
int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
			 int qp_attr_mask,
			 struct ib_qp_init_attr *qp_init_attr)
{
	struct mlx5_ib_qp *mqp = to_mqp(qp);
	struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
	int ret;

	ret = ib_query_qp(gsi->rx_qp, qp_attr, qp_attr_mask, qp_init_attr);
	qp_init_attr->cap = gsi->cap;
	return ret;
}

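/*
 * Reserve an outstanding-WR slot for @wr and attach the completion handler.
 * When @wc is supplied the slot is pre-marked completed (used for silent
 * drops). Call with gsi->lock held.
 */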
static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_qp *mqp,
				      struct ib_ud_wr *wr, struct ib_wc *wc)
{
	struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
	struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
	struct mlx5_ib_gsi_wr *gsi_wr;

	if (gsi->outstanding_pi == gsi->outstanding_ci + gsi->cap.max_send_wr) {
		mlx5_ib_warn(dev, "no available GSI work request.\n");
		return -ENOMEM;
	}

	gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi %
				       gsi->cap.max_send_wr];
	gsi->outstanding_pi++;

	if (!wc) {
		memset(&gsi_wr->wc, 0, sizeof(gsi_wr->wc));
		gsi_wr->wc.pkey_index = wr->pkey_index;
		gsi_wr->wc.wr_id = wr->wr.wr_id;
	} else {
		gsi_wr->wc = *wc;
		gsi_wr->completed = true;
	}

	gsi_wr->cqe.done = &handle_single_completion;
	wr->wr.wr_cqe = &gsi_wr->cqe;

	return 0;
}

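/*
 * Complete a send that has no usable TX QP by queueing an artificial
 * successful completion instead of posting to hardware.
 */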
static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_qp *mqp, struct ib_ud_wr *wr)
{
	struct ib_wc wc = {
		{ .wr_id = wr->wr.wr_id },
		.status = IB_WC_SUCCESS,
		.opcode = IB_WC_SEND,
		.qp = &mqp->ibqp,
	};
	int ret;

	ret = mlx5_ib_add_outstanding_wr(mqp, wr, &wc);
	if (ret)
		return ret;

	generate_completions(mqp);

	return 0;
}

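/*
 * Pick the TX QP for a send WR: the hardware GSI QP when no TX QPs exist,
 * otherwise the QP selected by P_Key index (or by the AH's transmit port
 * under LAG). Call with gsi->lock held.
 */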
static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr)
{
	struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
	struct mlx5_ib_ah *ah = to_mah(wr->ah);
	int qp_index = wr->pkey_index;

	if (!gsi->num_qps)
		return gsi->rx_qp;

	if (dev->lag_active && ah->xmit_port)
		qp_index = ah->xmit_port - 1;

	if (qp_index >= gsi->num_qps)
		return NULL;

	return gsi->tx_qps[qp_index];
}

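/*
 * Post a chain of send WRs one by one, routing each to its TX QP and
 * tracking it in the outstanding-WR ring so completions are reported on the
 * GSI send CQ in posting order.
 */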
int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr,
			  const struct ib_send_wr **bad_wr)
{
	struct mlx5_ib_qp *mqp = to_mqp(qp);
	struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
	struct ib_qp *tx_qp;
	unsigned long flags;
	int ret;

	for (; wr; wr = wr->next) {
		struct ib_ud_wr cur_wr = *ud_wr(wr);

		cur_wr.wr.next = NULL;

		spin_lock_irqsave(&gsi->lock, flags);
		tx_qp = get_tx_qp(gsi, &cur_wr);
		if (!tx_qp) {
			ret = mlx5_ib_gsi_silent_drop(mqp, &cur_wr);
			if (ret)
				goto err;
			spin_unlock_irqrestore(&gsi->lock, flags);
			continue;
		}

		ret = mlx5_ib_add_outstanding_wr(mqp, &cur_wr, NULL);
		if (ret)
			goto err;

		ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr);
		if (ret) {
			/* Undo the effect of adding the outstanding wr */
			gsi->outstanding_pi--;
			goto err;
		}
		spin_unlock_irqrestore(&gsi->lock, flags);
	}

	return 0;

err:
	spin_unlock_irqrestore(&gsi->lock, flags);
	*bad_wr = wr;
	return ret;
}

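/* Receives always go through the hardware GSI QP. */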
int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr,
			  const struct ib_recv_wr **bad_wr)
{
	struct mlx5_ib_qp *mqp = to_mqp(qp);
	struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;

	return ib_post_recv(gsi->rx_qp, wr, bad_wr);
}

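/* P_Key table changed: (re)create TX QPs for any newly valid P_Key indices. */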
void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi)
{
	u16 qp_index;

	for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index)
		setup_qp(gsi, qp_index);
}