#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/netdevice.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_mad.h>

#include <linux/mlx4/qp.h>

#include "mlx4_ib.h"
#include "user.h"
47
enum {
	MLX4_IB_ACK_REQ_FREQ	= 8,
};

enum {
	MLX4_IB_DEFAULT_SCHED_QUEUE	= 0x83,
	MLX4_IB_DEFAULT_QP0_SCHED_QUEUE	= 0x3f,
	MLX4_IB_LINK_TYPE_IB		= 0,
	MLX4_IB_LINK_TYPE_ETH		= 1
};
58
enum {
	/*
	 * Largest possible UD header: send with GRH and immediate
	 * data plus 18 bytes for an Ethernet header with VLAN/802.1Q
	 * tag.  (LRH would only use 8 bytes, so Ethernet is the
	 * biggest case)
	 */
	MLX4_IB_UD_HEADER_SIZE		= 82,
	MLX4_IB_LSO_HEADER_SPARE	= 128,
};
69
70enum {
71 MLX4_IB_IBOE_ETHERTYPE = 0x8915
72};
73
74struct mlx4_ib_sqp {
75 struct mlx4_ib_qp qp;
76 int pkey_index;
77 u32 qkey;
78 u32 send_psn;
79 struct ib_ud_header ud_header;
80 u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
81};
82
83enum {
84 MLX4_IB_MIN_SQ_STRIDE = 6,
85 MLX4_IB_CACHE_LINE_SIZE = 64,
86};
87
88enum {
89 MLX4_RAW_QP_MTU = 7,
90 MLX4_RAW_QP_MSGMAX = 31,
91};
92
93static const __be32 mlx4_ib_opcode[] = {
94 [IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND),
95 [IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO),
96 [IB_WR_SEND_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_SEND_IMM),
97 [IB_WR_RDMA_WRITE] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
98 [IB_WR_RDMA_WRITE_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
99 [IB_WR_RDMA_READ] = cpu_to_be32(MLX4_OPCODE_RDMA_READ),
100 [IB_WR_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
101 [IB_WR_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
102 [IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
103 [IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
104 [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR),
105 [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
106 [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
107 [IB_WR_BIND_MW] = cpu_to_be32(MLX4_OPCODE_BIND_MW),
108};
109
110static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
111{
112 return container_of(mqp, struct mlx4_ib_sqp, qp);
113}
114
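/*
 * A "tunnel QP" is one of the QPs the master (PF) uses to tunnel
 * special QP (QP0/QP1) traffic to and from the slaves in
 * multi-function (SR-IOV) mode; check the QPN against the reserved
 * tunnel QPN range.
 */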
115static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
116{
117 if (!mlx4_is_master(dev->dev))
118 return 0;
119
120 return qp->mqp.qpn >= dev->dev->phys_caps.base_tunnel_sqpn &&
121 qp->mqp.qpn < dev->dev->phys_caps.base_tunnel_sqpn +
122 8 * MLX4_MFUNC_MAX;
123}
124
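/*
 * A QP is a special QP (QP0/QP1) if it is either one of the real
 * special QPs (only owned when we are the master or not
 * multi-function) or one of the per-port proxy QP0/QP1 QPs.
 */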
125static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
126{
127 int proxy_sqp = 0;
128 int real_sqp = 0;
129 int i;
130
131 real_sqp = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
132 qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
133 qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 3);
134 if (real_sqp)
135 return 1;
136
137 if (mlx4_is_mfunc(dev->dev)) {
138 for (i = 0; i < dev->dev->caps.num_ports; i++) {
139 if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i] ||
140 qp->mqp.qpn == dev->dev->caps.qp1_proxy[i]) {
141 proxy_sqp = 1;
142 break;
143 }
144 }
145 }
146 return proxy_sqp;
147}
148
149
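/* As above, but only for QP0 (SMI) QPs: real QP0 or a per-port proxy QP0. */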
150static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
151{
152 int proxy_qp0 = 0;
153 int real_qp0 = 0;
154 int i;
155
156 real_qp0 = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
157 qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
158 qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 1);
159 if (real_qp0)
160 return 1;
161
162 if (mlx4_is_mfunc(dev->dev)) {
163 for (i = 0; i < dev->dev->caps.num_ports; i++) {
164 if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i]) {
165 proxy_qp0 = 1;
166 break;
167 }
168 }
169 }
170 return proxy_qp0;
171}
172
173static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
174{
175 return mlx4_buf_offset(&qp->buf, offset);
176}
177
178static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n)
179{
180 return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
181}
182
183static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)
184{
185 return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
186}
187
/*
 * Stamp a SQ WQE so that it is invalid if prefetched by marking the
 * first four bytes of every 64 byte chunk with
 *     0x7FFFFFF | (invalid_ownership_value << 31).
 *
 * When the max work request size is less than or equal to the WQE
 * basic block size, as an optimization, we can stamp all WQEs with
 * 0xffffffff, and skip the very first chunk of each WQE.
 */
197static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size)
198{
199 __be32 *wqe;
200 int i;
201 int s;
202 int ind;
203 void *buf;
204 __be32 stamp;
205 struct mlx4_wqe_ctrl_seg *ctrl;
206
207 if (qp->sq_max_wqes_per_wr > 1) {
208 s = roundup(size, 1U << qp->sq.wqe_shift);
209 for (i = 0; i < s; i += 64) {
210 ind = (i >> qp->sq.wqe_shift) + n;
211 stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) :
212 cpu_to_be32(0xffffffff);
213 buf = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
214 wqe = buf + (i & ((1 << qp->sq.wqe_shift) - 1));
215 *wqe = stamp;
216 }
217 } else {
218 ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
219 s = (ctrl->fence_size & 0x3f) << 4;
220 for (i = 64; i < s; i += 64) {
221 wqe = buf + i;
222 *wqe = cpu_to_be32(0xffffffff);
223 }
224 }
225}
226
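/*
 * Post a NOP send WQE (padded out with an invalid inline segment if
 * needed) so that the hardware skips over unused space at the end of
 * the send queue instead of wrapping in the middle of a WR.
 */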
227static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size)
228{
229 struct mlx4_wqe_ctrl_seg *ctrl;
230 struct mlx4_wqe_inline_seg *inl;
231 void *wqe;
232 int s;
233
234 ctrl = wqe = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
235 s = sizeof(struct mlx4_wqe_ctrl_seg);
236
237 if (qp->ibqp.qp_type == IB_QPT_UD) {
238 struct mlx4_wqe_datagram_seg *dgram = wqe + sizeof *ctrl;
239 struct mlx4_av *av = (struct mlx4_av *)dgram->av;
240 memset(dgram, 0, sizeof *dgram);
241 av->port_pd = cpu_to_be32((qp->port << 24) | to_mpd(qp->ibqp.pd)->pdn);
242 s += sizeof(struct mlx4_wqe_datagram_seg);
243 }
244
245
246 if (size > s) {
247 inl = wqe + s;
248 inl->byte_count = cpu_to_be32(1 << 31 | (size - s - sizeof *inl));
249 }
250 ctrl->srcrb_flags = 0;
251 ctrl->fence_size = size / 16;
252
253
254
255
256 wmb();
257
258 ctrl->owner_opcode = cpu_to_be32(MLX4_OPCODE_NOP | MLX4_WQE_CTRL_NEC) |
259 (n & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
260
261 stamp_send_wqe(qp, n + qp->sq_spare_wqes, size);
262}
263
264
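/*
 * If fewer WQE slots remain before the end of the SQ ring than a
 * maximum-size WR needs, pad them with a NOP so that no WR wraps
 * around the end of the queue.
 */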
265static inline unsigned pad_wraparound(struct mlx4_ib_qp *qp, int ind)
266{
267 unsigned s = qp->sq.wqe_cnt - (ind & (qp->sq.wqe_cnt - 1));
268 if (unlikely(s < qp->sq_max_wqes_per_wr)) {
269 post_nop_wqe(qp, ind, s << qp->sq.wqe_shift);
270 ind += s;
271 }
272 return ind;
273}
274
275static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
276{
277 struct ib_event event;
278 struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
279
280 if (type == MLX4_EVENT_TYPE_PATH_MIG)
281 to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
282
283 if (ibqp->event_handler) {
284 event.device = ibqp->device;
285 event.element.qp = ibqp;
286 switch (type) {
287 case MLX4_EVENT_TYPE_PATH_MIG:
288 event.event = IB_EVENT_PATH_MIG;
289 break;
290 case MLX4_EVENT_TYPE_COMM_EST:
291 event.event = IB_EVENT_COMM_EST;
292 break;
293 case MLX4_EVENT_TYPE_SQ_DRAINED:
294 event.event = IB_EVENT_SQ_DRAINED;
295 break;
296 case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
297 event.event = IB_EVENT_QP_LAST_WQE_REACHED;
298 break;
299 case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
300 event.event = IB_EVENT_QP_FATAL;
301 break;
302 case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
303 event.event = IB_EVENT_PATH_MIG_ERR;
304 break;
305 case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
306 event.event = IB_EVENT_QP_REQ_ERR;
307 break;
308 case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
309 event.event = IB_EVENT_QP_ACCESS_ERR;
310 break;
311 default:
312 pr_warn("Unexpected event type %d "
313 "on QP %06x\n", type, qp->qpn);
314 return;
315 }
316
317 ibqp->event_handler(&event, ibqp->qp_context);
318 }
319}
320
321static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
322{
	/*
	 * UD WQEs must have a datagram segment.
	 * RC and UC WQEs might have a remote address segment.
	 * MLX WQEs need two extra inline data segments (for the UD
	 * header and space for the ICRC).
	 */
329 switch (type) {
330 case MLX4_IB_QPT_UD:
331 return sizeof (struct mlx4_wqe_ctrl_seg) +
332 sizeof (struct mlx4_wqe_datagram_seg) +
333 ((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0);
334 case MLX4_IB_QPT_PROXY_SMI_OWNER:
335 case MLX4_IB_QPT_PROXY_SMI:
336 case MLX4_IB_QPT_PROXY_GSI:
337 return sizeof (struct mlx4_wqe_ctrl_seg) +
338 sizeof (struct mlx4_wqe_datagram_seg) + 64;
339 case MLX4_IB_QPT_TUN_SMI_OWNER:
340 case MLX4_IB_QPT_TUN_GSI:
341 return sizeof (struct mlx4_wqe_ctrl_seg) +
342 sizeof (struct mlx4_wqe_datagram_seg);
343
344 case MLX4_IB_QPT_UC:
345 return sizeof (struct mlx4_wqe_ctrl_seg) +
346 sizeof (struct mlx4_wqe_raddr_seg);
347 case MLX4_IB_QPT_RC:
348 return sizeof (struct mlx4_wqe_ctrl_seg) +
349 sizeof (struct mlx4_wqe_atomic_seg) +
350 sizeof (struct mlx4_wqe_raddr_seg);
351 case MLX4_IB_QPT_SMI:
352 case MLX4_IB_QPT_GSI:
353 return sizeof (struct mlx4_wqe_ctrl_seg) +
354 ALIGN(MLX4_IB_UD_HEADER_SIZE +
355 DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,
356 MLX4_INLINE_ALIGN) *
357 sizeof (struct mlx4_wqe_inline_seg),
358 sizeof (struct mlx4_wqe_data_seg)) +
359 ALIGN(4 +
360 sizeof (struct mlx4_wqe_inline_seg),
361 sizeof (struct mlx4_wqe_data_seg));
362 default:
363 return sizeof (struct mlx4_wqe_ctrl_seg);
364 }
365}
366
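/*
 * Validate the requested receive queue size against device limits and
 * compute the actual RQ geometry (WQE count, max s/g entries and WQE
 * stride), rounding up to powers of two as the hardware requires.
 */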
367static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
368 int is_user, int has_rq, struct mlx4_ib_qp *qp)
369{
370
371 if (cap->max_recv_wr > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE ||
372 cap->max_recv_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg))
373 return -EINVAL;
374
375 if (!has_rq) {
376 if (cap->max_recv_wr)
377 return -EINVAL;
378
379 qp->rq.wqe_cnt = qp->rq.max_gs = 0;
380 } else {
381
382 if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge))
383 return -EINVAL;
384
385 qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, cap->max_recv_wr));
386 qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge));
387 qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg));
388 }
389
390
391 if (is_user) {
392 cap->max_recv_wr = qp->rq.max_post = qp->rq.wqe_cnt;
393 cap->max_recv_sge = qp->rq.max_gs;
394 } else {
395 cap->max_recv_wr = qp->rq.max_post =
396 min(dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE, qp->rq.wqe_cnt);
397 cap->max_recv_sge = min(qp->rq.max_gs,
398 min(dev->dev->caps.max_sq_sg,
399 dev->dev->caps.max_rq_sg));
400 }
401
402 return 0;
403}
404
405static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
406 enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp)
407{
408 int s;
409
410
411 if (cap->max_send_wr > (dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE) ||
412 cap->max_send_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg) ||
413 cap->max_inline_data + send_wqe_overhead(type, qp->flags) +
414 sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
415 return -EINVAL;
416
	/*
	 * For MLX transport we need 2 extra S/G entries:
	 * one for the header and one for the checksum at the end
	 */
421 if ((type == MLX4_IB_QPT_SMI || type == MLX4_IB_QPT_GSI ||
422 type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) &&
423 cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
424 return -EINVAL;
425
426 s = max(cap->max_send_sge * sizeof (struct mlx4_wqe_data_seg),
427 cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg)) +
428 send_wqe_overhead(type, qp->flags);
429
430 if (s > dev->dev->caps.max_sq_desc_sz)
431 return -EINVAL;
432
	/*
	 * The HCA supports shrinking WQEs, such that a single work
	 * request can include multiple units of 1 << wqe_shift.  This
	 * way, work requests can differ in size, and do not have to
	 * be a power of 2 in size, saving memory and speeding up send
	 * WR posting.  Unfortunately, if we do this then the
	 * wqe_index field in CQEs can't be used to look up the WR ID
	 * anymore, so we do this only if selective signaling is off.
	 *
	 * Further, on 32-bit platforms, we can't use vmap() to make
	 * the QP buffer virtually contiguous.  Thus we have to use
	 * constant-sized WRs to make sure a WR is always fully within
	 * a single page-sized chunk.
	 *
	 * Finally, we use NOP work requests to pad the end of the
	 * work queue, to avoid wrap-around in the middle of WR.  We
	 * set the NEC bit to avoid getting completions with error for
	 * these NOP WRs, but since NEC is only supported by firmware
	 * >= MLX4_FW_VER_WQE_CTRL_NEC, we use constant-sized WRs for
	 * older firmware.
	 *
	 * And, since MLX QPs only support SEND, we use constant-sized
	 * WRs in this case.
	 *
	 * We look for the smallest value of wqe_shift such that the
	 * resulting number of wqes does not exceed device
	 * capabilities.
	 *
	 * We set WQE size to at least 64 bytes, this way stamping
	 * invalidates each WQE.
	 */
464 if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
465 qp->sq_signal_bits && BITS_PER_LONG == 64 &&
466 type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI &&
467 !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI |
468 MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER)))
469 qp->sq.wqe_shift = ilog2(64);
470 else
471 qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s));
472
473 for (;;) {
474 qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s, 1U << qp->sq.wqe_shift);
475
		/*
		 * We need to leave 2 KB + 1 WR of headroom in the SQ to
		 * allow HW to prefetch.
		 */
480 qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + qp->sq_max_wqes_per_wr;
481 qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr *
482 qp->sq_max_wqes_per_wr +
483 qp->sq_spare_wqes);
484
485 if (qp->sq.wqe_cnt <= dev->dev->caps.max_wqes)
486 break;
487
488 if (qp->sq_max_wqes_per_wr <= 1)
489 return -EINVAL;
490
491 ++qp->sq.wqe_shift;
492 }
493
494 qp->sq.max_gs = (min(dev->dev->caps.max_sq_desc_sz,
495 (qp->sq_max_wqes_per_wr << qp->sq.wqe_shift)) -
496 send_wqe_overhead(type, qp->flags)) /
497 sizeof (struct mlx4_wqe_data_seg);
498
499 qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
500 (qp->sq.wqe_cnt << qp->sq.wqe_shift);
501 if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
502 qp->rq.offset = 0;
503 qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
504 } else {
505 qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift;
506 qp->sq.offset = 0;
507 }
508
509 cap->max_send_wr = qp->sq.max_post =
510 (qp->sq.wqe_cnt - qp->sq_spare_wqes) / qp->sq_max_wqes_per_wr;
511 cap->max_send_sge = min(qp->sq.max_gs,
512 min(dev->dev->caps.max_sq_sg,
513 dev->dev->caps.max_rq_sg));
514
515 cap->max_inline_data = 0;
516
517 return 0;
518}
519
520static int set_user_sq_size(struct mlx4_ib_dev *dev,
521 struct mlx4_ib_qp *qp,
522 struct mlx4_ib_create_qp *ucmd)
523{
524
525 if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes ||
526 ucmd->log_sq_stride >
527 ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) ||
528 ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE)
529 return -EINVAL;
530
531 qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count;
532 qp->sq.wqe_shift = ucmd->log_sq_stride;
533
534 qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
535 (qp->sq.wqe_cnt << qp->sq.wqe_shift);
536
537 return 0;
538}
539
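/*
 * For proxy special QPs, allocate and DMA-map one receive buffer per
 * RQ WQE to hold the tunnelled SQP header that accompanies each
 * received packet.
 */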
540static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
541{
542 int i;
543
544 qp->sqp_proxy_rcv =
545 kmalloc(sizeof (struct mlx4_ib_buf) * qp->rq.wqe_cnt,
546 GFP_KERNEL);
547 if (!qp->sqp_proxy_rcv)
548 return -ENOMEM;
549 for (i = 0; i < qp->rq.wqe_cnt; i++) {
550 qp->sqp_proxy_rcv[i].addr =
551 kmalloc(sizeof (struct mlx4_ib_proxy_sqp_hdr),
552 GFP_KERNEL);
553 if (!qp->sqp_proxy_rcv[i].addr)
554 goto err;
555 qp->sqp_proxy_rcv[i].map =
556 ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr,
557 sizeof (struct mlx4_ib_proxy_sqp_hdr),
558 DMA_FROM_DEVICE);
559 }
560 return 0;
561
562err:
563 while (i > 0) {
564 --i;
565 ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
566 sizeof (struct mlx4_ib_proxy_sqp_hdr),
567 DMA_FROM_DEVICE);
568 kfree(qp->sqp_proxy_rcv[i].addr);
569 }
570 kfree(qp->sqp_proxy_rcv);
571 qp->sqp_proxy_rcv = NULL;
572 return -ENOMEM;
573}
574
575static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
576{
577 int i;
578
579 for (i = 0; i < qp->rq.wqe_cnt; i++) {
580 ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
581 sizeof (struct mlx4_ib_proxy_sqp_hdr),
582 DMA_FROM_DEVICE);
583 kfree(qp->sqp_proxy_rcv[i].addr);
584 }
585 kfree(qp->sqp_proxy_rcv);
586}
587
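/* XRC QPs and QPs attached to an SRQ have no receive queue of their own. */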
588static int qp_has_rq(struct ib_qp_init_attr *attr)
589{
590 if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT)
591 return 0;
592
593 return !attr->srq;
594}
595
596static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
597 struct ib_qp_init_attr *init_attr,
598 struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp)
599{
600 int qpn;
601 int err;
602 struct mlx4_ib_sqp *sqp;
603 struct mlx4_ib_qp *qp;
604 enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
605
606
607 if (sqpn) {
608 if (mlx4_is_mfunc(dev->dev) &&
609 (!mlx4_is_master(dev->dev) ||
610 !(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) {
611 if (init_attr->qp_type == IB_QPT_GSI)
612 qp_type = MLX4_IB_QPT_PROXY_GSI;
613 else if (mlx4_is_master(dev->dev))
614 qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
615 else
616 qp_type = MLX4_IB_QPT_PROXY_SMI;
617 }
618 qpn = sqpn;
619
620 init_attr->cap.max_recv_sge++;
621 } else if (init_attr->create_flags & MLX4_IB_SRIOV_TUNNEL_QP) {
622 struct mlx4_ib_qp_tunnel_init_attr *tnl_init =
623 container_of(init_attr,
624 struct mlx4_ib_qp_tunnel_init_attr, init_attr);
625 if ((tnl_init->proxy_qp_type != IB_QPT_SMI &&
626 tnl_init->proxy_qp_type != IB_QPT_GSI) ||
627 !mlx4_is_master(dev->dev))
628 return -EINVAL;
629 if (tnl_init->proxy_qp_type == IB_QPT_GSI)
630 qp_type = MLX4_IB_QPT_TUN_GSI;
631 else if (tnl_init->slave == mlx4_master_func_num(dev->dev))
632 qp_type = MLX4_IB_QPT_TUN_SMI_OWNER;
633 else
634 qp_type = MLX4_IB_QPT_TUN_SMI;
635
636
637 qpn = dev->dev->phys_caps.base_tunnel_sqpn + 8 * tnl_init->slave
638 + tnl_init->proxy_qp_type * 2 + tnl_init->port - 1;
639 sqpn = qpn;
640 }
641
642 if (!*caller_qp) {
643 if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI ||
644 (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
645 MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
646 sqp = kzalloc(sizeof (struct mlx4_ib_sqp), GFP_KERNEL);
647 if (!sqp)
648 return -ENOMEM;
649 qp = &sqp->qp;
650 } else {
651 qp = kzalloc(sizeof (struct mlx4_ib_qp), GFP_KERNEL);
652 if (!qp)
653 return -ENOMEM;
654 }
655 } else
656 qp = *caller_qp;
657
658 qp->mlx4_ib_qp_type = qp_type;
659
660 mutex_init(&qp->mutex);
661 spin_lock_init(&qp->sq.lock);
662 spin_lock_init(&qp->rq.lock);
663 INIT_LIST_HEAD(&qp->gid_list);
664 INIT_LIST_HEAD(&qp->steering_rules);
665
666 qp->state = IB_QPS_RESET;
667 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
668 qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
669
670 err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp);
671 if (err)
672 goto err;
673
674 if (pd->uobject) {
675 struct mlx4_ib_create_qp ucmd;
676
677 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
678 err = -EFAULT;
679 goto err;
680 }
681
682 qp->sq_no_prefetch = ucmd.sq_no_prefetch;
683
684 err = set_user_sq_size(dev, qp, &ucmd);
685 if (err)
686 goto err;
687
688 qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
689 qp->buf_size, 0, 0);
690 if (IS_ERR(qp->umem)) {
691 err = PTR_ERR(qp->umem);
692 goto err;
693 }
694
695 err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),
696 ilog2(qp->umem->page_size), &qp->mtt);
697 if (err)
698 goto err_buf;
699
700 err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);
701 if (err)
702 goto err_mtt;
703
704 if (qp_has_rq(init_attr)) {
705 err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
706 ucmd.db_addr, &qp->db);
707 if (err)
708 goto err_mtt;
709 }
710 } else {
711 qp->sq_no_prefetch = 0;
712
713 if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
714 qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
715
716 if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
717 qp->flags |= MLX4_IB_QP_LSO;
718
719 err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
720 if (err)
721 goto err;
722
723 if (qp_has_rq(init_attr)) {
724 err = mlx4_db_alloc(dev->dev, &qp->db, 0);
725 if (err)
726 goto err;
727
728 *qp->db.db = 0;
729 }
730
731 if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) {
732 err = -ENOMEM;
733 goto err_db;
734 }
735
736 err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
737 &qp->mtt);
738 if (err)
739 goto err_buf;
740
741 err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
742 if (err)
743 goto err_mtt;
744
745 qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL);
746 qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL);
747
748 if (!qp->sq.wrid || !qp->rq.wrid) {
749 err = -ENOMEM;
750 goto err_wrid;
751 }
752 }
753
754 if (sqpn) {
755 if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
756 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
757 if (alloc_proxy_bufs(pd->device, qp)) {
758 err = -ENOMEM;
759 goto err_wrid;
760 }
761 }
762 } else {
763
764
765 if (init_attr->qp_type == IB_QPT_RAW_PACKET)
766 err = mlx4_qp_reserve_range(dev->dev, 1, 1 << 8, &qpn);
767 else
768 err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn);
769 if (err)
770 goto err_proxy;
771 }
772
773 err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
774 if (err)
775 goto err_qpn;
776
777 if (init_attr->qp_type == IB_QPT_XRC_TGT)
778 qp->mqp.qpn |= (1 << 23);
779
	/*
	 * Hardware wants QPN written in big-endian order (after
	 * shifting) for send doorbell.  Precompute this value to save
	 * a little bit when posting sends.
	 */
785 qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
786
787 qp->mqp.event = mlx4_ib_qp_event;
788 if (!*caller_qp)
789 *caller_qp = qp;
790 return 0;
791
792err_qpn:
793 if (!sqpn)
794 mlx4_qp_release_range(dev->dev, qpn, 1);
795err_proxy:
796 if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI)
797 free_proxy_bufs(pd->device, qp);
798err_wrid:
799 if (pd->uobject) {
800 if (qp_has_rq(init_attr))
801 mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
802 } else {
803 kfree(qp->sq.wrid);
804 kfree(qp->rq.wrid);
805 }
806
807err_mtt:
808 mlx4_mtt_cleanup(dev->dev, &qp->mtt);
809
810err_buf:
811 if (pd->uobject)
812 ib_umem_release(qp->umem);
813 else
814 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
815
816err_db:
817 if (!pd->uobject && qp_has_rq(init_attr))
818 mlx4_db_free(dev->dev, &qp->db);
819
820err:
821 if (!*caller_qp)
822 kfree(qp);
823 return err;
824}
825
826static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state)
827{
828 switch (state) {
829 case IB_QPS_RESET: return MLX4_QP_STATE_RST;
830 case IB_QPS_INIT: return MLX4_QP_STATE_INIT;
831 case IB_QPS_RTR: return MLX4_QP_STATE_RTR;
832 case IB_QPS_RTS: return MLX4_QP_STATE_RTS;
833 case IB_QPS_SQD: return MLX4_QP_STATE_SQD;
834 case IB_QPS_SQE: return MLX4_QP_STATE_SQER;
835 case IB_QPS_ERR: return MLX4_QP_STATE_ERR;
836 default: return -1;
837 }
838}
839
840static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
841 __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
842{
843 if (send_cq == recv_cq) {
844 spin_lock_irq(&send_cq->lock);
845 __acquire(&recv_cq->lock);
846 } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
847 spin_lock_irq(&send_cq->lock);
848 spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
849 } else {
850 spin_lock_irq(&recv_cq->lock);
851 spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
852 }
853}
854
855static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
856 __releases(&send_cq->lock) __releases(&recv_cq->lock)
857{
858 if (send_cq == recv_cq) {
859 __release(&recv_cq->lock);
860 spin_unlock_irq(&send_cq->lock);
861 } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
862 spin_unlock(&recv_cq->lock);
863 spin_unlock_irq(&send_cq->lock);
864 } else {
865 spin_unlock(&send_cq->lock);
866 spin_unlock_irq(&recv_cq->lock);
867 }
868}
869
870static void del_gid_entries(struct mlx4_ib_qp *qp)
871{
872 struct mlx4_ib_gid_entry *ge, *tmp;
873
874 list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
875 list_del(&ge->list);
876 kfree(ge);
877 }
878}
879
880static struct mlx4_ib_pd *get_pd(struct mlx4_ib_qp *qp)
881{
882 if (qp->ibqp.qp_type == IB_QPT_XRC_TGT)
883 return to_mpd(to_mxrcd(qp->ibqp.xrcd)->pd);
884 else
885 return to_mpd(qp->ibqp.pd);
886}
887
888static void get_cqs(struct mlx4_ib_qp *qp,
889 struct mlx4_ib_cq **send_cq, struct mlx4_ib_cq **recv_cq)
890{
891 switch (qp->ibqp.qp_type) {
892 case IB_QPT_XRC_TGT:
893 *send_cq = to_mcq(to_mxrcd(qp->ibqp.xrcd)->cq);
894 *recv_cq = *send_cq;
895 break;
896 case IB_QPT_XRC_INI:
897 *send_cq = to_mcq(qp->ibqp.send_cq);
898 *recv_cq = *send_cq;
899 break;
900 default:
901 *send_cq = to_mcq(qp->ibqp.send_cq);
902 *recv_cq = to_mcq(qp->ibqp.recv_cq);
903 break;
904 }
905}
906
907static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
908 int is_user)
909{
910 struct mlx4_ib_cq *send_cq, *recv_cq;
911
912 if (qp->state != IB_QPS_RESET)
913 if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
914 MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
915 pr_warn("modify QP %06x to RESET failed.\n",
916 qp->mqp.qpn);
917
918 get_cqs(qp, &send_cq, &recv_cq);
919
920 mlx4_ib_lock_cqs(send_cq, recv_cq);
921
922 if (!is_user) {
923 __mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
924 qp->ibqp.srq ? to_msrq(qp->ibqp.srq): NULL);
925 if (send_cq != recv_cq)
926 __mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
927 }
928
929 mlx4_qp_remove(dev->dev, &qp->mqp);
930
931 mlx4_ib_unlock_cqs(send_cq, recv_cq);
932
933 mlx4_qp_free(dev->dev, &qp->mqp);
934
935 if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp))
936 mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
937
938 mlx4_mtt_cleanup(dev->dev, &qp->mtt);
939
940 if (is_user) {
941 if (qp->rq.wqe_cnt)
942 mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context),
943 &qp->db);
944 ib_umem_release(qp->umem);
945 } else {
946 kfree(qp->sq.wrid);
947 kfree(qp->rq.wrid);
948 if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
949 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
950 free_proxy_bufs(&dev->ib_dev, qp);
951 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
952 if (qp->rq.wqe_cnt)
953 mlx4_db_free(dev->dev, &qp->db);
954 }
955
956 del_gid_entries(qp);
957}
958
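/*
 * Return the QP number to use for a special QP: the real QP0/QP1 QPN
 * when we own the port (native mode, or SR-IOV master creating a real
 * SQP), otherwise the per-port proxy QPN.
 */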
959static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
960{
961
962 if (!mlx4_is_mfunc(dev->dev) ||
963 (mlx4_is_master(dev->dev) &&
964 attr->create_flags & MLX4_IB_SRIOV_SQP)) {
965 return dev->dev->phys_caps.base_sqpn +
966 (attr->qp_type == IB_QPT_SMI ? 0 : 2) +
967 attr->port_num - 1;
968 }
969
970 if (attr->qp_type == IB_QPT_SMI)
971 return dev->dev->caps.qp0_proxy[attr->port_num - 1];
972 else
973 return dev->dev->caps.qp1_proxy[attr->port_num - 1];
974}
975
976struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
977 struct ib_qp_init_attr *init_attr,
978 struct ib_udata *udata)
979{
980 struct mlx4_ib_qp *qp = NULL;
981 int err;
982 u16 xrcdn = 0;
983
	/*
	 * Only LSO, multicast loopback blocking and the SR-IOV
	 * tunnel/proxy creation flags are supported, and (except for
	 * the SR-IOV special QP flag) only for kernel UD QPs.
	 */
988 if (init_attr->create_flags & ~(MLX4_IB_QP_LSO |
989 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK |
990 MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP))
991 return ERR_PTR(-EINVAL);
992
993 if (init_attr->create_flags &&
994 (udata ||
995 ((init_attr->create_flags & ~MLX4_IB_SRIOV_SQP) &&
996 init_attr->qp_type != IB_QPT_UD) ||
997 ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
998 init_attr->qp_type > IB_QPT_GSI)))
999 return ERR_PTR(-EINVAL);
1000
1001 switch (init_attr->qp_type) {
1002 case IB_QPT_XRC_TGT:
1003 pd = to_mxrcd(init_attr->xrcd)->pd;
1004 xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
1005 init_attr->send_cq = to_mxrcd(init_attr->xrcd)->cq;
		/* fall through */
1007 case IB_QPT_XRC_INI:
1008 if (!(to_mdev(pd->device)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
1009 return ERR_PTR(-ENOSYS);
1010 init_attr->recv_cq = init_attr->send_cq;
		/* fall through */
1012 case IB_QPT_RC:
1013 case IB_QPT_UC:
1014 case IB_QPT_RAW_PACKET:
1015 qp = kzalloc(sizeof *qp, GFP_KERNEL);
1016 if (!qp)
1017 return ERR_PTR(-ENOMEM);
1018
1019 case IB_QPT_UD:
1020 {
1021 err = create_qp_common(to_mdev(pd->device), pd, init_attr,
1022 udata, 0, &qp);
1023 if (err)
1024 return ERR_PTR(err);
1025
1026 qp->ibqp.qp_num = qp->mqp.qpn;
1027 qp->xrcdn = xrcdn;
1028
1029 break;
1030 }
1031 case IB_QPT_SMI:
1032 case IB_QPT_GSI:
1033 {
		/* Userspace is not allowed to create special QPs: */
1035 if (udata)
1036 return ERR_PTR(-EINVAL);
1037
1038 err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
1039 get_sqp_num(to_mdev(pd->device), init_attr),
1040 &qp);
1041 if (err)
1042 return ERR_PTR(err);
1043
1044 qp->port = init_attr->port_num;
1045 qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
1046
1047 break;
1048 }
1049 default:
		/* Don't support raw QPs */
1051 return ERR_PTR(-EINVAL);
1052 }
1053
1054 return &qp->ibqp;
1055}
1056
1057int mlx4_ib_destroy_qp(struct ib_qp *qp)
1058{
1059 struct mlx4_ib_dev *dev = to_mdev(qp->device);
1060 struct mlx4_ib_qp *mqp = to_mqp(qp);
1061 struct mlx4_ib_pd *pd;
1062
1063 if (is_qp0(dev, mqp))
1064 mlx4_CLOSE_PORT(dev->dev, mqp->port);
1065
1066 pd = get_pd(mqp);
1067 destroy_qp_common(dev, mqp, !!pd->ibpd.uobject);
1068
1069 if (is_sqp(dev, mqp))
1070 kfree(to_msqp(mqp));
1071 else
1072 kfree(mqp);
1073
1074 return 0;
1075}
1076
1077static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type)
1078{
1079 switch (type) {
1080 case MLX4_IB_QPT_RC: return MLX4_QP_ST_RC;
1081 case MLX4_IB_QPT_UC: return MLX4_QP_ST_UC;
1082 case MLX4_IB_QPT_UD: return MLX4_QP_ST_UD;
1083 case MLX4_IB_QPT_XRC_INI:
1084 case MLX4_IB_QPT_XRC_TGT: return MLX4_QP_ST_XRC;
1085 case MLX4_IB_QPT_SMI:
1086 case MLX4_IB_QPT_GSI:
1087 case MLX4_IB_QPT_RAW_PACKET: return MLX4_QP_ST_MLX;
1088
1089 case MLX4_IB_QPT_PROXY_SMI_OWNER:
1090 case MLX4_IB_QPT_TUN_SMI_OWNER: return (mlx4_is_mfunc(dev->dev) ?
1091 MLX4_QP_ST_MLX : -1);
1092 case MLX4_IB_QPT_PROXY_SMI:
1093 case MLX4_IB_QPT_TUN_SMI:
1094 case MLX4_IB_QPT_PROXY_GSI:
1095 case MLX4_IB_QPT_TUN_GSI: return (mlx4_is_mfunc(dev->dev) ?
1096 MLX4_QP_ST_UD : -1);
1097 default: return -1;
1098 }
1099}
1100
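/*
 * Translate the QP's remote access flags into the hardware RRE/RAE/RWE
 * bits, using the attribute values being set when the corresponding
 * mask bit is present and the QP's current values otherwise.
 */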
1101static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, const struct ib_qp_attr *attr,
1102 int attr_mask)
1103{
1104 u8 dest_rd_atomic;
1105 u32 access_flags;
1106 u32 hw_access_flags = 0;
1107
1108 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1109 dest_rd_atomic = attr->max_dest_rd_atomic;
1110 else
1111 dest_rd_atomic = qp->resp_depth;
1112
1113 if (attr_mask & IB_QP_ACCESS_FLAGS)
1114 access_flags = attr->qp_access_flags;
1115 else
1116 access_flags = qp->atomic_rd_en;
1117
1118 if (!dest_rd_atomic)
1119 access_flags &= IB_ACCESS_REMOTE_WRITE;
1120
1121 if (access_flags & IB_ACCESS_REMOTE_READ)
1122 hw_access_flags |= MLX4_QP_BIT_RRE;
1123 if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
1124 hw_access_flags |= MLX4_QP_BIT_RAE;
1125 if (access_flags & IB_ACCESS_REMOTE_WRITE)
1126 hw_access_flags |= MLX4_QP_BIT_RWE;
1127
1128 return cpu_to_be32(hw_access_flags);
1129}
1130
1131static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, const struct ib_qp_attr *attr,
1132 int attr_mask)
1133{
1134 if (attr_mask & IB_QP_PKEY_INDEX)
1135 sqp->pkey_index = attr->pkey_index;
1136 if (attr_mask & IB_QP_QKEY)
1137 sqp->qkey = attr->qkey;
1138 if (attr_mask & IB_QP_SQ_PSN)
1139 sqp->send_psn = attr->sq_psn;
1140}
1141
1142static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
1143{
1144 path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
1145}
1146
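/*
 * Fill in a hardware address path from an IB address handle: LID/GID,
 * SL, static rate and, for RoCE (Ethernet) ports, the destination MAC
 * and VLAN index.
 */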
1147static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
1148 struct mlx4_qp_path *path, u8 port)
1149{
1150 int err;
1151 int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) ==
1152 IB_LINK_LAYER_ETHERNET;
1153 u8 mac[6];
1154 int is_mcast;
1155 u16 vlan_tag;
1156 int vidx;
1157
1158 path->grh_mylmc = ah->src_path_bits & 0x7f;
1159 path->rlid = cpu_to_be16(ah->dlid);
1160 if (ah->static_rate) {
1161 path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET;
1162 while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
1163 !(1 << path->static_rate & dev->dev->caps.stat_rate_support))
1164 --path->static_rate;
1165 } else
1166 path->static_rate = 0;
1167
1168 if (ah->ah_flags & IB_AH_GRH) {
1169 if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) {
1170 pr_err("sgid_index (%u) too large. max is %d\n",
1171 ah->grh.sgid_index, dev->dev->caps.gid_table_len[port] - 1);
1172 return -1;
1173 }
1174
1175 path->grh_mylmc |= 1 << 7;
1176 path->mgid_index = ah->grh.sgid_index;
1177 path->hop_limit = ah->grh.hop_limit;
1178 path->tclass_flowlabel =
1179 cpu_to_be32((ah->grh.traffic_class << 20) |
1180 (ah->grh.flow_label));
1181 memcpy(path->rgid, ah->grh.dgid.raw, 16);
1182 }
1183
1184 if (is_eth) {
1185 path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
1186 ((port - 1) << 6) | ((ah->sl & 7) << 3);
1187
1188 if (!(ah->ah_flags & IB_AH_GRH))
1189 return -1;
1190
1191 err = mlx4_ib_resolve_grh(dev, ah, mac, &is_mcast, port);
1192 if (err)
1193 return err;
1194
1195 memcpy(path->dmac, mac, 6);
1196 path->ackto = MLX4_IB_LINK_TYPE_ETH;
1197
1198 path->grh_mylmc &= 0x80;
1199
1200 vlan_tag = rdma_get_vlan_id(&dev->iboe.gid_table[port - 1][ah->grh.sgid_index]);
1201 if (vlan_tag < 0x1000) {
1202 if (mlx4_find_cached_vlan(dev->dev, port, vlan_tag, &vidx))
1203 return -ENOENT;
1204
1205 path->vlan_index = vidx;
1206 path->fl = 1 << 6;
1207 }
1208 } else
1209 path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
1210 ((port - 1) << 6) | ((ah->sl & 0xf) << 2);
1211
1212 return 0;
1213}
1214
1215static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
1216{
1217 struct mlx4_ib_gid_entry *ge, *tmp;
1218
1219 list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
1220 if (!ge->added && mlx4_ib_add_mc(dev, qp, &ge->gid)) {
1221 ge->added = 1;
1222 ge->port = qp->port;
1223 }
1224 }
1225}
1226
1227static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1228 const struct ib_qp_attr *attr, int attr_mask,
1229 enum ib_qp_state cur_state, enum ib_qp_state new_state)
1230{
1231 struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
1232 struct mlx4_ib_qp *qp = to_mqp(ibqp);
1233 struct mlx4_ib_pd *pd;
1234 struct mlx4_ib_cq *send_cq, *recv_cq;
1235 struct mlx4_qp_context *context;
1236 enum mlx4_qp_optpar optpar = 0;
1237 int sqd_event;
1238 int err = -EINVAL;
1239
1240 context = kzalloc(sizeof *context, GFP_KERNEL);
1241 if (!context)
1242 return -ENOMEM;
1243
1244 context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
1245 (to_mlx4_st(dev, qp->mlx4_ib_qp_type) << 16));
1246
1247 if (!(attr_mask & IB_QP_PATH_MIG_STATE))
1248 context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
1249 else {
1250 optpar |= MLX4_QP_OPTPAR_PM_STATE;
1251 switch (attr->path_mig_state) {
1252 case IB_MIG_MIGRATED:
1253 context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
1254 break;
1255 case IB_MIG_REARM:
1256 context->flags |= cpu_to_be32(MLX4_QP_PM_REARM << 11);
1257 break;
1258 case IB_MIG_ARMED:
1259 context->flags |= cpu_to_be32(MLX4_QP_PM_ARMED << 11);
1260 break;
1261 }
1262 }
1263
1264 if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
1265 context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
1266 else if (ibqp->qp_type == IB_QPT_RAW_PACKET)
1267 context->mtu_msgmax = (MLX4_RAW_QP_MTU << 5) | MLX4_RAW_QP_MSGMAX;
1268 else if (ibqp->qp_type == IB_QPT_UD) {
1269 if (qp->flags & MLX4_IB_QP_LSO)
1270 context->mtu_msgmax = (IB_MTU_4096 << 5) |
1271 ilog2(dev->dev->caps.max_gso_sz);
1272 else
1273 context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
1274 } else if (attr_mask & IB_QP_PATH_MTU) {
1275 if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {
1276 pr_err("path MTU (%u) is invalid\n",
1277 attr->path_mtu);
1278 goto out;
1279 }
1280 context->mtu_msgmax = (attr->path_mtu << 5) |
1281 ilog2(dev->dev->caps.max_msg_sz);
1282 }
1283
1284 if (qp->rq.wqe_cnt)
1285 context->rq_size_stride = ilog2(qp->rq.wqe_cnt) << 3;
1286 context->rq_size_stride |= qp->rq.wqe_shift - 4;
1287
1288 if (qp->sq.wqe_cnt)
1289 context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3;
1290 context->sq_size_stride |= qp->sq.wqe_shift - 4;
1291
1292 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
1293 context->sq_size_stride |= !!qp->sq_no_prefetch << 7;
1294 context->xrcd = cpu_to_be32((u32) qp->xrcdn);
1295 if (ibqp->qp_type == IB_QPT_RAW_PACKET)
1296 context->param3 |= cpu_to_be32(1 << 30);
1297 }
1298
1299 if (qp->ibqp.uobject)
1300 context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index);
1301 else
1302 context->usr_page = cpu_to_be32(dev->priv_uar.index);
1303
1304 if (attr_mask & IB_QP_DEST_QPN)
1305 context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
1306
1307 if (attr_mask & IB_QP_PORT) {
1308 if (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD &&
1309 !(attr_mask & IB_QP_AV)) {
1310 mlx4_set_sched(&context->pri_path, attr->port_num);
1311 optpar |= MLX4_QP_OPTPAR_SCHED_QUEUE;
1312 }
1313 }
1314
1315 if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
1316 if (dev->counters[qp->port - 1] != -1) {
1317 context->pri_path.counter_index =
1318 dev->counters[qp->port - 1];
1319 optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX;
1320 } else
1321 context->pri_path.counter_index = 0xff;
1322 }
1323
1324 if (attr_mask & IB_QP_PKEY_INDEX) {
1325 if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
1326 context->pri_path.disable_pkey_check = 0x40;
1327 context->pri_path.pkey_index = attr->pkey_index;
1328 optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
1329 }
1330
1331 if (attr_mask & IB_QP_AV) {
1332 if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
1333 attr_mask & IB_QP_PORT ?
1334 attr->port_num : qp->port))
1335 goto out;
1336
1337 optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
1338 MLX4_QP_OPTPAR_SCHED_QUEUE);
1339 }
1340
1341 if (attr_mask & IB_QP_TIMEOUT) {
1342 context->pri_path.ackto |= attr->timeout << 3;
1343 optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT;
1344 }
1345
1346 if (attr_mask & IB_QP_ALT_PATH) {
1347 if (attr->alt_port_num == 0 ||
1348 attr->alt_port_num > dev->dev->caps.num_ports)
1349 goto out;
1350
1351 if (attr->alt_pkey_index >=
1352 dev->dev->caps.pkey_table_len[attr->alt_port_num])
1353 goto out;
1354
1355 if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
1356 attr->alt_port_num))
1357 goto out;
1358
1359 context->alt_path.pkey_index = attr->alt_pkey_index;
1360 context->alt_path.ackto = attr->alt_timeout << 3;
1361 optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
1362 }
1363
1364 pd = get_pd(qp);
1365 get_cqs(qp, &send_cq, &recv_cq);
1366 context->pd = cpu_to_be32(pd->pdn);
1367 context->cqn_send = cpu_to_be32(send_cq->mcq.cqn);
1368 context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn);
1369 context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);

	/* Set "fast registration enabled" for all kernel QPs */
1372 if (!qp->ibqp.uobject)
1373 context->params1 |= cpu_to_be32(1 << 11);
1374
1375 if (attr_mask & IB_QP_RNR_RETRY) {
1376 context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
1377 optpar |= MLX4_QP_OPTPAR_RNR_RETRY;
1378 }
1379
1380 if (attr_mask & IB_QP_RETRY_CNT) {
1381 context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
1382 optpar |= MLX4_QP_OPTPAR_RETRY_COUNT;
1383 }
1384
1385 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
1386 if (attr->max_rd_atomic)
1387 context->params1 |=
1388 cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
1389 optpar |= MLX4_QP_OPTPAR_SRA_MAX;
1390 }
1391
1392 if (attr_mask & IB_QP_SQ_PSN)
1393 context->next_send_psn = cpu_to_be32(attr->sq_psn);
1394
1395 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
1396 if (attr->max_dest_rd_atomic)
1397 context->params2 |=
1398 cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
1399 optpar |= MLX4_QP_OPTPAR_RRA_MAX;
1400 }
1401
1402 if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
1403 context->params2 |= to_mlx4_access_flags(qp, attr, attr_mask);
1404 optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE;
1405 }
1406
1407 if (ibqp->srq)
1408 context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC);
1409
1410 if (attr_mask & IB_QP_MIN_RNR_TIMER) {
1411 context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
1412 optpar |= MLX4_QP_OPTPAR_RNR_TIMEOUT;
1413 }
1414 if (attr_mask & IB_QP_RQ_PSN)
1415 context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
1416
1417
1418 if (attr_mask & IB_QP_QKEY) {
1419 if (qp->mlx4_ib_qp_type &
1420 (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))
1421 context->qkey = cpu_to_be32(IB_QP_SET_QKEY);
1422 else {
1423 if (mlx4_is_mfunc(dev->dev) &&
1424 !(qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) &&
1425 (attr->qkey & MLX4_RESERVED_QKEY_MASK) ==
1426 MLX4_RESERVED_QKEY_BASE) {
1427 pr_err("Cannot use reserved QKEY"
1428 " 0x%x (range 0xffff0000..0xffffffff"
1429 " is reserved)\n", attr->qkey);
1430 err = -EINVAL;
1431 goto out;
1432 }
1433 context->qkey = cpu_to_be32(attr->qkey);
1434 }
1435 optpar |= MLX4_QP_OPTPAR_Q_KEY;
1436 }
1437
1438 if (ibqp->srq)
1439 context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn);
1440
1441 if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
1442 context->db_rec_addr = cpu_to_be64(qp->db.dma);
1443
1444 if (cur_state == IB_QPS_INIT &&
1445 new_state == IB_QPS_RTR &&
1446 (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
1447 ibqp->qp_type == IB_QPT_UD ||
1448 ibqp->qp_type == IB_QPT_RAW_PACKET)) {
1449 context->pri_path.sched_queue = (qp->port - 1) << 6;
1450 if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
1451 qp->mlx4_ib_qp_type &
1452 (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) {
1453 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;
1454 if (qp->mlx4_ib_qp_type != MLX4_IB_QPT_SMI)
1455 context->pri_path.fl = 0x80;
1456 } else {
1457 if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
1458 context->pri_path.fl = 0x80;
1459 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
1460 }
1461 }
1462
1463 if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET)
1464 context->pri_path.ackto = (context->pri_path.ackto & 0xf8) |
1465 MLX4_IB_LINK_TYPE_ETH;
1466
1467 if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
1468 attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
1469 sqd_event = 1;
1470 else
1471 sqd_event = 0;
1472
1473 if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
1474 context->rlkey |= (1 << 4);
1475
	/*
	 * Before passing a kernel QP to the HW, make sure that the
	 * ownership bits of the send queue are set and the SQ
	 * headroom is stamped so that the HW doesn't start processing
	 * stale work requests.
	 */
1482 if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
1483 struct mlx4_wqe_ctrl_seg *ctrl;
1484 int i;
1485
1486 for (i = 0; i < qp->sq.wqe_cnt; ++i) {
1487 ctrl = get_send_wqe(qp, i);
1488 ctrl->owner_opcode = cpu_to_be32(1 << 31);
1489 if (qp->sq_max_wqes_per_wr == 1)
1490 ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4);
1491
1492 stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift);
1493 }
1494 }
1495
1496 err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state),
1497 to_mlx4_state(new_state), context, optpar,
1498 sqd_event, &qp->mqp);
1499 if (err)
1500 goto out;
1501
1502 qp->state = new_state;
1503
1504 if (attr_mask & IB_QP_ACCESS_FLAGS)
1505 qp->atomic_rd_en = attr->qp_access_flags;
1506 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1507 qp->resp_depth = attr->max_dest_rd_atomic;
1508 if (attr_mask & IB_QP_PORT) {
1509 qp->port = attr->port_num;
1510 update_mcg_macs(dev, qp);
1511 }
1512 if (attr_mask & IB_QP_ALT_PATH)
1513 qp->alt_port = attr->alt_port_num;
1514
1515 if (is_sqp(dev, qp))
1516 store_sqp_attrs(to_msqp(qp), attr, attr_mask);
1517
	/*
	 * If we moved QP0 to RTR, bring the IB link up; if we moved
	 * it to RESET or ERROR, bring the link back down.
	 */
1522 if (is_qp0(dev, qp)) {
1523 if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR)
1524 if (mlx4_INIT_PORT(dev->dev, qp->port))
1525 pr_warn("INIT_PORT failed for port %d\n",
1526 qp->port);
1527
1528 if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
1529 (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
1530 mlx4_CLOSE_PORT(dev->dev, qp->port);
1531 }
1532
	/*
	 * If we moved a kernel QP to RESET, clean up all old CQ
	 * entries and reinitialize the QP.
	 */
1537 if (new_state == IB_QPS_RESET && !ibqp->uobject) {
1538 mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
1539 ibqp->srq ? to_msrq(ibqp->srq): NULL);
1540 if (send_cq != recv_cq)
1541 mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
1542
1543 qp->rq.head = 0;
1544 qp->rq.tail = 0;
1545 qp->sq.head = 0;
1546 qp->sq.tail = 0;
1547 qp->sq_next_wqe = 0;
1548 if (qp->rq.wqe_cnt)
1549 *qp->db.db = 0;
1550 }
1551
1552out:
1553 kfree(context);
1554 return err;
1555}
1556
1557int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1558 int attr_mask, struct ib_udata *udata)
1559{
1560 struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
1561 struct mlx4_ib_qp *qp = to_mqp(ibqp);
1562 enum ib_qp_state cur_state, new_state;
1563 int err = -EINVAL;
1564
1565 mutex_lock(&qp->mutex);
1566
1567 cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
1568 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
1569
1570 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
1571 pr_debug("qpn 0x%x: invalid attribute mask specified "
1572 "for transition %d to %d. qp_type %d,"
1573 " attr_mask 0x%x\n",
1574 ibqp->qp_num, cur_state, new_state,
1575 ibqp->qp_type, attr_mask);
1576 goto out;
1577 }
1578
1579 if ((attr_mask & IB_QP_PORT) &&
1580 (attr->port_num == 0 || attr->port_num > dev->num_ports)) {
1581 pr_debug("qpn 0x%x: invalid port number (%d) specified "
1582 "for transition %d to %d. qp_type %d\n",
1583 ibqp->qp_num, attr->port_num, cur_state,
1584 new_state, ibqp->qp_type);
1585 goto out;
1586 }
1587
1588 if ((attr_mask & IB_QP_PORT) && (ibqp->qp_type == IB_QPT_RAW_PACKET) &&
1589 (rdma_port_get_link_layer(&dev->ib_dev, attr->port_num) !=
1590 IB_LINK_LAYER_ETHERNET))
1591 goto out;
1592
1593 if (attr_mask & IB_QP_PKEY_INDEX) {
1594 int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
1595 if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) {
1596 pr_debug("qpn 0x%x: invalid pkey index (%d) specified "
1597 "for transition %d to %d. qp_type %d\n",
1598 ibqp->qp_num, attr->pkey_index, cur_state,
1599 new_state, ibqp->qp_type);
1600 goto out;
1601 }
1602 }
1603
1604 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
1605 attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
1606 pr_debug("qpn 0x%x: max_rd_atomic (%d) too large. "
1607 "Transition %d to %d. qp_type %d\n",
1608 ibqp->qp_num, attr->max_rd_atomic, cur_state,
1609 new_state, ibqp->qp_type);
1610 goto out;
1611 }
1612
1613 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
1614 attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) {
1615 pr_debug("qpn 0x%x: max_dest_rd_atomic (%d) too large. "
1616 "Transition %d to %d. qp_type %d\n",
1617 ibqp->qp_num, attr->max_dest_rd_atomic, cur_state,
1618 new_state, ibqp->qp_type);
1619 goto out;
1620 }
1621
1622 if (cur_state == new_state && cur_state == IB_QPS_RESET) {
1623 err = 0;
1624 goto out;
1625 }
1626
1627 err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
1628
1629out:
1630 mutex_unlock(&qp->mutex);
1631 return err;
1632}
1633
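/*
 * Build the MLX transport header for a QP0 packet sent on a proxy or
 * tunnel QP0 in SR-IOV mode, and copy it into the WQE as one or two
 * inline segments.
 */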
1634static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
1635 struct ib_send_wr *wr,
1636 void *wqe, unsigned *mlx_seg_len)
1637{
1638 struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device);
1639 struct ib_device *ib_dev = &mdev->ib_dev;
1640 struct mlx4_wqe_mlx_seg *mlx = wqe;
1641 struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
1642 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
1643 u16 pkey;
1644 u32 qkey;
1645 int send_size;
1646 int header_size;
1647 int spc;
1648 int i;
1649
1650 if (wr->opcode != IB_WR_SEND)
1651 return -EINVAL;
1652
1653 send_size = 0;
1654
1655 for (i = 0; i < wr->num_sge; ++i)
1656 send_size += wr->sg_list[i].length;
1657
	/* for proxy-qp0 sends, need to add in size of tunnel header */
	/* for tunnel-qp0 sends, tunnel header is already in s/g list */
1660 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
1661 send_size += sizeof (struct mlx4_ib_tunnel_header);
1662
1663 ib_ud_header_init(send_size, 1, 0, 0, 0, 0, &sqp->ud_header);
1664
1665 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
1666 sqp->ud_header.lrh.service_level =
1667 be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
1668 sqp->ud_header.lrh.destination_lid =
1669 cpu_to_be16(ah->av.ib.g_slid & 0x7f);
1670 sqp->ud_header.lrh.source_lid =
1671 cpu_to_be16(ah->av.ib.g_slid & 0x7f);
1672 }
1673
1674 mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
1675
	/* force loopback */
1677 mlx->flags |= cpu_to_be32(MLX4_WQE_MLX_VL15 | 0x1 | MLX4_WQE_MLX_SLR);
1678 mlx->rlid = sqp->ud_header.lrh.destination_lid;
1679
1680 sqp->ud_header.lrh.virtual_lane = 0;
1681 sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
1682 ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey);
1683 sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
1684 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
1685 sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
1686 else
1687 sqp->ud_header.bth.destination_qpn =
1688 cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]);
1689
1690 sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
1691 if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
1692 return -EINVAL;
1693 sqp->ud_header.deth.qkey = cpu_to_be32(qkey);
1694 sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn);
1695
1696 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
1697 sqp->ud_header.immediate_present = 0;
1698
1699 header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
1700
	/*
	 * Inline data segments may not cross a 64 byte boundary.  If
	 * our UD header is bigger than the space available up to the
	 * next 64 byte boundary in the WQE, use two inline data
	 * segments to hold the UD header.
	 */
1707 spc = MLX4_INLINE_ALIGN -
1708 ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
1709 if (header_size <= spc) {
1710 inl->byte_count = cpu_to_be32(1 << 31 | header_size);
1711 memcpy(inl + 1, sqp->header_buf, header_size);
1712 i = 1;
1713 } else {
1714 inl->byte_count = cpu_to_be32(1 << 31 | spc);
1715 memcpy(inl + 1, sqp->header_buf, spc);
1716
1717 inl = (void *) (inl + 1) + spc;
1718 memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);
1719
		/*
		 * Need a barrier here to make sure all the data is
		 * visible before the byte_count field is set.
		 * Otherwise, if the segment begins a new cacheline,
		 * the HCA prefetcher could grab the 64-byte chunk and
		 * get a valid (!= 0xffffffff) byte count but stale
		 * data, and end up sending the wrong data.
		 */
1732 wmb();
1733 inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));
1734 i = 2;
1735 }
1736
1737 *mlx_seg_len =
1738 ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
1739 return 0;
1740}
1741
1742static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1743 void *wqe, unsigned *mlx_seg_len)
1744{
1745 struct ib_device *ib_dev = sqp->qp.ibqp.device;
1746 struct mlx4_wqe_mlx_seg *mlx = wqe;
1747 struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
1748 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
1749 struct net_device *ndev;
1750 union ib_gid sgid;
1751 u16 pkey;
1752 int send_size;
1753 int header_size;
1754 int spc;
1755 int i;
1756 int err = 0;
1757 u16 vlan = 0xffff;
1758 bool is_eth;
1759 bool is_vlan = false;
1760 bool is_grh;
1761
1762 send_size = 0;
1763 for (i = 0; i < wr->num_sge; ++i)
1764 send_size += wr->sg_list[i].length;
1765
1766 is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
1767 is_grh = mlx4_ib_ah_grh_present(ah);
1768 if (is_eth) {
1769 if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
			/* When multi-function is enabled, the ib_core gid
			 * indexes don't necessarily match the hw ones, so
			 * we must use our own cache */
1773 sgid.global.subnet_prefix =
1774 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1775 subnet_prefix;
1776 sgid.global.interface_id =
1777 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1778 guid_cache[ah->av.ib.gid_index];
1779 } else {
1780 err = ib_get_cached_gid(ib_dev,
1781 be32_to_cpu(ah->av.ib.port_pd) >> 24,
1782 ah->av.ib.gid_index, &sgid);
1783 if (err)
1784 return err;
1785 }
1786
1787 vlan = rdma_get_vlan_id(&sgid);
1788 is_vlan = vlan < 0x1000;
1789 }
1790 ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header);
1791
1792 if (!is_eth) {
1793 sqp->ud_header.lrh.service_level =
1794 be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
1795 sqp->ud_header.lrh.destination_lid = ah->av.ib.dlid;
1796 sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f);
1797 }
1798
1799 if (is_grh) {
1800 sqp->ud_header.grh.traffic_class =
1801 (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
1802 sqp->ud_header.grh.flow_label =
1803 ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
1804 sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit;
1805 if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
			/* When multi-function is enabled, the ib_core gid
			 * indexes don't necessarily match the hw ones, so
			 * we must use our own cache */
1809 sqp->ud_header.grh.source_gid.global.subnet_prefix =
1810 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1811 subnet_prefix;
1812 sqp->ud_header.grh.source_gid.global.interface_id =
1813 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1814 guid_cache[ah->av.ib.gid_index];
1815 } else
1816 ib_get_cached_gid(ib_dev,
1817 be32_to_cpu(ah->av.ib.port_pd) >> 24,
1818 ah->av.ib.gid_index,
1819 &sqp->ud_header.grh.source_gid);
1820 memcpy(sqp->ud_header.grh.destination_gid.raw,
1821 ah->av.ib.dgid, 16);
1822 }
1823
1824 mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
1825
1826 if (!is_eth) {
1827 mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
1828 (sqp->ud_header.lrh.destination_lid ==
1829 IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
1830 (sqp->ud_header.lrh.service_level << 8));
1831 if (ah->av.ib.port_pd & cpu_to_be32(0x80000000))
1832 mlx->flags |= cpu_to_be32(0x1);
1833 mlx->rlid = sqp->ud_header.lrh.destination_lid;
1834 }
1835
1836 switch (wr->opcode) {
1837 case IB_WR_SEND:
1838 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
1839 sqp->ud_header.immediate_present = 0;
1840 break;
1841 case IB_WR_SEND_WITH_IMM:
1842 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
1843 sqp->ud_header.immediate_present = 1;
1844 sqp->ud_header.immediate_data = wr->ex.imm_data;
1845 break;
1846 default:
1847 return -EINVAL;
1848 }
1849
1850 if (is_eth) {
1851 u8 *smac;
1852 u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
1853
1854 mlx->sched_prio = cpu_to_be16(pcp);
1855
1856 memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
1857
1858 ndev = to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1];
1859 if (!ndev)
1860 return -ENODEV;
1861 smac = ndev->dev_addr;
1862 memcpy(sqp->ud_header.eth.smac_h, smac, 6);
1863 if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
1864 mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
1865 if (!is_vlan) {
1866 sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
1867 } else {
1868 sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
1869 sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp);
1870 }
1871 } else {
1872 sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
1873 if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
1874 sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
1875 }
1876 sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
1877 if (!sqp->qp.ibqp.qp_num)
1878 ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
1879 else
1880 ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->wr.ud.pkey_index, &pkey);
1881 sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
1882 sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
1883 sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
1884 sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
1885 sqp->qkey : wr->wr.ud.remote_qkey);
1886 sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
1887
1888 header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
1889
1890 if (0) {
1891 pr_err("built UD header of size %d:\n", header_size);
1892 for (i = 0; i < header_size / 4; ++i) {
1893 if (i % 8 == 0)
1894 pr_err(" [%02x] ", i * 4);
1895 pr_cont(" %08x",
1896 be32_to_cpu(((__be32 *) sqp->header_buf)[i]));
1897 if ((i + 1) % 8 == 0)
1898 pr_cont("\n");
1899 }
1900 pr_err("\n");
1901 }
1902
	/*
	 * Inline data segments may not cross a 64 byte boundary.  If
	 * our UD header is bigger than the space available up to the
	 * next 64 byte boundary in the WQE, use two inline data
	 * segments to hold the UD header.
	 */
1909 spc = MLX4_INLINE_ALIGN -
1910 ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
1911 if (header_size <= spc) {
1912 inl->byte_count = cpu_to_be32(1 << 31 | header_size);
1913 memcpy(inl + 1, sqp->header_buf, header_size);
1914 i = 1;
1915 } else {
1916 inl->byte_count = cpu_to_be32(1 << 31 | spc);
1917 memcpy(inl + 1, sqp->header_buf, spc);
1918
1919 inl = (void *) (inl + 1) + spc;
1920 memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);
1921
		/*
		 * Need a barrier here to make sure all the data is
		 * visible before the byte_count field is set.
		 * Otherwise, if the segment begins a new cacheline,
		 * the HCA prefetcher could grab the 64-byte chunk and
		 * get a valid (!= 0xffffffff) byte count but stale
		 * data, and end up sending the wrong data.
		 */
1934 wmb();
1935 inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));
1936 i = 2;
1937 }
1938
1939 *mlx_seg_len =
1940 ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
1941 return 0;
1942}
1943
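/*
 * Check whether posting nreq more work requests would overflow the
 * work queue; if it looks full, re-read the queue indices under the CQ
 * lock to pick up completions that may have just been polled.
 */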
1944static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
1945{
1946 unsigned cur;
1947 struct mlx4_ib_cq *cq;
1948
1949 cur = wq->head - wq->tail;
1950 if (likely(cur + nreq < wq->max_post))
1951 return 0;
1952
1953 cq = to_mcq(ib_cq);
1954 spin_lock(&cq->lock);
1955 cur = wq->head - wq->tail;
1956 spin_unlock(&cq->lock);
1957
1958 return cur + nreq >= wq->max_post;
1959}
1960
1961static __be32 convert_access(int acc)
1962{
1963 return (acc & IB_ACCESS_REMOTE_ATOMIC ?
1964 cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC) : 0) |
1965 (acc & IB_ACCESS_REMOTE_WRITE ?
1966 cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE) : 0) |
1967 (acc & IB_ACCESS_REMOTE_READ ?
1968 cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ) : 0) |
1969 (acc & IB_ACCESS_LOCAL_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0) |
1970 cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ);
1971}
1972
1973static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr)
1974{
1975 struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list);
1976 int i;
1977
1978 for (i = 0; i < wr->wr.fast_reg.page_list_len; ++i)
1979 mfrpl->mapped_page_list[i] =
1980 cpu_to_be64(wr->wr.fast_reg.page_list->page_list[i] |
1981 MLX4_MTT_FLAG_PRESENT);
1982
1983 fseg->flags = convert_access(wr->wr.fast_reg.access_flags);
1984 fseg->mem_key = cpu_to_be32(wr->wr.fast_reg.rkey);
1985 fseg->buf_list = cpu_to_be64(mfrpl->map);
1986 fseg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
1987 fseg->reg_len = cpu_to_be64(wr->wr.fast_reg.length);
1988 fseg->offset = 0;
1989 fseg->page_size = cpu_to_be32(wr->wr.fast_reg.page_shift);
1990 fseg->reserved[0] = 0;
1991 fseg->reserved[1] = 0;
1992}
1993
1994static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg, struct ib_send_wr *wr)
1995{
1996 bseg->flags1 =
1997 convert_access(wr->wr.bind_mw.bind_info.mw_access_flags) &
1998 cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ |
1999 MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE |
2000 MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC);
2001 bseg->flags2 = 0;
2002 if (wr->wr.bind_mw.mw->type == IB_MW_TYPE_2)
2003 bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2);
2004 if (wr->wr.bind_mw.bind_info.mw_access_flags & IB_ZERO_BASED)
2005 bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED);
2006 bseg->new_rkey = cpu_to_be32(wr->wr.bind_mw.rkey);
2007 bseg->lkey = cpu_to_be32(wr->wr.bind_mw.bind_info.mr->lkey);
2008 bseg->addr = cpu_to_be64(wr->wr.bind_mw.bind_info.addr);
2009 bseg->length = cpu_to_be64(wr->wr.bind_mw.bind_info.length);
2010}
2011
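/* Build a local-invalidate segment for the given rkey. */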
2012static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
2013{
2014 memset(iseg, 0, sizeof(*iseg));
2015 iseg->mem_key = cpu_to_be32(rkey);
2016}
2017
2018static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
2019 u64 remote_addr, u32 rkey)
2020{
2021 rseg->raddr = cpu_to_be64(remote_addr);
2022 rseg->rkey = cpu_to_be32(rkey);
2023 rseg->reserved = 0;
2024}
2025
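/*
 * Fill the atomic segment.  Compare-and-swap carries both swap and
 * compare operands, masked fetch-and-add carries the add value and
 * its mask, and plain fetch-and-add only needs the add value.
 */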
2026static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *wr)
2027{
2028 if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
2029 aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
2030 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
2031 } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
2032 aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
2033 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask);
2034 } else {
2035 aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
2036 aseg->compare = 0;
2037 }
2039}
2040
2041static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
2042 struct ib_send_wr *wr)
2043{
2044 aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
2045 aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask);
2046 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
2047 aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask);
2048}
2049
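/*
 * Copy the address vector, destination QPN, Q_Key and the Ethernet
 * AV's VLAN/MAC into a UD datagram segment.
 */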
2050static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
2051 struct ib_send_wr *wr)
2052{
2053 memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
2054 dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
2055 dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
2056 dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan;
2057 memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6);
2058}
2059
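/*
 * Build the datagram segment used to tunnel special QP MADs to the
 * per-port QP1 tunnel QP: keep only the port, source info and SL
 * from the original AV, force loopback, and use the Q_Key already
 * programmed in the tunnel QP context.
 */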
2060static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
2061 struct mlx4_wqe_datagram_seg *dseg,
2062 struct ib_send_wr *wr, enum ib_qp_type qpt)
2063{
2064 union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av;
2065 struct mlx4_av sqp_av = {0};
2066 int port = *((u8 *) &av->ib.port_pd) & 0x3;
2067
/* force loopback */
2069 sqp_av.port_pd = av->ib.port_pd | cpu_to_be32(0x80000000);
2070 sqp_av.g_slid = av->ib.g_slid & 0x7f;
2071 sqp_av.sl_tclass_flowlabel = av->ib.sl_tclass_flowlabel &
2072 cpu_to_be32(0xf0000000);
2073
2074 memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av));
2075
2076 dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
/* Use the QKEY from the QP context, which is set by the master */
2078 dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
2079}
2080
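/*
 * Write the tunnel header (AV, remote QPN, P_Key index and Q_Key) as
 * inline data, splitting it into two inline segments if it would
 * otherwise cross a 64-byte boundary.
 */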
2081static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len)
2082{
2083 struct mlx4_wqe_inline_seg *inl = wqe;
2084 struct mlx4_ib_tunnel_header hdr;
2085 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
2086 int spc;
2087 int i;
2088
2089 memcpy(&hdr.av, &ah->av, sizeof hdr.av);
2090 hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
2091 hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index);
2092 hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
2093
2094 spc = MLX4_INLINE_ALIGN -
2095 ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
2096 if (sizeof (hdr) <= spc) {
2097 memcpy(inl + 1, &hdr, sizeof (hdr));
2098 wmb();
2099 inl->byte_count = cpu_to_be32(1 << 31 | sizeof (hdr));
2100 i = 1;
2101 } else {
2102 memcpy(inl + 1, &hdr, spc);
2103 wmb();
2104 inl->byte_count = cpu_to_be32(1 << 31 | spc);
2105
2106 inl = (void *) (inl + 1) + spc;
2107 memcpy(inl + 1, (void *) &hdr + spc, sizeof (hdr) - spc);
2108 wmb();
2109 inl->byte_count = cpu_to_be32(1 << 31 | (sizeof (hdr) - spc));
2110 i = 2;
2111 }
2112
2113 *mlx_seg_len =
2114 ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + sizeof (hdr), 16);
2115}
2116
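/*
 * Build the trailing 4-byte inline segment that holds the ICRC on
 * MLX (QP0/QP1) sends.
 */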
2117static void set_mlx_icrc_seg(void *dseg)
2118{
2119 u32 *t = dseg;
2120 struct mlx4_wqe_inline_seg *iseg = dseg;
2121
2122 t[1] = 0;
2123
/*
 * Need a barrier here before writing the byte_count field to make
 * sure that all the data is visible before the byte_count field is
 * set.  Otherwise, if the segment begins a new cacheline, the HCA
 * prefetcher could grab the 64-byte chunk and get a valid
 * (!= 0xffffffff) byte count but stale data, and end up sending the
 * wrong data.
 */
2132 wmb();
2133
2134 iseg->byte_count = cpu_to_be32((1 << 31) | 4);
2135}
2136
2137static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
2138{
2139 dseg->lkey = cpu_to_be32(sg->lkey);
2140 dseg->addr = cpu_to_be64(sg->addr);
2141
/*
 * Need a barrier here before writing the byte_count field to make
 * sure that all the data is visible before the byte_count field is
 * set.  Otherwise, if the segment begins a new cacheline, the HCA
 * prefetcher could grab the 64-byte chunk and get a valid
 * (!= 0xffffffff) byte count but stale data, and end up sending the
 * wrong data.
 */
2150 wmb();
2151
2152 dseg->byte_count = cpu_to_be32(sg->length);
2153}
2154
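/*
 * Receive-queue variant of set_data_seg(): byte_count can be written
 * directly because receive WQEs only become visible to the HCA via
 * the doorbell record, which is updated after a wmb() in
 * mlx4_ib_post_recv().
 */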
2155static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
2156{
2157 dseg->byte_count = cpu_to_be32(sg->length);
2158 dseg->lkey = cpu_to_be32(sg->lkey);
2159 dseg->addr = cpu_to_be64(sg->addr);
2160}
2161
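/*
 * Build an LSO segment: copy the packet headers inline and encode
 * the MSS and header length.  Headers longer than a cache line set
 * the extra flag (blh) that is OR'd into the WQE's owner/opcode word.
 */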
2162static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
2163 struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
2164 __be32 *lso_hdr_sz, __be32 *blh)
2165{
2166 unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
2167
2168 if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE))
2169 *blh = cpu_to_be32(1 << 6);
2170
2171 if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
2172 wr->num_sge > qp->sq.max_gs - (halign >> 4)))
2173 return -EINVAL;
2174
2175 memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);
2176
2177 *lso_hdr_sz = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 |
2178 wr->wr.ud.hlen);
2179 *lso_seg_len = halign;
2180 return 0;
2181}
2182
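/*
 * Return the immediate data or the rkey to invalidate for the ctrl
 * segment, or 0 if the opcode carries neither.
 */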
2183static __be32 send_ieth(struct ib_send_wr *wr)
2184{
2185 switch (wr->opcode) {
2186 case IB_WR_SEND_WITH_IMM:
2187 case IB_WR_RDMA_WRITE_WITH_IMM:
2188 return wr->ex.imm_data;
2189
2190 case IB_WR_SEND_WITH_INV:
2191 return cpu_to_be32(wr->ex.invalidate_rkey);
2192
2193 default:
2194 return 0;
2195 }
2196}
2197
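/* Write a zero-length inline data segment (just the inline flag, byte count 0). */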
2198static void add_zero_len_inline(void *wqe)
2199{
2200 struct mlx4_wqe_inline_seg *inl = wqe;
2201 memset(wqe, 0, 16);
2202 inl->byte_count = cpu_to_be32(1 << 31);
2203}
2204
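/*
 * Post a list of send work requests.  For each WR the ctrl segment is
 * filled first, then any transport-specific segments, then the data
 * segments (written in reverse order); the doorbell is rung once
 * after the whole list has been written.
 */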
2205int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2206 struct ib_send_wr **bad_wr)
2207{
2208 struct mlx4_ib_qp *qp = to_mqp(ibqp);
2209 void *wqe;
2210 struct mlx4_wqe_ctrl_seg *ctrl;
2211 struct mlx4_wqe_data_seg *dseg;
2212 unsigned long flags;
2213 int nreq;
2214 int err = 0;
2215 unsigned ind;
2216 int uninitialized_var(stamp);
2217 int uninitialized_var(size);
2218 unsigned uninitialized_var(seglen);
2219 __be32 dummy;
2220 __be32 *lso_wqe;
2221 __be32 uninitialized_var(lso_hdr_sz);
2222 __be32 blh;
2223 int i;
2224
2225 spin_lock_irqsave(&qp->sq.lock, flags);
2226
2227 ind = qp->sq_next_wqe;
2228
2229 for (nreq = 0; wr; ++nreq, wr = wr->next) {
2230 lso_wqe = &dummy;
2231 blh = 0;
2232
2233 if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
2234 err = -ENOMEM;
2235 *bad_wr = wr;
2236 goto out;
2237 }
2238
2239 if (unlikely(wr->num_sge > qp->sq.max_gs)) {
2240 err = -EINVAL;
2241 *bad_wr = wr;
2242 goto out;
2243 }
2244
2245 ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
2246 qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = wr->wr_id;
2247
2248 ctrl->srcrb_flags =
2249 (wr->send_flags & IB_SEND_SIGNALED ?
2250 cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
2251 (wr->send_flags & IB_SEND_SOLICITED ?
2252 cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) |
2253 ((wr->send_flags & IB_SEND_IP_CSUM) ?
2254 cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
2255 MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) |
2256 qp->sq_signal_bits;
2257
2258 ctrl->imm = send_ieth(wr);
2259
2260 wqe += sizeof *ctrl;
2261 size = sizeof *ctrl / 16;
2262
2263 switch (qp->mlx4_ib_qp_type) {
2264 case MLX4_IB_QPT_RC:
2265 case MLX4_IB_QPT_UC:
2266 switch (wr->opcode) {
2267 case IB_WR_ATOMIC_CMP_AND_SWP:
2268 case IB_WR_ATOMIC_FETCH_AND_ADD:
2269 case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
2270 set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
2271 wr->wr.atomic.rkey);
2272 wqe += sizeof (struct mlx4_wqe_raddr_seg);
2273
2274 set_atomic_seg(wqe, wr);
2275 wqe += sizeof (struct mlx4_wqe_atomic_seg);
2276
2277 size += (sizeof (struct mlx4_wqe_raddr_seg) +
2278 sizeof (struct mlx4_wqe_atomic_seg)) / 16;
2279
2280 break;
2281
2282 case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
2283 set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
2284 wr->wr.atomic.rkey);
2285 wqe += sizeof (struct mlx4_wqe_raddr_seg);
2286
2287 set_masked_atomic_seg(wqe, wr);
2288 wqe += sizeof (struct mlx4_wqe_masked_atomic_seg);
2289
2290 size += (sizeof (struct mlx4_wqe_raddr_seg) +
2291 sizeof (struct mlx4_wqe_masked_atomic_seg)) / 16;
2292
2293 break;
2294
2295 case IB_WR_RDMA_READ:
2296 case IB_WR_RDMA_WRITE:
2297 case IB_WR_RDMA_WRITE_WITH_IMM:
2298 set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
2299 wr->wr.rdma.rkey);
2300 wqe += sizeof (struct mlx4_wqe_raddr_seg);
2301 size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
2302 break;
2303
2304 case IB_WR_LOCAL_INV:
2305 ctrl->srcrb_flags |=
2306 cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
2307 set_local_inv_seg(wqe, wr->ex.invalidate_rkey);
2308 wqe += sizeof (struct mlx4_wqe_local_inval_seg);
2309 size += sizeof (struct mlx4_wqe_local_inval_seg) / 16;
2310 break;
2311
2312 case IB_WR_FAST_REG_MR:
2313 ctrl->srcrb_flags |=
2314 cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
2315 set_fmr_seg(wqe, wr);
2316 wqe += sizeof (struct mlx4_wqe_fmr_seg);
2317 size += sizeof (struct mlx4_wqe_fmr_seg) / 16;
2318 break;
2319
2320 case IB_WR_BIND_MW:
2321 ctrl->srcrb_flags |=
2322 cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
2323 set_bind_seg(wqe, wr);
2324 wqe += sizeof(struct mlx4_wqe_bind_seg);
2325 size += sizeof(struct mlx4_wqe_bind_seg) / 16;
2326 break;
2327 default:
/* No extra segments required for sends */
2329 break;
2330 }
2331 break;
2332
2333 case MLX4_IB_QPT_TUN_SMI_OWNER:
2334 err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
2335 if (unlikely(err)) {
2336 *bad_wr = wr;
2337 goto out;
2338 }
2339 wqe += seglen;
2340 size += seglen / 16;
2341 break;
2342 case MLX4_IB_QPT_TUN_SMI:
2343 case MLX4_IB_QPT_TUN_GSI:
/* this is a UD qp used in MAD responses to slaves. */
2345 set_datagram_seg(wqe, wr);
/* set the forced-loopback bit in the data seg av */
2347 *(__be32 *) wqe |= cpu_to_be32(0x80000000);
2348 wqe += sizeof (struct mlx4_wqe_datagram_seg);
2349 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
2350 break;
2351 case MLX4_IB_QPT_UD:
2352 set_datagram_seg(wqe, wr);
2353 wqe += sizeof (struct mlx4_wqe_datagram_seg);
2354 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
2355
2356 if (wr->opcode == IB_WR_LSO) {
2357 err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh);
2358 if (unlikely(err)) {
2359 *bad_wr = wr;
2360 goto out;
2361 }
2362 lso_wqe = (__be32 *) wqe;
2363 wqe += seglen;
2364 size += seglen / 16;
2365 }
2366 break;
2367
2368 case MLX4_IB_QPT_PROXY_SMI_OWNER:
2369 if (unlikely(!mlx4_is_master(to_mdev(ibqp->device)->dev))) {
2370 err = -ENOSYS;
2371 *bad_wr = wr;
2372 goto out;
2373 }
2374 err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
2375 if (unlikely(err)) {
2376 *bad_wr = wr;
2377 goto out;
2378 }
2379 wqe += seglen;
2380 size += seglen / 16;
2381
2382 add_zero_len_inline(wqe);
2383 wqe += 16;
2384 size++;
2385 build_tunnel_header(wr, wqe, &seglen);
2386 wqe += seglen;
2387 size += seglen / 16;
2388 break;
2389 case MLX4_IB_QPT_PROXY_SMI:
/* don't allow QP0 sends on guests */
2391 err = -ENOSYS;
2392 *bad_wr = wr;
2393 goto out;
2394 case MLX4_IB_QPT_PROXY_GSI:
/* If we are tunneling special qps, this is a UD qp.
 * In this case we first add a UD segment targeting
 * the tunnel qp, and then add a header with address
 * information */
2399 set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, ibqp->qp_type);
2400 wqe += sizeof (struct mlx4_wqe_datagram_seg);
2401 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
2402 build_tunnel_header(wr, wqe, &seglen);
2403 wqe += seglen;
2404 size += seglen / 16;
2405 break;
2406
2407 case MLX4_IB_QPT_SMI:
2408 case MLX4_IB_QPT_GSI:
2409 err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen);
2410 if (unlikely(err)) {
2411 *bad_wr = wr;
2412 goto out;
2413 }
2414 wqe += seglen;
2415 size += seglen / 16;
2416 break;
2417
2418 default:
2419 break;
2420 }
2421
/*
 * Write data segments in reverse order, so as to
 * overwrite cacheline stamp last within each
 * cacheline.  This avoids issues with WQE
 * prefetching.
 */
2429 dseg = wqe;
2430 dseg += wr->num_sge - 1;
2431 size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16);
2432
/* Add one more inline data segment for ICRC for MLX sends */
2434 if (unlikely(qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
2435 qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI ||
2436 qp->mlx4_ib_qp_type &
2437 (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))) {
2438 set_mlx_icrc_seg(dseg + 1);
2439 size += sizeof (struct mlx4_wqe_data_seg) / 16;
2440 }
2441
2442 for (i = wr->num_sge - 1; i >= 0; --i, --dseg)
2443 set_data_seg(dseg, wr->sg_list + i);
2444
/*
 * Possibly overwrite stamping in cacheline with LSO
 * segment only after making sure all data segments
 * are written.
 */
2450 wmb();
2451 *lso_wqe = lso_hdr_sz;
2452
2453 ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ?
2454 MLX4_WQE_CTRL_FENCE : 0) | size;
2455
/*
 * Make sure descriptor is fully written before
 * setting ownership bit (because HW can start
 * executing as soon as we do).
 */
2461 wmb();
2462
2463 if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
2464 *bad_wr = wr;
2465 err = -EINVAL;
2466 goto out;
2467 }
2468
2469 ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
2470 (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh;
2471
2472 stamp = ind + qp->sq_spare_wqes;
2473 ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);
2474
/*
 * We can improve latency by not stamping the last
 * send queue WQE until after ringing the doorbell, so
 * only stamp here if there are still more WQEs to post.
 *
 * Same optimization applies to padding with NOP WQE
 * in case of WQE shrinking (used to prevent wrap-around
 * in the middle of a WR).
 */
2484 if (wr->next) {
2485 stamp_send_wqe(qp, stamp, size * 16);
2486 ind = pad_wraparound(qp, ind);
2487 }
2488 }
2489
2490out:
2491 if (likely(nreq)) {
2492 qp->sq.head += nreq;
2493
/*
 * Make sure that descriptors are written before
 * doorbell record.
 */
2498 wmb();
2499
2500 writel(qp->doorbell_qpn,
2501 to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL);
2502
/*
 * Make sure doorbells don't leak out of SQ spinlock
 * and reach the HCA out of order.
 */
2507 mmiowb();
2508
2509 stamp_send_wqe(qp, stamp, size * 16);
2510
2511 ind = pad_wraparound(qp, ind);
2512 qp->sq_next_wqe = ind;
2513 }
2514
2515 spin_unlock_irqrestore(&qp->sq.lock, flags);
2516
2517 return err;
2518}
2519
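/*
 * Post a list of receive work requests.  For proxy (tunneled special)
 * QPs, the first scatter entry of each WQE is pointed at the
 * preallocated buffer used to receive the proxy SQP header.
 */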
2520int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
2521 struct ib_recv_wr **bad_wr)
2522{
2523 struct mlx4_ib_qp *qp = to_mqp(ibqp);
2524 struct mlx4_wqe_data_seg *scat;
2525 unsigned long flags;
2526 int err = 0;
2527 int nreq;
2528 int ind;
2529 int max_gs;
2530 int i;
2531
2532 max_gs = qp->rq.max_gs;
2533 spin_lock_irqsave(&qp->rq.lock, flags);
2534
2535 ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
2536
2537 for (nreq = 0; wr; ++nreq, wr = wr->next) {
2538 if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
2539 err = -ENOMEM;
2540 *bad_wr = wr;
2541 goto out;
2542 }
2543
2544 if (unlikely(wr->num_sge > qp->rq.max_gs)) {
2545 err = -EINVAL;
2546 *bad_wr = wr;
2547 goto out;
2548 }
2549
2550 scat = get_recv_wqe(qp, ind);
2551
2552 if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
2553 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
2554 ib_dma_sync_single_for_device(ibqp->device,
2555 qp->sqp_proxy_rcv[ind].map,
2556 sizeof (struct mlx4_ib_proxy_sqp_hdr),
2557 DMA_FROM_DEVICE);
2558 scat->byte_count =
2559 cpu_to_be32(sizeof (struct mlx4_ib_proxy_sqp_hdr));
2560
2561 scat->lkey = cpu_to_be32(wr->sg_list->lkey);
2562 scat->addr = cpu_to_be64(qp->sqp_proxy_rcv[ind].map);
2563 scat++;
2564 max_gs--;
2565 }
2566
2567 for (i = 0; i < wr->num_sge; ++i)
2568 __set_data_seg(scat + i, wr->sg_list + i);
2569
2570 if (i < max_gs) {
2571 scat[i].byte_count = 0;
2572 scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY);
2573 scat[i].addr = 0;
2574 }
2575
2576 qp->rq.wrid[ind] = wr->wr_id;
2577
2578 ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
2579 }
2580
2581out:
2582 if (likely(nreq)) {
2583 qp->rq.head += nreq;
2584
/*
 * Make sure that descriptors are written before
 * doorbell record.
 */
2589 wmb();
2590
2591 *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
2592 }
2593
2594 spin_unlock_irqrestore(&qp->rq.lock, flags);
2595
2596 return err;
2597}
2598
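/*
 * Helpers translating firmware QP state, migration state and access
 * bits back to their IB verbs equivalents.
 */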
2599static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state)
2600{
2601 switch (mlx4_state) {
2602 case MLX4_QP_STATE_RST: return IB_QPS_RESET;
2603 case MLX4_QP_STATE_INIT: return IB_QPS_INIT;
2604 case MLX4_QP_STATE_RTR: return IB_QPS_RTR;
2605 case MLX4_QP_STATE_RTS: return IB_QPS_RTS;
2606 case MLX4_QP_STATE_SQ_DRAINING:
2607 case MLX4_QP_STATE_SQD: return IB_QPS_SQD;
2608 case MLX4_QP_STATE_SQER: return IB_QPS_SQE;
2609 case MLX4_QP_STATE_ERR: return IB_QPS_ERR;
2610 default: return -1;
2611 }
2612}
2613
2614static inline enum ib_mig_state to_ib_mig_state(int mlx4_mig_state)
2615{
2616 switch (mlx4_mig_state) {
2617 case MLX4_QP_PM_ARMED: return IB_MIG_ARMED;
2618 case MLX4_QP_PM_REARM: return IB_MIG_REARM;
2619 case MLX4_QP_PM_MIGRATED: return IB_MIG_MIGRATED;
2620 default: return -1;
2621 }
2622}
2623
2624static int to_ib_qp_access_flags(int mlx4_flags)
2625{
2626 int ib_flags = 0;
2627
2628 if (mlx4_flags & MLX4_QP_BIT_RRE)
2629 ib_flags |= IB_ACCESS_REMOTE_READ;
2630 if (mlx4_flags & MLX4_QP_BIT_RWE)
2631 ib_flags |= IB_ACCESS_REMOTE_WRITE;
2632 if (mlx4_flags & MLX4_QP_BIT_RAE)
2633 ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
2634
2635 return ib_flags;
2636}
2637
2638static void to_ib_ah_attr(struct mlx4_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr,
2639 struct mlx4_qp_path *path)
2640{
2641 struct mlx4_dev *dev = ibdev->dev;
2642 int is_eth;
2643
2644 memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
2645 ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1;
2646
2647 if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
2648 return;
2649
2650 is_eth = rdma_port_get_link_layer(&ibdev->ib_dev, ib_ah_attr->port_num) ==
2651 IB_LINK_LAYER_ETHERNET;
2652 if (is_eth)
2653 ib_ah_attr->sl = ((path->sched_queue >> 3) & 0x7) |
2654 ((path->sched_queue & 4) << 1);
2655 else
2656 ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf;
2657
2658 ib_ah_attr->dlid = be16_to_cpu(path->rlid);
2659 ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f;
2660 ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0;
2661 ib_ah_attr->ah_flags = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
2662 if (ib_ah_attr->ah_flags) {
2663 ib_ah_attr->grh.sgid_index = path->mgid_index;
2664 ib_ah_attr->grh.hop_limit = path->hop_limit;
2665 ib_ah_attr->grh.traffic_class =
2666 (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
2667 ib_ah_attr->grh.flow_label =
2668 be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
2669 memcpy(ib_ah_attr->grh.dgid.raw,
2670 path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
2671 }
2672}
2673
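/*
 * Query a QP: read the QP context from the firmware (unless the QP is
 * in RESET) and translate it into ib_qp_attr / ib_qp_init_attr.
 */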
2674int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
2675 struct ib_qp_init_attr *qp_init_attr)
2676{
2677 struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
2678 struct mlx4_ib_qp *qp = to_mqp(ibqp);
2679 struct mlx4_qp_context context;
2680 int mlx4_state;
2681 int err = 0;
2682
2683 mutex_lock(&qp->mutex);
2684
2685 if (qp->state == IB_QPS_RESET) {
2686 qp_attr->qp_state = IB_QPS_RESET;
2687 goto done;
2688 }
2689
2690 err = mlx4_qp_query(dev->dev, &qp->mqp, &context);
2691 if (err) {
2692 err = -EINVAL;
2693 goto out;
2694 }
2695
2696 mlx4_state = be32_to_cpu(context.flags) >> 28;
2697
2698 qp->state = to_ib_qp_state(mlx4_state);
2699 qp_attr->qp_state = qp->state;
2700 qp_attr->path_mtu = context.mtu_msgmax >> 5;
2701 qp_attr->path_mig_state =
2702 to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3);
2703 qp_attr->qkey = be32_to_cpu(context.qkey);
2704 qp_attr->rq_psn = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff;
2705 qp_attr->sq_psn = be32_to_cpu(context.next_send_psn) & 0xffffff;
2706 qp_attr->dest_qp_num = be32_to_cpu(context.remote_qpn) & 0xffffff;
2707 qp_attr->qp_access_flags =
2708 to_ib_qp_access_flags(be32_to_cpu(context.params2));
2709
2710 if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
2711 to_ib_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path);
2712 to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path);
2713 qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
2714 qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
2715 }
2716
2717 qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f;
2718 if (qp_attr->qp_state == IB_QPS_INIT)
2719 qp_attr->port_num = qp->port;
2720 else
2721 qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1;
2722
/* qp_attr->en_sqd_async_notify is only applicable in modify qp */
2724 qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING;
2725
2726 qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context.params1) >> 21) & 0x7);
2727
2728 qp_attr->max_dest_rd_atomic =
2729 1 << ((be32_to_cpu(context.params2) >> 21) & 0x7);
2730 qp_attr->min_rnr_timer =
2731 (be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f;
2732 qp_attr->timeout = context.pri_path.ackto >> 3;
2733 qp_attr->retry_cnt = (be32_to_cpu(context.params1) >> 16) & 0x7;
2734 qp_attr->rnr_retry = (be32_to_cpu(context.params1) >> 13) & 0x7;
2735 qp_attr->alt_timeout = context.alt_path.ackto >> 3;
2736
2737done:
2738 qp_attr->cur_qp_state = qp_attr->qp_state;
2739 qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt;
2740 qp_attr->cap.max_recv_sge = qp->rq.max_gs;
2741
2742 if (!ibqp->uobject) {
2743 qp_attr->cap.max_send_wr = qp->sq.wqe_cnt;
2744 qp_attr->cap.max_send_sge = qp->sq.max_gs;
2745 } else {
2746 qp_attr->cap.max_send_wr = 0;
2747 qp_attr->cap.max_send_sge = 0;
2748 }
2749
/*
 * We don't support inline sends for kernel QPs (yet), and we
 * don't know what userspace's value should be.
 */
2754 qp_attr->cap.max_inline_data = 0;
2755
2756 qp_init_attr->cap = qp_attr->cap;
2757
2758 qp_init_attr->create_flags = 0;
2759 if (qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)
2760 qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
2761
2762 if (qp->flags & MLX4_IB_QP_LSO)
2763 qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
2764
2765 qp_init_attr->sq_sig_type =
2766 qp->sq_signal_bits == cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) ?
2767 IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
2768
2769out:
2770 mutex_unlock(&qp->mutex);
2771 return err;
2772}
2773
2774