#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/netdevice.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_mad.h>

#include <linux/mlx4/qp.h>

#include "mlx4_ib.h"
#include "user.h"
47
enum {
 MLX4_IB_ACK_REQ_FREQ = 8,
};

enum {
 MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83,
 MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f,
 MLX4_IB_LINK_TYPE_IB = 0,
 MLX4_IB_LINK_TYPE_ETH = 1
};
58
enum {
 /*
  * Largest possible UD header: send with GRH and immediate
  * data plus 18 bytes for an Ethernet header with VLAN.
  * (LRH would only use 8 bytes, so Ethernet is the biggest case)
  */
 MLX4_IB_UD_HEADER_SIZE = 82,
 MLX4_IB_LSO_HEADER_SPARE = 128,
};

enum {
 MLX4_IB_IBOE_ETHERTYPE = 0x8915
};
73
struct mlx4_ib_sqp {
 struct mlx4_ib_qp qp;
 int pkey_index;
 u32 qkey;
 u32 send_psn;
 struct ib_ud_header ud_header;
 u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
};

enum {
 MLX4_IB_MIN_SQ_STRIDE = 6,
 MLX4_IB_CACHE_LINE_SIZE = 64,
};

enum {
 MLX4_RAW_QP_MTU = 7,
 MLX4_RAW_QP_MSGMAX = 31,
};
92
static const __be32 mlx4_ib_opcode[] = {
 [IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND),
 [IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO),
 [IB_WR_SEND_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_SEND_IMM),
 [IB_WR_RDMA_WRITE] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
 [IB_WR_RDMA_WRITE_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
 [IB_WR_RDMA_READ] = cpu_to_be32(MLX4_OPCODE_RDMA_READ),
 [IB_WR_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
 [IB_WR_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
 [IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
 [IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
 [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR),
 [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
 [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
 [IB_WR_BIND_MW] = cpu_to_be32(MLX4_OPCODE_BIND_MW),
};
109
110static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
111{
112 return container_of(mqp, struct mlx4_ib_sqp, qp);
113}
114
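/*
 * Under SR-IOV, MADs for the slaves are tunneled through a dedicated
 * range of QPs starting at base_tunnel_sqpn; check whether this QP
 * falls inside that range.
 */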
115static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
116{
117 if (!mlx4_is_master(dev->dev))
118 return 0;
119
120 return qp->mqp.qpn >= dev->dev->phys_caps.base_tunnel_sqpn &&
121 qp->mqp.qpn < dev->dev->phys_caps.base_tunnel_sqpn +
122 8 * MLX4_MFUNC_MAX;
123}
124
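/*
 * A QP is a special QP (QP0/QP1) if it is one of the real SQPs owned
 * by the native/master function, or one of the per-port proxy SQPs
 * on a multi-function (SR-IOV) device.
 */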
125static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
126{
127 int proxy_sqp = 0;
128 int real_sqp = 0;
129 int i;
130
131 real_sqp = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
132 qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
133 qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 3);
134 if (real_sqp)
135 return 1;
136
137 if (mlx4_is_mfunc(dev->dev)) {
138 for (i = 0; i < dev->dev->caps.num_ports; i++) {
139 if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i] ||
140 qp->mqp.qpn == dev->dev->caps.qp1_proxy[i]) {
141 proxy_sqp = 1;
142 break;
143 }
144 }
145 }
146 return proxy_sqp;
147}

/* used for INIT/CLOSE port logic */
150static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
151{
152 int proxy_qp0 = 0;
153 int real_qp0 = 0;
154 int i;
155
156 real_qp0 = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
157 qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
158 qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 1);
159 if (real_qp0)
160 return 1;
161
162 if (mlx4_is_mfunc(dev->dev)) {
163 for (i = 0; i < dev->dev->caps.num_ports; i++) {
164 if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i]) {
165 proxy_qp0 = 1;
166 break;
167 }
168 }
169 }
170 return proxy_qp0;
171}
172
173static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
174{
175 return mlx4_buf_offset(&qp->buf, offset);
176}
177
178static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n)
179{
180 return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
181}
182
183static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)
184{
185 return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
186}
187
/*
 * Stamp a SQ WQE so that it is invalid if prefetched by marking the
 * first four bytes of every 64 byte chunk with
 *     0x7FFFFFF | (invalid_ownership_value << 31).
 *
 * When the max work request size is less than or equal to the WQE
 * basic block size, as an optimization, we can stamp all WQEs with
 * 0xffffffff, and skip the very first chunk of each WQE.
 */
197static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size)
198{
199 __be32 *wqe;
200 int i;
201 int s;
202 int ind;
203 void *buf;
204 __be32 stamp;
205 struct mlx4_wqe_ctrl_seg *ctrl;
206
207 if (qp->sq_max_wqes_per_wr > 1) {
208 s = roundup(size, 1U << qp->sq.wqe_shift);
209 for (i = 0; i < s; i += 64) {
210 ind = (i >> qp->sq.wqe_shift) + n;
211 stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) :
212 cpu_to_be32(0xffffffff);
213 buf = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
214 wqe = buf + (i & ((1 << qp->sq.wqe_shift) - 1));
215 *wqe = stamp;
216 }
217 } else {
218 ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
219 s = (ctrl->fence_size & 0x3f) << 4;
220 for (i = 64; i < s; i += 64) {
221 wqe = buf + i;
222 *wqe = cpu_to_be32(0xffffffff);
223 }
224 }
225}
226
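/*
 * Post a NOP WQE of the given size so that the remainder of the send
 * queue is consumed without a real work request ever wrapping around.
 */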
227static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size)
228{
229 struct mlx4_wqe_ctrl_seg *ctrl;
230 struct mlx4_wqe_inline_seg *inl;
231 void *wqe;
232 int s;
233
234 ctrl = wqe = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
235 s = sizeof(struct mlx4_wqe_ctrl_seg);
236
237 if (qp->ibqp.qp_type == IB_QPT_UD) {
238 struct mlx4_wqe_datagram_seg *dgram = wqe + sizeof *ctrl;
239 struct mlx4_av *av = (struct mlx4_av *)dgram->av;
240 memset(dgram, 0, sizeof *dgram);
241 av->port_pd = cpu_to_be32((qp->port << 24) | to_mpd(qp->ibqp.pd)->pdn);
242 s += sizeof(struct mlx4_wqe_datagram_seg);
243 }

 /* Pad the remainder of the WQE with an inline data segment. */
246 if (size > s) {
247 inl = wqe + s;
248 inl->byte_count = cpu_to_be32(1 << 31 | (size - s - sizeof *inl));
249 }
250 ctrl->srcrb_flags = 0;
251 ctrl->fence_size = size / 16;
 /*
  * Make sure the descriptor is fully written before setting the
  * ownership bit (because HW can start executing as soon as we do).
  */
256 wmb();
257
258 ctrl->owner_opcode = cpu_to_be32(MLX4_OPCODE_NOP | MLX4_WQE_CTRL_NEC) |
259 (n & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
260
261 stamp_send_wqe(qp, n + qp->sq_spare_wqes, size);
262}

/* Post NOP WQE to prevent wrap-around in the middle of a WR */
265static inline unsigned pad_wraparound(struct mlx4_ib_qp *qp, int ind)
266{
267 unsigned s = qp->sq.wqe_cnt - (ind & (qp->sq.wqe_cnt - 1));
268 if (unlikely(s < qp->sq_max_wqes_per_wr)) {
269 post_nop_wqe(qp, ind, s << qp->sq.wqe_shift);
270 ind += s;
271 }
272 return ind;
273}
274
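/* Translate firmware QP async events into IB events for the consumer. */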
275static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
276{
277 struct ib_event event;
278 struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
279
280 if (type == MLX4_EVENT_TYPE_PATH_MIG)
281 to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
282
283 if (ibqp->event_handler) {
284 event.device = ibqp->device;
285 event.element.qp = ibqp;
286 switch (type) {
287 case MLX4_EVENT_TYPE_PATH_MIG:
288 event.event = IB_EVENT_PATH_MIG;
289 break;
290 case MLX4_EVENT_TYPE_COMM_EST:
291 event.event = IB_EVENT_COMM_EST;
292 break;
293 case MLX4_EVENT_TYPE_SQ_DRAINED:
294 event.event = IB_EVENT_SQ_DRAINED;
295 break;
296 case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
297 event.event = IB_EVENT_QP_LAST_WQE_REACHED;
298 break;
299 case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
300 event.event = IB_EVENT_QP_FATAL;
301 break;
302 case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
303 event.event = IB_EVENT_PATH_MIG_ERR;
304 break;
305 case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
306 event.event = IB_EVENT_QP_REQ_ERR;
307 break;
308 case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
309 event.event = IB_EVENT_QP_ACCESS_ERR;
310 break;
311 default:
312 pr_warn("Unexpected event type %d "
313 "on QP %06x\n", type, qp->qpn);
314 return;
315 }
316
317 ibqp->event_handler(&event, ibqp->qp_context);
318 }
319}
320
321static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
322{
 /*
  * UD WQEs must have a datagram segment.
  * RC and UC WQEs might have a remote address segment.
  * MLX WQEs need two extra inline data segments (for the UD
  * header and space for the ICRC).
  */
329 switch (type) {
330 case MLX4_IB_QPT_UD:
331 return sizeof (struct mlx4_wqe_ctrl_seg) +
332 sizeof (struct mlx4_wqe_datagram_seg) +
333 ((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0);
334 case MLX4_IB_QPT_PROXY_SMI_OWNER:
335 case MLX4_IB_QPT_PROXY_SMI:
336 case MLX4_IB_QPT_PROXY_GSI:
337 return sizeof (struct mlx4_wqe_ctrl_seg) +
338 sizeof (struct mlx4_wqe_datagram_seg) + 64;
339 case MLX4_IB_QPT_TUN_SMI_OWNER:
340 case MLX4_IB_QPT_TUN_GSI:
341 return sizeof (struct mlx4_wqe_ctrl_seg) +
342 sizeof (struct mlx4_wqe_datagram_seg);
343
344 case MLX4_IB_QPT_UC:
345 return sizeof (struct mlx4_wqe_ctrl_seg) +
346 sizeof (struct mlx4_wqe_raddr_seg);
347 case MLX4_IB_QPT_RC:
348 return sizeof (struct mlx4_wqe_ctrl_seg) +
349 sizeof (struct mlx4_wqe_atomic_seg) +
350 sizeof (struct mlx4_wqe_raddr_seg);
351 case MLX4_IB_QPT_SMI:
352 case MLX4_IB_QPT_GSI:
353 return sizeof (struct mlx4_wqe_ctrl_seg) +
354 ALIGN(MLX4_IB_UD_HEADER_SIZE +
355 DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,
356 MLX4_INLINE_ALIGN) *
357 sizeof (struct mlx4_wqe_inline_seg),
358 sizeof (struct mlx4_wqe_data_seg)) +
359 ALIGN(4 +
360 sizeof (struct mlx4_wqe_inline_seg),
361 sizeof (struct mlx4_wqe_data_seg));
362 default:
363 return sizeof (struct mlx4_wqe_ctrl_seg);
364 }
365}
366
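/* Compute receive queue size and stride from the requested capabilities. */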
367static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
368 int is_user, int has_rq, struct mlx4_ib_qp *qp)
369{
 /* Sanity check RQ size before proceeding */
371 if (cap->max_recv_wr > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE ||
372 cap->max_recv_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg))
373 return -EINVAL;
374
375 if (!has_rq) {
376 if (cap->max_recv_wr)
377 return -EINVAL;
378
379 qp->rq.wqe_cnt = qp->rq.max_gs = 0;
380 } else {
 /* HW requires >= 1 RQ entry with >= 1 gather entry */
382 if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge))
383 return -EINVAL;
384
385 qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, cap->max_recv_wr));
386 qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge));
387 qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg));
388 }
389
 /* leave userspace return values as they were, so as not to break ABI */
391 if (is_user) {
392 cap->max_recv_wr = qp->rq.max_post = qp->rq.wqe_cnt;
393 cap->max_recv_sge = qp->rq.max_gs;
394 } else {
395 cap->max_recv_wr = qp->rq.max_post =
396 min(dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE, qp->rq.wqe_cnt);
397 cap->max_recv_sge = min(qp->rq.max_gs,
398 min(dev->dev->caps.max_sq_sg,
399 dev->dev->caps.max_rq_sg));
400 }
401
402 return 0;
403}
404
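/* Compute send queue geometry (wqe_shift, wqe_cnt, max_gs) for kernel QPs. */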
405static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
406 enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp)
407{
408 int s;

 /* Sanity check SQ size before proceeding */
411 if (cap->max_send_wr > (dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE) ||
412 cap->max_send_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg) ||
413 cap->max_inline_data + send_wqe_overhead(type, qp->flags) +
414 sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
415 return -EINVAL;
416
 /*
  * For MLX transport we need 2 extra S/G entries:
  * one for the header and one for the checksum at the end
  */
421 if ((type == MLX4_IB_QPT_SMI || type == MLX4_IB_QPT_GSI ||
422 type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) &&
423 cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
424 return -EINVAL;
425
426 s = max(cap->max_send_sge * sizeof (struct mlx4_wqe_data_seg),
427 cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg)) +
428 send_wqe_overhead(type, qp->flags);
429
430 if (s > dev->dev->caps.max_sq_desc_sz)
431 return -EINVAL;
432
 /*
  * Hermon supports shrinking WQEs, such that a single work
  * request can include multiple units of 1 << wqe_shift.  This
  * way, work requests can differ in size, and do not have to
  * be a power of 2 in size, saving memory and speeding up send
  * WR posting.  Unfortunately, if we do this then the
  * wqe_index field in CQEs can't be used to look up the WR ID
  * anymore, so we do this only if selective signaling is off.
  *
  * Further, on 32-bit platforms, we can't use vmap() to make
  * the QP buffer virtually contiguous.  Thus we have to use
  * constant-sized WRs to make sure a WR is always fully within
  * a single page-sized chunk.
  *
  * Finally, we use NOP work requests to pad the end of the
  * work queue, to avoid wrap-around in the middle of a WR.  We
  * set the NEC bit to avoid getting completions with error for
  * these NOP WRs, but since NEC is only supported starting
  * with firmware 2.2.232, we use constant-sized WRs for older
  * firmware.
  *
  * And, since MLX QPs only support SEND, we use constant-sized
  * WRs in this case.
  *
  * We look for the smallest value of wqe_shift such that the
  * resulting number of wqes does not exceed device
  * capabilities.
  *
  * We set WQE size to at least 64 bytes, this way stamping
  * invalidates each WQE.
  */
464 if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
465 qp->sq_signal_bits && BITS_PER_LONG == 64 &&
466 type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI &&
467 !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI |
468 MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER)))
469 qp->sq.wqe_shift = ilog2(64);
470 else
471 qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s));
472
473 for (;;) {
474 qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s, 1U << qp->sq.wqe_shift);

 /*
  * We need to leave 2 KB + 1 WR of headroom in the SQ to
  * allow HW to prefetch.
  */
480 qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + qp->sq_max_wqes_per_wr;
481 qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr *
482 qp->sq_max_wqes_per_wr +
483 qp->sq_spare_wqes);
484
485 if (qp->sq.wqe_cnt <= dev->dev->caps.max_wqes)
486 break;
487
488 if (qp->sq_max_wqes_per_wr <= 1)
489 return -EINVAL;
490
491 ++qp->sq.wqe_shift;
492 }
493
494 qp->sq.max_gs = (min(dev->dev->caps.max_sq_desc_sz,
495 (qp->sq_max_wqes_per_wr << qp->sq.wqe_shift)) -
496 send_wqe_overhead(type, qp->flags)) /
497 sizeof (struct mlx4_wqe_data_seg);
498
499 qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
500 (qp->sq.wqe_cnt << qp->sq.wqe_shift);
501 if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
502 qp->rq.offset = 0;
503 qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
504 } else {
505 qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift;
506 qp->sq.offset = 0;
507 }
508
509 cap->max_send_wr = qp->sq.max_post =
510 (qp->sq.wqe_cnt - qp->sq_spare_wqes) / qp->sq_max_wqes_per_wr;
511 cap->max_send_sge = min(qp->sq.max_gs,
512 min(dev->dev->caps.max_sq_sg,
513 dev->dev->caps.max_rq_sg));
514
515 cap->max_inline_data = 0;
516
517 return 0;
518}
519
520static int set_user_sq_size(struct mlx4_ib_dev *dev,
521 struct mlx4_ib_qp *qp,
522 struct mlx4_ib_create_qp *ucmd)
523{
 /* Sanity check SQ size before proceeding */
525 if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes ||
526 ucmd->log_sq_stride >
527 ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) ||
528 ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE)
529 return -EINVAL;
530
531 qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count;
532 qp->sq.wqe_shift = ucmd->log_sq_stride;
533
534 qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
535 (qp->sq.wqe_cnt << qp->sq.wqe_shift);
536
537 return 0;
538}
539
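/*
 * For proxy special QPs, allocate and DMA-map one small buffer per RQ
 * entry to hold the tunnel header of each received MAD.
 */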
540static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
541{
542 int i;
543
544 qp->sqp_proxy_rcv =
545 kmalloc(sizeof (struct mlx4_ib_buf) * qp->rq.wqe_cnt,
546 GFP_KERNEL);
547 if (!qp->sqp_proxy_rcv)
548 return -ENOMEM;
549 for (i = 0; i < qp->rq.wqe_cnt; i++) {
550 qp->sqp_proxy_rcv[i].addr =
551 kmalloc(sizeof (struct mlx4_ib_proxy_sqp_hdr),
552 GFP_KERNEL);
553 if (!qp->sqp_proxy_rcv[i].addr)
554 goto err;
555 qp->sqp_proxy_rcv[i].map =
556 ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr,
557 sizeof (struct mlx4_ib_proxy_sqp_hdr),
558 DMA_FROM_DEVICE);
559 }
560 return 0;
561
562err:
563 while (i > 0) {
564 --i;
565 ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
566 sizeof (struct mlx4_ib_proxy_sqp_hdr),
567 DMA_FROM_DEVICE);
568 kfree(qp->sqp_proxy_rcv[i].addr);
569 }
570 kfree(qp->sqp_proxy_rcv);
571 qp->sqp_proxy_rcv = NULL;
572 return -ENOMEM;
573}
574
575static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
576{
577 int i;
578
579 for (i = 0; i < qp->rq.wqe_cnt; i++) {
580 ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
581 sizeof (struct mlx4_ib_proxy_sqp_hdr),
582 DMA_FROM_DEVICE);
583 kfree(qp->sqp_proxy_rcv[i].addr);
584 }
585 kfree(qp->sqp_proxy_rcv);
586}
587
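/* XRC QPs and QPs attached to an SRQ have no receive queue of their own. */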
588static int qp_has_rq(struct ib_qp_init_attr *attr)
589{
590 if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT)
591 return 0;
592
593 return !attr->srq;
594}
595
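/*
 * Common QP creation path for kernel and userspace QPs, including the
 * proxy/tunnel QPs used for SR-IOV MAD para-virtualization.
 */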
596static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
597 struct ib_qp_init_attr *init_attr,
598 struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp)
599{
600 int qpn;
601 int err;
602 struct mlx4_ib_sqp *sqp;
603 struct mlx4_ib_qp *qp;
604 enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;

 /* When tunneling special QPs, we use a plain UD QP */
607 if (sqpn) {
608 if (mlx4_is_mfunc(dev->dev) &&
609 (!mlx4_is_master(dev->dev) ||
610 !(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) {
611 if (init_attr->qp_type == IB_QPT_GSI)
612 qp_type = MLX4_IB_QPT_PROXY_GSI;
613 else if (mlx4_is_master(dev->dev))
614 qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
615 else
616 qp_type = MLX4_IB_QPT_PROXY_SMI;
617 }
618 qpn = sqpn;
 /* leave room for an extra scatter entry to receive the tunnel header */
620 init_attr->cap.max_recv_sge++;
621 } else if (init_attr->create_flags & MLX4_IB_SRIOV_TUNNEL_QP) {
622 struct mlx4_ib_qp_tunnel_init_attr *tnl_init =
623 container_of(init_attr,
624 struct mlx4_ib_qp_tunnel_init_attr, init_attr);
625 if ((tnl_init->proxy_qp_type != IB_QPT_SMI &&
626 tnl_init->proxy_qp_type != IB_QPT_GSI) ||
627 !mlx4_is_master(dev->dev))
628 return -EINVAL;
629 if (tnl_init->proxy_qp_type == IB_QPT_GSI)
630 qp_type = MLX4_IB_QPT_TUN_GSI;
631 else if (tnl_init->slave == mlx4_master_func_num(dev->dev))
632 qp_type = MLX4_IB_QPT_TUN_SMI_OWNER;
633 else
634 qp_type = MLX4_IB_QPT_TUN_SMI;
 /* we are definitely in the PPF here, since we are creating
  * tunnel QPs; base_tunnel_sqpn is therefore valid. */
637 qpn = dev->dev->phys_caps.base_tunnel_sqpn + 8 * tnl_init->slave
638 + tnl_init->proxy_qp_type * 2 + tnl_init->port - 1;
639 sqpn = qpn;
640 }
641
642 if (!*caller_qp) {
643 if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI ||
644 (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
645 MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
646 sqp = kzalloc(sizeof (struct mlx4_ib_sqp), GFP_KERNEL);
647 if (!sqp)
648 return -ENOMEM;
649 qp = &sqp->qp;
650 } else {
651 qp = kzalloc(sizeof (struct mlx4_ib_qp), GFP_KERNEL);
652 if (!qp)
653 return -ENOMEM;
654 }
655 } else
656 qp = *caller_qp;
657
658 qp->mlx4_ib_qp_type = qp_type;
659
660 mutex_init(&qp->mutex);
661 spin_lock_init(&qp->sq.lock);
662 spin_lock_init(&qp->rq.lock);
663 INIT_LIST_HEAD(&qp->gid_list);
664 INIT_LIST_HEAD(&qp->steering_rules);
665
666 qp->state = IB_QPS_RESET;
667 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
668 qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
669
670 err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp);
671 if (err)
672 goto err;
673
674 if (pd->uobject) {
675 struct mlx4_ib_create_qp ucmd;
676
677 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
678 err = -EFAULT;
679 goto err;
680 }
681
682 qp->sq_no_prefetch = ucmd.sq_no_prefetch;
683
684 err = set_user_sq_size(dev, qp, &ucmd);
685 if (err)
686 goto err;
687
688 qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
689 qp->buf_size, 0, 0);
690 if (IS_ERR(qp->umem)) {
691 err = PTR_ERR(qp->umem);
692 goto err;
693 }
694
695 err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),
696 ilog2(qp->umem->page_size), &qp->mtt);
697 if (err)
698 goto err_buf;
699
700 err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);
701 if (err)
702 goto err_mtt;
703
704 if (qp_has_rq(init_attr)) {
705 err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
706 ucmd.db_addr, &qp->db);
707 if (err)
708 goto err_mtt;
709 }
710 } else {
711 qp->sq_no_prefetch = 0;
712
713 if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
714 qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
715
716 if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
717 qp->flags |= MLX4_IB_QP_LSO;
718
719 err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
720 if (err)
721 goto err;
722
723 if (qp_has_rq(init_attr)) {
724 err = mlx4_db_alloc(dev->dev, &qp->db, 0);
725 if (err)
726 goto err;
727
728 *qp->db.db = 0;
729 }
730
731 if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) {
732 err = -ENOMEM;
733 goto err_db;
734 }
735
736 err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
737 &qp->mtt);
738 if (err)
739 goto err_buf;
740
741 err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
742 if (err)
743 goto err_mtt;
744
745 qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL);
746 qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL);
747
748 if (!qp->sq.wrid || !qp->rq.wrid) {
749 err = -ENOMEM;
750 goto err_wrid;
751 }
752 }
753
754 if (sqpn) {
755 if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
756 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
757 if (alloc_proxy_bufs(pd->device, qp)) {
758 err = -ENOMEM;
759 goto err_wrid;
760 }
761 }
762 } else {

 /* RAW_PACKET QPs need a 256-aligned QPN; other QP types take any free QPN */
765 if (init_attr->qp_type == IB_QPT_RAW_PACKET)
766 err = mlx4_qp_reserve_range(dev->dev, 1, 1 << 8, &qpn);
767 else
768 err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn);
769 if (err)
770 goto err_proxy;
771 }
772
773 err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
774 if (err)
775 goto err_qpn;
776
777 if (init_attr->qp_type == IB_QPT_XRC_TGT)
778 qp->mqp.qpn |= (1 << 23);
779
 /*
  * Hardware wants QPN written in big-endian order (after
  * shifting) for send doorbell.  Precompute this value to save
  * a little bit when posting sends.
  */
785 qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
786
787 qp->mqp.event = mlx4_ib_qp_event;
788 if (!*caller_qp)
789 *caller_qp = qp;
790 return 0;
791
792err_qpn:
793 if (!sqpn)
794 mlx4_qp_release_range(dev->dev, qpn, 1);
795err_proxy:
796 if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI)
797 free_proxy_bufs(pd->device, qp);
798err_wrid:
799 if (pd->uobject) {
800 if (qp_has_rq(init_attr))
801 mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
802 } else {
803 kfree(qp->sq.wrid);
804 kfree(qp->rq.wrid);
805 }
806
807err_mtt:
808 mlx4_mtt_cleanup(dev->dev, &qp->mtt);
809
810err_buf:
811 if (pd->uobject)
812 ib_umem_release(qp->umem);
813 else
814 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
815
816err_db:
817 if (!pd->uobject && qp_has_rq(init_attr))
818 mlx4_db_free(dev->dev, &qp->db);
819
820err:
821 if (!*caller_qp)
822 kfree(qp);
823 return err;
824}
825
826static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state)
827{
828 switch (state) {
829 case IB_QPS_RESET: return MLX4_QP_STATE_RST;
830 case IB_QPS_INIT: return MLX4_QP_STATE_INIT;
831 case IB_QPS_RTR: return MLX4_QP_STATE_RTR;
832 case IB_QPS_RTS: return MLX4_QP_STATE_RTS;
833 case IB_QPS_SQD: return MLX4_QP_STATE_SQD;
834 case IB_QPS_SQE: return MLX4_QP_STATE_SQER;
835 case IB_QPS_ERR: return MLX4_QP_STATE_ERR;
836 default: return -1;
837 }
838}
839
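/*
 * Lock the send and receive CQs in a fixed order (by CQN) to avoid
 * ABBA deadlocks; take a single lock when they are the same CQ.
 */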
840static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
841 __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
842{
843 if (send_cq == recv_cq) {
844 spin_lock_irq(&send_cq->lock);
845 __acquire(&recv_cq->lock);
846 } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
847 spin_lock_irq(&send_cq->lock);
848 spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
849 } else {
850 spin_lock_irq(&recv_cq->lock);
851 spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
852 }
853}
854
855static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
856 __releases(&send_cq->lock) __releases(&recv_cq->lock)
857{
858 if (send_cq == recv_cq) {
859 __release(&recv_cq->lock);
860 spin_unlock_irq(&send_cq->lock);
861 } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
862 spin_unlock(&recv_cq->lock);
863 spin_unlock_irq(&send_cq->lock);
864 } else {
865 spin_unlock(&send_cq->lock);
866 spin_unlock_irq(&recv_cq->lock);
867 }
868}
869
870static void del_gid_entries(struct mlx4_ib_qp *qp)
871{
872 struct mlx4_ib_gid_entry *ge, *tmp;
873
874 list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
875 list_del(&ge->list);
876 kfree(ge);
877 }
878}
879
880static struct mlx4_ib_pd *get_pd(struct mlx4_ib_qp *qp)
881{
882 if (qp->ibqp.qp_type == IB_QPT_XRC_TGT)
883 return to_mpd(to_mxrcd(qp->ibqp.xrcd)->pd);
884 else
885 return to_mpd(qp->ibqp.pd);
886}
887
888static void get_cqs(struct mlx4_ib_qp *qp,
889 struct mlx4_ib_cq **send_cq, struct mlx4_ib_cq **recv_cq)
890{
891 switch (qp->ibqp.qp_type) {
892 case IB_QPT_XRC_TGT:
893 *send_cq = to_mcq(to_mxrcd(qp->ibqp.xrcd)->cq);
894 *recv_cq = *send_cq;
895 break;
896 case IB_QPT_XRC_INI:
897 *send_cq = to_mcq(qp->ibqp.send_cq);
898 *recv_cq = *send_cq;
899 break;
900 default:
901 *send_cq = to_mcq(qp->ibqp.send_cq);
902 *recv_cq = to_mcq(qp->ibqp.recv_cq);
903 break;
904 }
905}
906
907static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
908 int is_user)
909{
910 struct mlx4_ib_cq *send_cq, *recv_cq;
911
912 if (qp->state != IB_QPS_RESET)
913 if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
914 MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
915 pr_warn("modify QP %06x to RESET failed.\n",
916 qp->mqp.qpn);
917
918 get_cqs(qp, &send_cq, &recv_cq);
919
920 mlx4_ib_lock_cqs(send_cq, recv_cq);
921
922 if (!is_user) {
923 __mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
924 qp->ibqp.srq ? to_msrq(qp->ibqp.srq): NULL);
925 if (send_cq != recv_cq)
926 __mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
927 }
928
929 mlx4_qp_remove(dev->dev, &qp->mqp);
930
931 mlx4_ib_unlock_cqs(send_cq, recv_cq);
932
933 mlx4_qp_free(dev->dev, &qp->mqp);
934
935 if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp))
936 mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
937
938 mlx4_mtt_cleanup(dev->dev, &qp->mtt);
939
940 if (is_user) {
941 if (qp->rq.wqe_cnt)
942 mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context),
943 &qp->db);
944 ib_umem_release(qp->umem);
945 } else {
946 kfree(qp->sq.wrid);
947 kfree(qp->rq.wrid);
948 if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
949 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
950 free_proxy_bufs(&dev->ib_dev, qp);
951 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
952 if (qp->rq.wqe_cnt)
953 mlx4_db_free(dev->dev, &qp->db);
954 }
955
956 del_gid_entries(qp);
957}
958
959static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
960{
 /* Native, or PPF owning the SR-IOV SQPs: use the real SQP numbers */
962 if (!mlx4_is_mfunc(dev->dev) ||
963 (mlx4_is_master(dev->dev) &&
964 attr->create_flags & MLX4_IB_SRIOV_SQP)) {
965 return dev->dev->phys_caps.base_sqpn +
966 (attr->qp_type == IB_QPT_SMI ? 0 : 2) +
967 attr->port_num - 1;
968 }
969
970 if (attr->qp_type == IB_QPT_SMI)
971 return dev->dev->caps.qp0_proxy[attr->port_num - 1];
972 else
973 return dev->dev->caps.qp1_proxy[attr->port_num - 1];
974}
975
976struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
977 struct ib_qp_init_attr *init_attr,
978 struct ib_udata *udata)
979{
980 struct mlx4_ib_qp *qp = NULL;
981 int err;
982 u16 xrcdn = 0;
983
 /*
  * We only support LSO, multicast loopback blocking and the
  * SR-IOV tunnel/proxy flags; all but the SR-IOV SQP flag are
  * allowed only for kernel UD QPs.
  */
988 if (init_attr->create_flags & ~(MLX4_IB_QP_LSO |
989 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK |
990 MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP))
991 return ERR_PTR(-EINVAL);
992
993 if (init_attr->create_flags &&
994 (udata ||
995 ((init_attr->create_flags & ~MLX4_IB_SRIOV_SQP) &&
996 init_attr->qp_type != IB_QPT_UD) ||
997 ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
998 init_attr->qp_type > IB_QPT_GSI)))
999 return ERR_PTR(-EINVAL);
1000
1001 switch (init_attr->qp_type) {
1002 case IB_QPT_XRC_TGT:
1003 pd = to_mxrcd(init_attr->xrcd)->pd;
1004 xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
1005 init_attr->send_cq = to_mxrcd(init_attr->xrcd)->cq;
 /* fall through */
1007 case IB_QPT_XRC_INI:
1008 if (!(to_mdev(pd->device)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
1009 return ERR_PTR(-ENOSYS);
1010 init_attr->recv_cq = init_attr->send_cq;
 /* fall through */
1012 case IB_QPT_RC:
1013 case IB_QPT_UC:
1014 case IB_QPT_RAW_PACKET:
1015 qp = kzalloc(sizeof *qp, GFP_KERNEL);
1016 if (!qp)
1017 return ERR_PTR(-ENOMEM);
 /* fall through */
1019 case IB_QPT_UD:
1020 {
1021 err = create_qp_common(to_mdev(pd->device), pd, init_attr,
1022 udata, 0, &qp);
1023 if (err)
1024 return ERR_PTR(err);
1025
1026 qp->ibqp.qp_num = qp->mqp.qpn;
1027 qp->xrcdn = xrcdn;
1028
1029 break;
1030 }
1031 case IB_QPT_SMI:
1032 case IB_QPT_GSI:
1033 {
 /* Userspace is not allowed to create special QPs: */
1035 if (udata)
1036 return ERR_PTR(-EINVAL);
1037
1038 err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
1039 get_sqp_num(to_mdev(pd->device), init_attr),
1040 &qp);
1041 if (err)
1042 return ERR_PTR(err);
1043
1044 qp->port = init_attr->port_num;
1045 qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
1046
1047 break;
1048 }
1049 default:
 /* Don't support raw QPs */
1051 return ERR_PTR(-EINVAL);
1052 }
1053
1054 return &qp->ibqp;
1055}
1056
1057int mlx4_ib_destroy_qp(struct ib_qp *qp)
1058{
1059 struct mlx4_ib_dev *dev = to_mdev(qp->device);
1060 struct mlx4_ib_qp *mqp = to_mqp(qp);
1061 struct mlx4_ib_pd *pd;
1062
1063 if (is_qp0(dev, mqp))
1064 mlx4_CLOSE_PORT(dev->dev, mqp->port);
1065
1066 pd = get_pd(mqp);
1067 destroy_qp_common(dev, mqp, !!pd->ibpd.uobject);
1068
1069 if (is_sqp(dev, mqp))
1070 kfree(to_msqp(mqp));
1071 else
1072 kfree(mqp);
1073
1074 return 0;
1075}
1076
1077static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type)
1078{
1079 switch (type) {
1080 case MLX4_IB_QPT_RC: return MLX4_QP_ST_RC;
1081 case MLX4_IB_QPT_UC: return MLX4_QP_ST_UC;
1082 case MLX4_IB_QPT_UD: return MLX4_QP_ST_UD;
1083 case MLX4_IB_QPT_XRC_INI:
1084 case MLX4_IB_QPT_XRC_TGT: return MLX4_QP_ST_XRC;
1085 case MLX4_IB_QPT_SMI:
1086 case MLX4_IB_QPT_GSI:
1087 case MLX4_IB_QPT_RAW_PACKET: return MLX4_QP_ST_MLX;
1088
1089 case MLX4_IB_QPT_PROXY_SMI_OWNER:
1090 case MLX4_IB_QPT_TUN_SMI_OWNER: return (mlx4_is_mfunc(dev->dev) ?
1091 MLX4_QP_ST_MLX : -1);
1092 case MLX4_IB_QPT_PROXY_SMI:
1093 case MLX4_IB_QPT_TUN_SMI:
1094 case MLX4_IB_QPT_PROXY_GSI:
1095 case MLX4_IB_QPT_TUN_GSI: return (mlx4_is_mfunc(dev->dev) ?
1096 MLX4_QP_ST_UD : -1);
1097 default: return -1;
1098 }
1099}
1100
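/*
 * Fold the (possibly partially updated) access flags and responder
 * resources into the hardware RWE/RRE/RAE bits.
 */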
1101static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, const struct ib_qp_attr *attr,
1102 int attr_mask)
1103{
1104 u8 dest_rd_atomic;
1105 u32 access_flags;
1106 u32 hw_access_flags = 0;
1107
1108 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1109 dest_rd_atomic = attr->max_dest_rd_atomic;
1110 else
1111 dest_rd_atomic = qp->resp_depth;
1112
1113 if (attr_mask & IB_QP_ACCESS_FLAGS)
1114 access_flags = attr->qp_access_flags;
1115 else
1116 access_flags = qp->atomic_rd_en;
1117
1118 if (!dest_rd_atomic)
1119 access_flags &= IB_ACCESS_REMOTE_WRITE;
1120
1121 if (access_flags & IB_ACCESS_REMOTE_READ)
1122 hw_access_flags |= MLX4_QP_BIT_RRE;
1123 if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
1124 hw_access_flags |= MLX4_QP_BIT_RAE;
1125 if (access_flags & IB_ACCESS_REMOTE_WRITE)
1126 hw_access_flags |= MLX4_QP_BIT_RWE;
1127
1128 return cpu_to_be32(hw_access_flags);
1129}
1130
1131static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, const struct ib_qp_attr *attr,
1132 int attr_mask)
1133{
1134 if (attr_mask & IB_QP_PKEY_INDEX)
1135 sqp->pkey_index = attr->pkey_index;
1136 if (attr_mask & IB_QP_QKEY)
1137 sqp->qkey = attr->qkey;
1138 if (attr_mask & IB_QP_SQ_PSN)
1139 sqp->send_psn = attr->sq_psn;
1140}
1141
1142static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
1143{
1144 path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
1145}
1146
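/*
 * Fill a hardware address path from an ib_ah_attr, handling both IB
 * and Ethernet (RoCE) link layers.
 */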
1147static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
1148 struct mlx4_qp_path *path, u8 port)
1149{
1150 int err;
1151 int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) ==
1152 IB_LINK_LAYER_ETHERNET;
1153 u8 mac[6];
1154 int is_mcast;
1155 u16 vlan_tag;
1156 int vidx;
1157
1158 path->grh_mylmc = ah->src_path_bits & 0x7f;
1159 path->rlid = cpu_to_be16(ah->dlid);
1160 if (ah->static_rate) {
1161 path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET;
1162 while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
1163 !(1 << path->static_rate & dev->dev->caps.stat_rate_support))
1164 --path->static_rate;
1165 } else
1166 path->static_rate = 0;
1167
1168 if (ah->ah_flags & IB_AH_GRH) {
1169 if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) {
1170 pr_err("sgid_index (%u) too large. max is %d\n",
1171 ah->grh.sgid_index, dev->dev->caps.gid_table_len[port] - 1);
1172 return -1;
1173 }
1174
1175 path->grh_mylmc |= 1 << 7;
1176 path->mgid_index = ah->grh.sgid_index;
1177 path->hop_limit = ah->grh.hop_limit;
1178 path->tclass_flowlabel =
1179 cpu_to_be32((ah->grh.traffic_class << 20) |
1180 (ah->grh.flow_label));
1181 memcpy(path->rgid, ah->grh.dgid.raw, 16);
1182 }
1183
1184 if (is_eth) {
1185 path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
1186 ((port - 1) << 6) | ((ah->sl & 7) << 3);
1187
1188 if (!(ah->ah_flags & IB_AH_GRH))
1189 return -1;
1190
1191 err = mlx4_ib_resolve_grh(dev, ah, mac, &is_mcast, port);
1192 if (err)
1193 return err;
1194
1195 memcpy(path->dmac, mac, 6);
1196 path->ackto = MLX4_IB_LINK_TYPE_ETH;
1197
1198 path->grh_mylmc &= 0x80;
1199
1200 vlan_tag = rdma_get_vlan_id(&dev->iboe.gid_table[port - 1][ah->grh.sgid_index]);
1201 if (vlan_tag < 0x1000) {
1202 if (mlx4_find_cached_vlan(dev->dev, port, vlan_tag, &vidx))
1203 return -ENOENT;
1204
1205 path->vlan_index = vidx;
1206 path->fl = 1 << 6;
1207 }
1208 } else
1209 path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
1210 ((port - 1) << 6) | ((ah->sl & 0xf) << 2);
1211
1212 return 0;
1213}
1214
1215static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
1216{
1217 struct mlx4_ib_gid_entry *ge, *tmp;
1218
1219 list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
1220 if (!ge->added && mlx4_ib_add_mc(dev, qp, &ge->gid)) {
1221 ge->added = 1;
1222 ge->port = qp->port;
1223 }
1224 }
1225}
1226
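/*
 * Build a mlx4_qp_context from the attribute mask and hand it to the
 * firmware for the requested state transition.
 */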
1227static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1228 const struct ib_qp_attr *attr, int attr_mask,
1229 enum ib_qp_state cur_state, enum ib_qp_state new_state)
1230{
1231 struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
1232 struct mlx4_ib_qp *qp = to_mqp(ibqp);
1233 struct mlx4_ib_pd *pd;
1234 struct mlx4_ib_cq *send_cq, *recv_cq;
1235 struct mlx4_qp_context *context;
1236 enum mlx4_qp_optpar optpar = 0;
1237 int sqd_event;
1238 int err = -EINVAL;
1239
1240 context = kzalloc(sizeof *context, GFP_KERNEL);
1241 if (!context)
1242 return -ENOMEM;
1243
1244 context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
1245 (to_mlx4_st(dev, qp->mlx4_ib_qp_type) << 16));
1246
1247 if (!(attr_mask & IB_QP_PATH_MIG_STATE))
1248 context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
1249 else {
1250 optpar |= MLX4_QP_OPTPAR_PM_STATE;
1251 switch (attr->path_mig_state) {
1252 case IB_MIG_MIGRATED:
1253 context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
1254 break;
1255 case IB_MIG_REARM:
1256 context->flags |= cpu_to_be32(MLX4_QP_PM_REARM << 11);
1257 break;
1258 case IB_MIG_ARMED:
1259 context->flags |= cpu_to_be32(MLX4_QP_PM_ARMED << 11);
1260 break;
1261 }
1262 }
1263
1264 if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
1265 context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
1266 else if (ibqp->qp_type == IB_QPT_RAW_PACKET)
1267 context->mtu_msgmax = (MLX4_RAW_QP_MTU << 5) | MLX4_RAW_QP_MSGMAX;
1268 else if (ibqp->qp_type == IB_QPT_UD) {
1269 if (qp->flags & MLX4_IB_QP_LSO)
1270 context->mtu_msgmax = (IB_MTU_4096 << 5) |
1271 ilog2(dev->dev->caps.max_gso_sz);
1272 else
1273 context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
1274 } else if (attr_mask & IB_QP_PATH_MTU) {
1275 if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {
1276 pr_err("path MTU (%u) is invalid\n",
1277 attr->path_mtu);
1278 goto out;
1279 }
1280 context->mtu_msgmax = (attr->path_mtu << 5) |
1281 ilog2(dev->dev->caps.max_msg_sz);
1282 }
1283
1284 if (qp->rq.wqe_cnt)
1285 context->rq_size_stride = ilog2(qp->rq.wqe_cnt) << 3;
1286 context->rq_size_stride |= qp->rq.wqe_shift - 4;
1287
1288 if (qp->sq.wqe_cnt)
1289 context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3;
1290 context->sq_size_stride |= qp->sq.wqe_shift - 4;
1291
1292 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
1293 context->sq_size_stride |= !!qp->sq_no_prefetch << 7;
1294 context->xrcd = cpu_to_be32((u32) qp->xrcdn);
1295 }
1296
1297 if (qp->ibqp.uobject)
1298 context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index);
1299 else
1300 context->usr_page = cpu_to_be32(dev->priv_uar.index);
1301
1302 if (attr_mask & IB_QP_DEST_QPN)
1303 context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
1304
1305 if (attr_mask & IB_QP_PORT) {
1306 if (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD &&
1307 !(attr_mask & IB_QP_AV)) {
1308 mlx4_set_sched(&context->pri_path, attr->port_num);
1309 optpar |= MLX4_QP_OPTPAR_SCHED_QUEUE;
1310 }
1311 }
1312
1313 if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
1314 if (dev->counters[qp->port - 1] != -1) {
1315 context->pri_path.counter_index =
1316 dev->counters[qp->port - 1];
1317 optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX;
1318 } else
1319 context->pri_path.counter_index = 0xff;
1320 }
1321
1322 if (attr_mask & IB_QP_PKEY_INDEX) {
1323 if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
1324 context->pri_path.disable_pkey_check = 0x40;
1325 context->pri_path.pkey_index = attr->pkey_index;
1326 optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
1327 }
1328
1329 if (attr_mask & IB_QP_AV) {
1330 if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
1331 attr_mask & IB_QP_PORT ?
1332 attr->port_num : qp->port))
1333 goto out;
1334
1335 optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
1336 MLX4_QP_OPTPAR_SCHED_QUEUE);
1337 }
1338
1339 if (attr_mask & IB_QP_TIMEOUT) {
1340 context->pri_path.ackto |= attr->timeout << 3;
1341 optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT;
1342 }
1343
1344 if (attr_mask & IB_QP_ALT_PATH) {
1345 if (attr->alt_port_num == 0 ||
1346 attr->alt_port_num > dev->dev->caps.num_ports)
1347 goto out;
1348
1349 if (attr->alt_pkey_index >=
1350 dev->dev->caps.pkey_table_len[attr->alt_port_num])
1351 goto out;
1352
1353 if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
1354 attr->alt_port_num))
1355 goto out;
1356
1357 context->alt_path.pkey_index = attr->alt_pkey_index;
1358 context->alt_path.ackto = attr->alt_timeout << 3;
1359 optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
1360 }
1361
1362 pd = get_pd(qp);
1363 get_cqs(qp, &send_cq, &recv_cq);
1364 context->pd = cpu_to_be32(pd->pdn);
1365 context->cqn_send = cpu_to_be32(send_cq->mcq.cqn);
1366 context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn);
1367 context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);

 /* Set "fast registration enabled" for all kernel QPs */
1370 if (!qp->ibqp.uobject)
1371 context->params1 |= cpu_to_be32(1 << 11);
1372
1373 if (attr_mask & IB_QP_RNR_RETRY) {
1374 context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
1375 optpar |= MLX4_QP_OPTPAR_RNR_RETRY;
1376 }
1377
1378 if (attr_mask & IB_QP_RETRY_CNT) {
1379 context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
1380 optpar |= MLX4_QP_OPTPAR_RETRY_COUNT;
1381 }
1382
1383 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
1384 if (attr->max_rd_atomic)
1385 context->params1 |=
1386 cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
1387 optpar |= MLX4_QP_OPTPAR_SRA_MAX;
1388 }
1389
1390 if (attr_mask & IB_QP_SQ_PSN)
1391 context->next_send_psn = cpu_to_be32(attr->sq_psn);
1392
1393 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
1394 if (attr->max_dest_rd_atomic)
1395 context->params2 |=
1396 cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
1397 optpar |= MLX4_QP_OPTPAR_RRA_MAX;
1398 }
1399
1400 if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
1401 context->params2 |= to_mlx4_access_flags(qp, attr, attr_mask);
1402 optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE;
1403 }
1404
1405 if (ibqp->srq)
1406 context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC);
1407
1408 if (attr_mask & IB_QP_MIN_RNR_TIMER) {
1409 context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
1410 optpar |= MLX4_QP_OPTPAR_RNR_TIMEOUT;
1411 }
1412 if (attr_mask & IB_QP_RQ_PSN)
1413 context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);

 /* proxy and tunnel qp qkeys will be changed in modify-qp wrappers */
1416 if (attr_mask & IB_QP_QKEY) {
1417 if (qp->mlx4_ib_qp_type &
1418 (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))
1419 context->qkey = cpu_to_be32(IB_QP_SET_QKEY);
1420 else {
1421 if (mlx4_is_mfunc(dev->dev) &&
1422 !(qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) &&
1423 (attr->qkey & MLX4_RESERVED_QKEY_MASK) ==
1424 MLX4_RESERVED_QKEY_BASE) {
1425 pr_err("Cannot use reserved QKEY"
1426 " 0x%x (range 0xffff0000..0xffffffff"
1427 " is reserved)\n", attr->qkey);
1428 err = -EINVAL;
1429 goto out;
1430 }
1431 context->qkey = cpu_to_be32(attr->qkey);
1432 }
1433 optpar |= MLX4_QP_OPTPAR_Q_KEY;
1434 }
1435
1436 if (ibqp->srq)
1437 context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn);
1438
1439 if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
1440 context->db_rec_addr = cpu_to_be64(qp->db.dma);
1441
1442 if (cur_state == IB_QPS_INIT &&
1443 new_state == IB_QPS_RTR &&
1444 (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
1445 ibqp->qp_type == IB_QPT_UD ||
1446 ibqp->qp_type == IB_QPT_RAW_PACKET)) {
1447 context->pri_path.sched_queue = (qp->port - 1) << 6;
1448 if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
1449 qp->mlx4_ib_qp_type &
1450 (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) {
1451 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;
1452 if (qp->mlx4_ib_qp_type != MLX4_IB_QPT_SMI)
1453 context->pri_path.fl = 0x80;
1454 } else {
1455 if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
1456 context->pri_path.fl = 0x80;
1457 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
1458 }
1459 }
1460
1461 if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
1462 attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
1463 sqd_event = 1;
1464 else
1465 sqd_event = 0;
1466
1467 if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
1468 context->rlkey |= (1 << 4);
1469
 /*
  * Before passing a kernel QP to the HW, make sure that the
  * ownership bits of the send queue are set and the SQ
  * headroom is stamped so that the HW doesn't start processing
  * stale work requests.
  */
1476 if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
1477 struct mlx4_wqe_ctrl_seg *ctrl;
1478 int i;
1479
1480 for (i = 0; i < qp->sq.wqe_cnt; ++i) {
1481 ctrl = get_send_wqe(qp, i);
1482 ctrl->owner_opcode = cpu_to_be32(1 << 31);
1483 if (qp->sq_max_wqes_per_wr == 1)
1484 ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4);
1485
1486 stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift);
1487 }
1488 }
1489
1490 err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state),
1491 to_mlx4_state(new_state), context, optpar,
1492 sqd_event, &qp->mqp);
1493 if (err)
1494 goto out;
1495
1496 qp->state = new_state;
1497
1498 if (attr_mask & IB_QP_ACCESS_FLAGS)
1499 qp->atomic_rd_en = attr->qp_access_flags;
1500 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1501 qp->resp_depth = attr->max_dest_rd_atomic;
1502 if (attr_mask & IB_QP_PORT) {
1503 qp->port = attr->port_num;
1504 update_mcg_macs(dev, qp);
1505 }
1506 if (attr_mask & IB_QP_ALT_PATH)
1507 qp->alt_port = attr->alt_port_num;
1508
1509 if (is_sqp(dev, qp))
1510 store_sqp_attrs(to_msqp(qp), attr, attr_mask);
1511
 /*
  * If we moved QP0 to RTR, bring the IB link up; if we moved
  * QP0 to RESET or ERROR, bring the link back down.
  */
1516 if (is_qp0(dev, qp)) {
1517 if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR)
1518 if (mlx4_INIT_PORT(dev->dev, qp->port))
1519 pr_warn("INIT_PORT failed for port %d\n",
1520 qp->port);
1521
1522 if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
1523 (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
1524 mlx4_CLOSE_PORT(dev->dev, qp->port);
1525 }
1526
 /*
  * If we moved a kernel QP to RESET, clean up all old CQ
  * entries and reinitialize the QP.
  */
1531 if (new_state == IB_QPS_RESET && !ibqp->uobject) {
1532 mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
1533 ibqp->srq ? to_msrq(ibqp->srq): NULL);
1534 if (send_cq != recv_cq)
1535 mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
1536
1537 qp->rq.head = 0;
1538 qp->rq.tail = 0;
1539 qp->sq.head = 0;
1540 qp->sq.tail = 0;
1541 qp->sq_next_wqe = 0;
1542 if (qp->rq.wqe_cnt)
1543 *qp->db.db = 0;
1544 }
1545
1546out:
1547 kfree(context);
1548 return err;
1549}
1550
1551int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1552 int attr_mask, struct ib_udata *udata)
1553{
1554 struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
1555 struct mlx4_ib_qp *qp = to_mqp(ibqp);
1556 enum ib_qp_state cur_state, new_state;
1557 int err = -EINVAL;
1558
1559 mutex_lock(&qp->mutex);
1560
1561 cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
1562 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
1563
1564 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
1565 pr_debug("qpn 0x%x: invalid attribute mask specified "
1566 "for transition %d to %d. qp_type %d,"
1567 " attr_mask 0x%x\n",
1568 ibqp->qp_num, cur_state, new_state,
1569 ibqp->qp_type, attr_mask);
1570 goto out;
1571 }
1572
1573 if ((attr_mask & IB_QP_PORT) &&
1574 (attr->port_num == 0 || attr->port_num > dev->num_ports)) {
1575 pr_debug("qpn 0x%x: invalid port number (%d) specified "
1576 "for transition %d to %d. qp_type %d\n",
1577 ibqp->qp_num, attr->port_num, cur_state,
1578 new_state, ibqp->qp_type);
1579 goto out;
1580 }
1581
1582 if ((attr_mask & IB_QP_PORT) && (ibqp->qp_type == IB_QPT_RAW_PACKET) &&
1583 (rdma_port_get_link_layer(&dev->ib_dev, attr->port_num) !=
1584 IB_LINK_LAYER_ETHERNET))
1585 goto out;
1586
1587 if (attr_mask & IB_QP_PKEY_INDEX) {
1588 int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
1589 if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) {
1590 pr_debug("qpn 0x%x: invalid pkey index (%d) specified "
1591 "for transition %d to %d. qp_type %d\n",
1592 ibqp->qp_num, attr->pkey_index, cur_state,
1593 new_state, ibqp->qp_type);
1594 goto out;
1595 }
1596 }
1597
1598 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
1599 attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
1600 pr_debug("qpn 0x%x: max_rd_atomic (%d) too large. "
1601 "Transition %d to %d. qp_type %d\n",
1602 ibqp->qp_num, attr->max_rd_atomic, cur_state,
1603 new_state, ibqp->qp_type);
1604 goto out;
1605 }
1606
1607 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
1608 attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) {
1609 pr_debug("qpn 0x%x: max_dest_rd_atomic (%d) too large. "
1610 "Transition %d to %d. qp_type %d\n",
1611 ibqp->qp_num, attr->max_dest_rd_atomic, cur_state,
1612 new_state, ibqp->qp_type);
1613 goto out;
1614 }
1615
1616 if (cur_state == new_state && cur_state == IB_QPS_RESET) {
1617 err = 0;
1618 goto out;
1619 }
1620
1621 err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
1622
1623out:
1624 mutex_unlock(&qp->mutex);
1625 return err;
1626}
1627
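/*
 * Build the MLX (raw) header for a QP0 packet sent on behalf of a
 * slave through the SR-IOV proxy/tunnel machinery.
 */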
1628static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
1629 struct ib_send_wr *wr,
1630 void *wqe, unsigned *mlx_seg_len)
1631{
1632 struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device);
1633 struct ib_device *ib_dev = &mdev->ib_dev;
1634 struct mlx4_wqe_mlx_seg *mlx = wqe;
1635 struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
1636 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
1637 u16 pkey;
1638 u32 qkey;
1639 int send_size;
1640 int header_size;
1641 int spc;
1642 int i;
1643
1644 if (wr->opcode != IB_WR_SEND)
1645 return -EINVAL;
1646
1647 send_size = 0;
1648
1649 for (i = 0; i < wr->num_sge; ++i)
1650 send_size += wr->sg_list[i].length;

 /* for proxy-qp0 sends, need to add in size of tunnel header */
 /* for tunnel-qp0 sends, tunnel header is already in s/g list */
1654 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
1655 send_size += sizeof (struct mlx4_ib_tunnel_header);
1656
1657 ib_ud_header_init(send_size, 1, 0, 0, 0, 0, &sqp->ud_header);
1658
1659 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
1660 sqp->ud_header.lrh.service_level =
1661 be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
1662 sqp->ud_header.lrh.destination_lid =
1663 cpu_to_be16(ah->av.ib.g_slid & 0x7f);
1664 sqp->ud_header.lrh.source_lid =
1665 cpu_to_be16(ah->av.ib.g_slid & 0x7f);
1666 }
1667
1668 mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);

 /* force loopback */
1671 mlx->flags |= cpu_to_be32(MLX4_WQE_MLX_VL15 | 0x1 | MLX4_WQE_MLX_SLR);
1672 mlx->rlid = sqp->ud_header.lrh.destination_lid;
1673
1674 sqp->ud_header.lrh.virtual_lane = 0;
1675 sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
1676 ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey);
1677 sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
1678 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
1679 sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
1680 else
1681 sqp->ud_header.bth.destination_qpn =
1682 cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]);
1683
1684 sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
1685 if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
1686 return -EINVAL;
1687 sqp->ud_header.deth.qkey = cpu_to_be32(qkey);
1688 sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn);
1689
1690 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
1691 sqp->ud_header.immediate_present = 0;
1692
1693 header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);

 /*
  * Inline data segments may not cross a 64 byte boundary.  If
  * our UD header is bigger than the space available up to the
  * next 64 byte boundary in the WQE, use two inline data
  * segments to hold the UD header.
  */
1701 spc = MLX4_INLINE_ALIGN -
1702 ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
1703 if (header_size <= spc) {
1704 inl->byte_count = cpu_to_be32(1 << 31 | header_size);
1705 memcpy(inl + 1, sqp->header_buf, header_size);
1706 i = 1;
1707 } else {
1708 inl->byte_count = cpu_to_be32(1 << 31 | spc);
1709 memcpy(inl + 1, sqp->header_buf, spc);
1710
1711 inl = (void *) (inl + 1) + spc;
1712 memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);
 /*
  * Need a barrier here to make sure all the data is
  * visible before the byte_count field is set.
  * Otherwise the HCA prefetcher could grab the 64-byte
  * chunk with this inline segment and get a valid (!=
  * 0xffffffff) byte count but stale data, and end up
  * sending the wrong data.
  */
1726 wmb();
1727 inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));
1728 i = 2;
1729 }
1730
1731 *mlx_seg_len =
1732 ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
1733 return 0;
1734}
1735
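/* Build the UD header for a send on a special QP (QP0/QP1). */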
1736static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1737 void *wqe, unsigned *mlx_seg_len)
1738{
1739 struct ib_device *ib_dev = sqp->qp.ibqp.device;
1740 struct mlx4_wqe_mlx_seg *mlx = wqe;
1741 struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
1742 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
1743 struct net_device *ndev;
1744 union ib_gid sgid;
1745 u16 pkey;
1746 int send_size;
1747 int header_size;
1748 int spc;
1749 int i;
1750 int err = 0;
1751 u16 vlan = 0xffff;
1752 bool is_eth;
1753 bool is_vlan = false;
1754 bool is_grh;
1755
1756 send_size = 0;
1757 for (i = 0; i < wr->num_sge; ++i)
1758 send_size += wr->sg_list[i].length;
1759
1760 is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
1761 is_grh = mlx4_ib_ah_grh_present(ah);
1762 if (is_eth) {
1763 if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
 /* When multi-function is enabled, the ib_core gid
  * indexes don't necessarily match the hw ones, so
  * we must use our own cache */
1767 sgid.global.subnet_prefix =
1768 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1769 subnet_prefix;
1770 sgid.global.interface_id =
1771 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1772 guid_cache[ah->av.ib.gid_index];
1773 } else {
1774 err = ib_get_cached_gid(ib_dev,
1775 be32_to_cpu(ah->av.ib.port_pd) >> 24,
1776 ah->av.ib.gid_index, &sgid);
1777 if (err)
1778 return err;
1779 }
1780
1781 vlan = rdma_get_vlan_id(&sgid);
1782 is_vlan = vlan < 0x1000;
1783 }
1784 ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header);
1785
1786 if (!is_eth) {
1787 sqp->ud_header.lrh.service_level =
1788 be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
1789 sqp->ud_header.lrh.destination_lid = ah->av.ib.dlid;
1790 sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f);
1791 }
1792
1793 if (is_grh) {
1794 sqp->ud_header.grh.traffic_class =
1795 (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
1796 sqp->ud_header.grh.flow_label =
1797 ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
1798 sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit;
1799 if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
 /* When multi-function is enabled, the ib_core gid
  * indexes don't necessarily match the hw ones, so
  * we must use our own cache */
1803 sqp->ud_header.grh.source_gid.global.subnet_prefix =
1804 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1805 subnet_prefix;
1806 sqp->ud_header.grh.source_gid.global.interface_id =
1807 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1808 guid_cache[ah->av.ib.gid_index];
1809 } else
1810 ib_get_cached_gid(ib_dev,
1811 be32_to_cpu(ah->av.ib.port_pd) >> 24,
1812 ah->av.ib.gid_index,
1813 &sqp->ud_header.grh.source_gid);
1814 memcpy(sqp->ud_header.grh.destination_gid.raw,
1815 ah->av.ib.dgid, 16);
1816 }
1817
1818 mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
1819
1820 if (!is_eth) {
1821 mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
1822 (sqp->ud_header.lrh.destination_lid ==
1823 IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
1824 (sqp->ud_header.lrh.service_level << 8));
1825 if (ah->av.ib.port_pd & cpu_to_be32(0x80000000))
1826 mlx->flags |= cpu_to_be32(0x1);
1827 mlx->rlid = sqp->ud_header.lrh.destination_lid;
1828 }
1829
1830 switch (wr->opcode) {
1831 case IB_WR_SEND:
1832 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
1833 sqp->ud_header.immediate_present = 0;
1834 break;
1835 case IB_WR_SEND_WITH_IMM:
1836 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
1837 sqp->ud_header.immediate_present = 1;
1838 sqp->ud_header.immediate_data = wr->ex.imm_data;
1839 break;
1840 default:
1841 return -EINVAL;
1842 }
1843
1844 if (is_eth) {
1845 u8 *smac;
1846 u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
1847
1848 mlx->sched_prio = cpu_to_be16(pcp);
1849
1850 memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
1851
1852 ndev = to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1];
1853 if (!ndev)
1854 return -ENODEV;
1855 smac = ndev->dev_addr;
1856 memcpy(sqp->ud_header.eth.smac_h, smac, 6);
1857 if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
1858 mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
1859 if (!is_vlan) {
1860 sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
1861 } else {
1862 sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
1863 sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp);
1864 }
1865 } else {
1866 sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
1867 if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
1868 sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
1869 }
1870 sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
1871 if (!sqp->qp.ibqp.qp_num)
1872 ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
1873 else
1874 ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->wr.ud.pkey_index, &pkey);
1875 sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
1876 sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
1877 sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
1878 sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
1879 sqp->qkey : wr->wr.ud.remote_qkey);
1880 sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
1881
1882 header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
1883
1884 if (0) {
1885 pr_err("built UD header of size %d:\n", header_size);
1886 for (i = 0; i < header_size / 4; ++i) {
1887 if (i % 8 == 0)
1888 pr_err(" [%02x] ", i * 4);
1889 pr_cont(" %08x",
1890 be32_to_cpu(((__be32 *) sqp->header_buf)[i]));
1891 if ((i + 1) % 8 == 0)
1892 pr_cont("\n");
1893 }
1894 pr_err("\n");
1895 }

 /*
  * Inline data segments may not cross a 64 byte boundary.  If
  * our UD header is bigger than the space available up to the
  * next 64 byte boundary in the WQE, use two inline data
  * segments to hold the UD header.
  */
1903 spc = MLX4_INLINE_ALIGN -
1904 ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
1905 if (header_size <= spc) {
1906 inl->byte_count = cpu_to_be32(1 << 31 | header_size);
1907 memcpy(inl + 1, sqp->header_buf, header_size);
1908 i = 1;
1909 } else {
1910 inl->byte_count = cpu_to_be32(1 << 31 | spc);
1911 memcpy(inl + 1, sqp->header_buf, spc);
1912
1913 inl = (void *) (inl + 1) + spc;
1914 memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);
 /*
  * Need a barrier here to make sure all the data is
  * visible before the byte_count field is set.
  * Otherwise the HCA prefetcher could grab the 64-byte
  * chunk with this inline segment and get a valid (!=
  * 0xffffffff) byte count but stale data, and end up
  * sending the wrong data.
  */
1928 wmb();
1929 inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));
1930 i = 2;
1931 }
1932
1933 *mlx_seg_len =
1934 ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
1935 return 0;
1936}
1937
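/*
 * Check whether posting nreq more work requests would overflow the
 * work queue; re-read head/tail under the CQ lock to synchronize with
 * completion processing.
 */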
1938static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
1939{
1940 unsigned cur;
1941 struct mlx4_ib_cq *cq;
1942
1943 cur = wq->head - wq->tail;
1944 if (likely(cur + nreq < wq->max_post))
1945 return 0;
1946
1947 cq = to_mcq(ib_cq);
1948 spin_lock(&cq->lock);
1949 cur = wq->head - wq->tail;
1950 spin_unlock(&cq->lock);
1951
1952 return cur + nreq >= wq->max_post;
1953}
1954
1955static __be32 convert_access(int acc)
1956{
1957 return (acc & IB_ACCESS_REMOTE_ATOMIC ?
1958 cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC) : 0) |
1959 (acc & IB_ACCESS_REMOTE_WRITE ?
1960 cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE) : 0) |
1961 (acc & IB_ACCESS_REMOTE_READ ?
1962 cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ) : 0) |
1963 (acc & IB_ACCESS_LOCAL_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0) |
1964 cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ);
1965}
1966
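/* Fill a fast-register WQE segment from an IB_WR_FAST_REG_MR work request. */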
1967static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr)
1968{
1969 struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list);
1970 int i;
1971
1972 for (i = 0; i < wr->wr.fast_reg.page_list_len; ++i)
1973 mfrpl->mapped_page_list[i] =
1974 cpu_to_be64(wr->wr.fast_reg.page_list->page_list[i] |
1975 MLX4_MTT_FLAG_PRESENT);
1976
1977 fseg->flags = convert_access(wr->wr.fast_reg.access_flags);
1978 fseg->mem_key = cpu_to_be32(wr->wr.fast_reg.rkey);
1979 fseg->buf_list = cpu_to_be64(mfrpl->map);
1980 fseg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
1981 fseg->reg_len = cpu_to_be64(wr->wr.fast_reg.length);
1982 fseg->offset = 0;
1983 fseg->page_size = cpu_to_be32(wr->wr.fast_reg.page_shift);
1984 fseg->reserved[0] = 0;
1985 fseg->reserved[1] = 0;
1986}
1987
1988static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg, struct ib_send_wr *wr)
1989{
1990 bseg->flags1 =
1991 convert_access(wr->wr.bind_mw.bind_info.mw_access_flags) &
1992 cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ |
1993 MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE |
1994 MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC);
1995 bseg->flags2 = 0;
1996 if (wr->wr.bind_mw.mw->type == IB_MW_TYPE_2)
1997 bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2);
1998 if (wr->wr.bind_mw.bind_info.mw_access_flags & IB_ZERO_BASED)
1999 bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED);
2000 bseg->new_rkey = cpu_to_be32(wr->wr.bind_mw.rkey);
2001 bseg->lkey = cpu_to_be32(wr->wr.bind_mw.bind_info.mr->lkey);
2002 bseg->addr = cpu_to_be64(wr->wr.bind_mw.bind_info.addr);
2003 bseg->length = cpu_to_be64(wr->wr.bind_mw.bind_info.length);
2004}
2005
2006static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
2007{
2008 memset(iseg, 0, sizeof(*iseg));
2009 iseg->mem_key = cpu_to_be32(rkey);
2010}
2011
2012static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
2013 u64 remote_addr, u32 rkey)
2014{
2015 rseg->raddr = cpu_to_be64(remote_addr);
2016 rseg->rkey = cpu_to_be32(rkey);
2017 rseg->reserved = 0;
2018}
2019
2020static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *wr)
2021{
2022 if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
2023 aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
2024 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
2025 } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
2026 aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
2027 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask);
2028 } else {
2029 aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
2030 aseg->compare = 0;
2031 }
2033}
2034
2035static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
2036 struct ib_send_wr *wr)
2037{
2038 aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
2039 aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask);
2040 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
2041 aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask);
2042}
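
/*
 * Illustrative sketch (hypothetical values): a masked atomic
 * compare-and-swap work request, handled by set_masked_atomic_seg() via
 * the IB_WR_MASKED_ATOMIC_CMP_AND_SWP case in mlx4_ib_post_send(), that
 * compares and swaps only the low 32 bits of a remote 64-bit word:
 *
 *	wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP;
 *	wr.wr.atomic.remote_addr      = remote_va;	// must be 8-byte aligned
 *	wr.wr.atomic.rkey             = remote_rkey;
 *	wr.wr.atomic.compare_add      = old_val;
 *	wr.wr.atomic.compare_add_mask = 0xffffffffULL;
 *	wr.wr.atomic.swap             = new_val;
 *	wr.wr.atomic.swap_mask        = 0xffffffffULL;
 */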
2043
2044static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
2045 struct ib_send_wr *wr)
2046{
2047 memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
2048 dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
2049 dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
2050 dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan;
2051 memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6);
2052}
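
/*
 * Illustrative sketch (hypothetical names): the address information that
 * set_datagram_seg() copies into the WQE comes straight from the UD work
 * request, e.g. as a ULP would fill it in:
 *
 *	wr.opcode            = IB_WR_SEND;
 *	wr.wr.ud.ah          = ah;		// from ib_create_ah()
 *	wr.wr.ud.remote_qpn  = dest_qpn;
 *	wr.wr.ud.remote_qkey = dest_qkey;
 */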
2053
2054static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
2055 struct mlx4_wqe_datagram_seg *dseg,
2056 struct ib_send_wr *wr, enum ib_qp_type qpt)
2057{
2058 union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av;
2059 struct mlx4_av sqp_av = {0};
2060 int port = *((u8 *) &av->ib.port_pd) & 0x3;
2061
	/* force loopback */
2063 sqp_av.port_pd = av->ib.port_pd | cpu_to_be32(0x80000000);
2064 sqp_av.g_slid = av->ib.g_slid & 0x7f;
2065 sqp_av.sl_tclass_flowlabel = av->ib.sl_tclass_flowlabel &
2066 cpu_to_be32(0xf0000000);
2067
2068 memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av));
	/* This function is used only for sending on QP1 proxies */
2070 dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
	/* Use QKEY from the QP context, which is set by the master */
2072 dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
2073}
2074
2075static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len)
2076{
2077 struct mlx4_wqe_inline_seg *inl = wqe;
2078 struct mlx4_ib_tunnel_header hdr;
2079 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
2080 int spc;
2081 int i;
2082
2083 memcpy(&hdr.av, &ah->av, sizeof hdr.av);
2084 hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
2085 hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index);
2086 hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
2087
2088 spc = MLX4_INLINE_ALIGN -
2089 ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
2090 if (sizeof (hdr) <= spc) {
2091 memcpy(inl + 1, &hdr, sizeof (hdr));
2092 wmb();
2093 inl->byte_count = cpu_to_be32(1 << 31 | sizeof (hdr));
2094 i = 1;
2095 } else {
2096 memcpy(inl + 1, &hdr, spc);
2097 wmb();
2098 inl->byte_count = cpu_to_be32(1 << 31 | spc);
2099
2100 inl = (void *) (inl + 1) + spc;
2101 memcpy(inl + 1, (void *) &hdr + spc, sizeof (hdr) - spc);
2102 wmb();
2103 inl->byte_count = cpu_to_be32(1 << 31 | (sizeof (hdr) - spc));
2104 i = 2;
2105 }
2106
2107 *mlx_seg_len =
2108 ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + sizeof (hdr), 16);
2109}
2110
2111static void set_mlx_icrc_seg(void *dseg)
2112{
2113 u32 *t = dseg;
2114 struct mlx4_wqe_inline_seg *iseg = dseg;
2115
2116 t[1] = 0;
2117
	/*
	 * Need a barrier here before writing the byte_count field to
	 * make sure that all the data is visible before the
	 * byte_count field is set.  Otherwise, if the segment begins
	 * a new cacheline, the HCA prefetcher could grab the 64-byte
	 * chunk and get a valid (!= 0xffffffff) byte count but stale
	 * data, and end up sending the wrong data.
	 */
2126 wmb();
2127
2128 iseg->byte_count = cpu_to_be32((1 << 31) | 4);
2129}
2130
2131static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
2132{
2133 dseg->lkey = cpu_to_be32(sg->lkey);
2134 dseg->addr = cpu_to_be64(sg->addr);
2135
	/*
	 * Need a barrier here before writing the byte_count field to
	 * make sure that all the data is visible before the
	 * byte_count field is set.  Otherwise, if the segment begins
	 * a new cacheline, the HCA prefetcher could grab the 64-byte
	 * chunk and get a valid (!= 0xffffffff) byte count but stale
	 * data, and end up sending the wrong data.
	 */
2144 wmb();
2145
2146 dseg->byte_count = cpu_to_be32(sg->length);
2147}
2148
2149static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
2150{
2151 dseg->byte_count = cpu_to_be32(sg->length);
2152 dseg->lkey = cpu_to_be32(sg->lkey);
2153 dseg->addr = cpu_to_be64(sg->addr);
2154}
2155
2156static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
2157 struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
2158 __be32 *lso_hdr_sz, __be32 *blh)
2159{
2160 unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
2161
2162 if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE))
2163 *blh = cpu_to_be32(1 << 6);
2164
2165 if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
2166 wr->num_sge > qp->sq.max_gs - (halign >> 4)))
2167 return -EINVAL;
2168
2169 memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);
2170
2171 *lso_hdr_sz = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 |
2172 wr->wr.ud.hlen);
2173 *lso_seg_len = halign;
2174 return 0;
2175}
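
/*
 * Illustrative sketch (hypothetical names): an LSO work request as
 * consumed by build_lso_seg().  The QP must have been created with
 * IB_QP_CREATE_IPOIB_UD_LSO; hlen covers the headers copied inline into
 * the WQE and mss is the payload size of each resulting segment:
 *
 *	wr.opcode            = IB_WR_LSO;
 *	wr.wr.ud.header      = hdr_buf;
 *	wr.wr.ud.hlen        = hdr_len;
 *	wr.wr.ud.mss         = mtu;
 *	wr.wr.ud.ah          = ah;
 *	wr.wr.ud.remote_qpn  = dest_qpn;
 *	wr.wr.ud.remote_qkey = dest_qkey;
 *	wr.sg_list           = sge;		// points at the payload
 *	wr.num_sge           = nfrags;
 */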
2176
2177static __be32 send_ieth(struct ib_send_wr *wr)
2178{
2179 switch (wr->opcode) {
2180 case IB_WR_SEND_WITH_IMM:
2181 case IB_WR_RDMA_WRITE_WITH_IMM:
2182 return wr->ex.imm_data;
2183
2184 case IB_WR_SEND_WITH_INV:
2185 return cpu_to_be32(wr->ex.invalidate_rkey);
2186
2187 default:
2188 return 0;
2189 }
2190}
2191
2192static void add_zero_len_inline(void *wqe)
2193{
2194 struct mlx4_wqe_inline_seg *inl = wqe;
2195 memset(wqe, 0, 16);
2196 inl->byte_count = cpu_to_be32(1 << 31);
2197}
2198
2199int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2200 struct ib_send_wr **bad_wr)
2201{
2202 struct mlx4_ib_qp *qp = to_mqp(ibqp);
2203 void *wqe;
2204 struct mlx4_wqe_ctrl_seg *ctrl;
2205 struct mlx4_wqe_data_seg *dseg;
2206 unsigned long flags;
2207 int nreq;
2208 int err = 0;
2209 unsigned ind;
2210 int uninitialized_var(stamp);
2211 int uninitialized_var(size);
2212 unsigned uninitialized_var(seglen);
2213 __be32 dummy;
2214 __be32 *lso_wqe;
2215 __be32 uninitialized_var(lso_hdr_sz);
2216 __be32 blh;
2217 int i;
2218
2219 spin_lock_irqsave(&qp->sq.lock, flags);
2220
2221 ind = qp->sq_next_wqe;
2222
2223 for (nreq = 0; wr; ++nreq, wr = wr->next) {
2224 lso_wqe = &dummy;
2225 blh = 0;
2226
2227 if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
2228 err = -ENOMEM;
2229 *bad_wr = wr;
2230 goto out;
2231 }
2232
2233 if (unlikely(wr->num_sge > qp->sq.max_gs)) {
2234 err = -EINVAL;
2235 *bad_wr = wr;
2236 goto out;
2237 }
2238
2239 ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
2240 qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = wr->wr_id;
2241
2242 ctrl->srcrb_flags =
2243 (wr->send_flags & IB_SEND_SIGNALED ?
2244 cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
2245 (wr->send_flags & IB_SEND_SOLICITED ?
2246 cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) |
2247 ((wr->send_flags & IB_SEND_IP_CSUM) ?
2248 cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
2249 MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) |
2250 qp->sq_signal_bits;
2251
2252 ctrl->imm = send_ieth(wr);
2253
2254 wqe += sizeof *ctrl;
2255 size = sizeof *ctrl / 16;
2256
2257 switch (qp->mlx4_ib_qp_type) {
2258 case MLX4_IB_QPT_RC:
2259 case MLX4_IB_QPT_UC:
2260 switch (wr->opcode) {
2261 case IB_WR_ATOMIC_CMP_AND_SWP:
2262 case IB_WR_ATOMIC_FETCH_AND_ADD:
2263 case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
2264 set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
2265 wr->wr.atomic.rkey);
2266 wqe += sizeof (struct mlx4_wqe_raddr_seg);
2267
2268 set_atomic_seg(wqe, wr);
2269 wqe += sizeof (struct mlx4_wqe_atomic_seg);
2270
2271 size += (sizeof (struct mlx4_wqe_raddr_seg) +
2272 sizeof (struct mlx4_wqe_atomic_seg)) / 16;
2273
2274 break;
2275
2276 case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
2277 set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
2278 wr->wr.atomic.rkey);
2279 wqe += sizeof (struct mlx4_wqe_raddr_seg);
2280
2281 set_masked_atomic_seg(wqe, wr);
2282 wqe += sizeof (struct mlx4_wqe_masked_atomic_seg);
2283
2284 size += (sizeof (struct mlx4_wqe_raddr_seg) +
2285 sizeof (struct mlx4_wqe_masked_atomic_seg)) / 16;
2286
2287 break;
2288
2289 case IB_WR_RDMA_READ:
2290 case IB_WR_RDMA_WRITE:
2291 case IB_WR_RDMA_WRITE_WITH_IMM:
2292 set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
2293 wr->wr.rdma.rkey);
2294 wqe += sizeof (struct mlx4_wqe_raddr_seg);
2295 size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
2296 break;
2297
2298 case IB_WR_LOCAL_INV:
2299 ctrl->srcrb_flags |=
2300 cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
2301 set_local_inv_seg(wqe, wr->ex.invalidate_rkey);
2302 wqe += sizeof (struct mlx4_wqe_local_inval_seg);
2303 size += sizeof (struct mlx4_wqe_local_inval_seg) / 16;
2304 break;
2305
2306 case IB_WR_FAST_REG_MR:
2307 ctrl->srcrb_flags |=
2308 cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
2309 set_fmr_seg(wqe, wr);
2310 wqe += sizeof (struct mlx4_wqe_fmr_seg);
2311 size += sizeof (struct mlx4_wqe_fmr_seg) / 16;
2312 break;
2313
2314 case IB_WR_BIND_MW:
2315 ctrl->srcrb_flags |=
2316 cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
2317 set_bind_seg(wqe, wr);
2318 wqe += sizeof(struct mlx4_wqe_bind_seg);
2319 size += sizeof(struct mlx4_wqe_bind_seg) / 16;
2320 break;
2321 default:
				/* No extra segments required for sends */
2323 break;
2324 }
2325 break;
2326
2327 case MLX4_IB_QPT_TUN_SMI_OWNER:
2328 err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
2329 if (unlikely(err)) {
2330 *bad_wr = wr;
2331 goto out;
2332 }
2333 wqe += seglen;
2334 size += seglen / 16;
2335 break;
2336 case MLX4_IB_QPT_TUN_SMI:
2337 case MLX4_IB_QPT_TUN_GSI:
			/* this is a UD qp used in MAD responses to slaves. */
2339 set_datagram_seg(wqe, wr);
			/* set the forced-loopback bit in the data seg av */
2341 *(__be32 *) wqe |= cpu_to_be32(0x80000000);
2342 wqe += sizeof (struct mlx4_wqe_datagram_seg);
2343 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
2344 break;
2345 case MLX4_IB_QPT_UD:
2346 set_datagram_seg(wqe, wr);
2347 wqe += sizeof (struct mlx4_wqe_datagram_seg);
2348 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
2349
2350 if (wr->opcode == IB_WR_LSO) {
2351 err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh);
2352 if (unlikely(err)) {
2353 *bad_wr = wr;
2354 goto out;
2355 }
2356 lso_wqe = (__be32 *) wqe;
2357 wqe += seglen;
2358 size += seglen / 16;
2359 }
2360 break;
2361
2362 case MLX4_IB_QPT_PROXY_SMI_OWNER:
2363 if (unlikely(!mlx4_is_master(to_mdev(ibqp->device)->dev))) {
2364 err = -ENOSYS;
2365 *bad_wr = wr;
2366 goto out;
2367 }
2368 err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
2369 if (unlikely(err)) {
2370 *bad_wr = wr;
2371 goto out;
2372 }
2373 wqe += seglen;
2374 size += seglen / 16;
2375
2376 add_zero_len_inline(wqe);
2377 wqe += 16;
2378 size++;
2379 build_tunnel_header(wr, wqe, &seglen);
2380 wqe += seglen;
2381 size += seglen / 16;
2382 break;
2383 case MLX4_IB_QPT_PROXY_SMI:
			/* don't allow QP0 sends on guests */
2385 err = -ENOSYS;
2386 *bad_wr = wr;
2387 goto out;
2388 case MLX4_IB_QPT_PROXY_GSI:
			/* If we are tunneling special qps, this is a UD qp.
			 * In this case we first add a UD segment targeting
			 * the tunnel qp, and then add a header with address
			 * information */
2393 set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, ibqp->qp_type);
2394 wqe += sizeof (struct mlx4_wqe_datagram_seg);
2395 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
2396 build_tunnel_header(wr, wqe, &seglen);
2397 wqe += seglen;
2398 size += seglen / 16;
2399 break;
2400
2401 case MLX4_IB_QPT_SMI:
2402 case MLX4_IB_QPT_GSI:
2403 err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen);
2404 if (unlikely(err)) {
2405 *bad_wr = wr;
2406 goto out;
2407 }
2408 wqe += seglen;
2409 size += seglen / 16;
2410 break;
2411
2412 default:
2413 break;
2414 }
2415
		/*
		 * Write data segments in reverse order, so as to
		 * overwrite cacheline stamp last within each
		 * cacheline.  This avoids issues with WQE
		 * prefetching.
		 */
2423 dseg = wqe;
2424 dseg += wr->num_sge - 1;
2425 size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16);
2426
		/* Add one more inline data segment for ICRC for MLX sends */
2428 if (unlikely(qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
2429 qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI ||
2430 qp->mlx4_ib_qp_type &
2431 (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))) {
2432 set_mlx_icrc_seg(dseg + 1);
2433 size += sizeof (struct mlx4_wqe_data_seg) / 16;
2434 }
2435
2436 for (i = wr->num_sge - 1; i >= 0; --i, --dseg)
2437 set_data_seg(dseg, wr->sg_list + i);
2438
		/*
		 * Possibly overwrite stamping in cacheline with LSO
		 * segment only after making sure all data segments
		 * are written.
		 */
2444 wmb();
2445 *lso_wqe = lso_hdr_sz;
2446
2447 ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ?
2448 MLX4_WQE_CTRL_FENCE : 0) | size;
2449
		/*
		 * Make sure descriptor is fully written before
		 * setting ownership bit (because HW can start
		 * executing as soon as we do).
		 */
2455 wmb();
2456
2457 if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
2458 *bad_wr = wr;
2459 err = -EINVAL;
2460 goto out;
2461 }
2462
2463 ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
2464 (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh;
2465
2466 stamp = ind + qp->sq_spare_wqes;
2467 ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);
2468
		/*
		 * We can improve latency by not stamping the last
		 * send queue WQE until after ringing the doorbell, so
		 * only stamp here if there are still more WQEs to post.
		 *
		 * Same optimization applies to padding with NOP wqe
		 * in case of WQE shrinking (used to prevent wrap-around
		 * in the middle of WR).
		 */
2478 if (wr->next) {
2479 stamp_send_wqe(qp, stamp, size * 16);
2480 ind = pad_wraparound(qp, ind);
2481 }
2482 }
2483
2484out:
2485 if (likely(nreq)) {
2486 qp->sq.head += nreq;
2487
		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
2492 wmb();
2493
2494 writel(qp->doorbell_qpn,
2495 to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL);
2496
		/*
		 * Make sure doorbells don't leak out of SQ spinlock
		 * and reach the HCA out of order.
		 */
2501 mmiowb();
2502
2503 stamp_send_wqe(qp, stamp, size * 16);
2504
2505 ind = pad_wraparound(qp, ind);
2506 qp->sq_next_wqe = ind;
2507 }
2508
2509 spin_unlock_irqrestore(&qp->sq.lock, flags);
2510
2511 return err;
2512}
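
/*
 * Illustrative sketch (hypothetical names, not part of the driver):
 * kernel ULPs reach mlx4_ib_post_send() through ib_post_send().  A
 * minimal signaled RDMA WRITE of one local buffer might look like:
 *
 *	struct ib_sge sge = {
 *		.addr   = dma_addr,	// DMA-mapped local buffer
 *		.length = len,
 *		.lkey   = mr->lkey,
 *	};
 *	struct ib_send_wr wr = {
 *		.wr_id      = (u64) (unsigned long) ctx,
 *		.sg_list    = &sge,
 *		.num_sge    = 1,
 *		.opcode     = IB_WR_RDMA_WRITE,
 *		.send_flags = IB_SEND_SIGNALED,
 *	};
 *	struct ib_send_wr *bad_wr;
 *	int ret;
 *
 *	wr.wr.rdma.remote_addr = remote_va;
 *	wr.wr.rdma.rkey        = remote_rkey;
 *	ret = ib_post_send(qp, &wr, &bad_wr);	// bad_wr marks the failed WR on error
 */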
2513
2514int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
2515 struct ib_recv_wr **bad_wr)
2516{
2517 struct mlx4_ib_qp *qp = to_mqp(ibqp);
2518 struct mlx4_wqe_data_seg *scat;
2519 unsigned long flags;
2520 int err = 0;
2521 int nreq;
2522 int ind;
2523 int max_gs;
2524 int i;
2525
2526 max_gs = qp->rq.max_gs;
2527 spin_lock_irqsave(&qp->rq.lock, flags);
2528
2529 ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
2530
2531 for (nreq = 0; wr; ++nreq, wr = wr->next) {
2532 if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
2533 err = -ENOMEM;
2534 *bad_wr = wr;
2535 goto out;
2536 }
2537
2538 if (unlikely(wr->num_sge > qp->rq.max_gs)) {
2539 err = -EINVAL;
2540 *bad_wr = wr;
2541 goto out;
2542 }
2543
2544 scat = get_recv_wqe(qp, ind);
2545
2546 if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
2547 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
2548 ib_dma_sync_single_for_device(ibqp->device,
2549 qp->sqp_proxy_rcv[ind].map,
2550 sizeof (struct mlx4_ib_proxy_sqp_hdr),
2551 DMA_FROM_DEVICE);
2552 scat->byte_count =
2553 cpu_to_be32(sizeof (struct mlx4_ib_proxy_sqp_hdr));
			/* use dma lkey from upper layer entry */
2555 scat->lkey = cpu_to_be32(wr->sg_list->lkey);
2556 scat->addr = cpu_to_be64(qp->sqp_proxy_rcv[ind].map);
2557 scat++;
2558 max_gs--;
2559 }
2560
2561 for (i = 0; i < wr->num_sge; ++i)
2562 __set_data_seg(scat + i, wr->sg_list + i);
2563
2564 if (i < max_gs) {
2565 scat[i].byte_count = 0;
2566 scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY);
2567 scat[i].addr = 0;
2568 }
2569
2570 qp->rq.wrid[ind] = wr->wr_id;
2571
2572 ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
2573 }
2574
2575out:
2576 if (likely(nreq)) {
2577 qp->rq.head += nreq;
2578
		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
2583 wmb();
2584
2585 *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
2586 }
2587
2588 spin_unlock_irqrestore(&qp->rq.lock, flags);
2589
2590 return err;
2591}
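
/*
 * Illustrative sketch (hypothetical names): receive WRs are posted
 * through ib_post_recv() and land in mlx4_ib_post_recv().  A
 * single-SGE receive buffer:
 *
 *	struct ib_sge sge = {
 *		.addr   = rx_dma_addr,
 *		.length = rx_buf_size,
 *		.lkey   = mr->lkey,
 *	};
 *	struct ib_recv_wr wr = {
 *		.wr_id   = (u64) (unsigned long) rx_ctx,
 *		.sg_list = &sge,
 *		.num_sge = 1,
 *	};
 *	struct ib_recv_wr *bad_wr;
 *	int ret;
 *
 *	ret = ib_post_recv(qp, &wr, &bad_wr);
 */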
2592
2593static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state)
2594{
2595 switch (mlx4_state) {
2596 case MLX4_QP_STATE_RST: return IB_QPS_RESET;
2597 case MLX4_QP_STATE_INIT: return IB_QPS_INIT;
2598 case MLX4_QP_STATE_RTR: return IB_QPS_RTR;
2599 case MLX4_QP_STATE_RTS: return IB_QPS_RTS;
2600 case MLX4_QP_STATE_SQ_DRAINING:
2601 case MLX4_QP_STATE_SQD: return IB_QPS_SQD;
2602 case MLX4_QP_STATE_SQER: return IB_QPS_SQE;
2603 case MLX4_QP_STATE_ERR: return IB_QPS_ERR;
2604 default: return -1;
2605 }
2606}
2607
2608static inline enum ib_mig_state to_ib_mig_state(int mlx4_mig_state)
2609{
2610 switch (mlx4_mig_state) {
2611 case MLX4_QP_PM_ARMED: return IB_MIG_ARMED;
2612 case MLX4_QP_PM_REARM: return IB_MIG_REARM;
2613 case MLX4_QP_PM_MIGRATED: return IB_MIG_MIGRATED;
2614 default: return -1;
2615 }
2616}
2617
2618static int to_ib_qp_access_flags(int mlx4_flags)
2619{
2620 int ib_flags = 0;
2621
2622 if (mlx4_flags & MLX4_QP_BIT_RRE)
2623 ib_flags |= IB_ACCESS_REMOTE_READ;
2624 if (mlx4_flags & MLX4_QP_BIT_RWE)
2625 ib_flags |= IB_ACCESS_REMOTE_WRITE;
2626 if (mlx4_flags & MLX4_QP_BIT_RAE)
2627 ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
2628
2629 return ib_flags;
2630}
2631
2632static void to_ib_ah_attr(struct mlx4_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr,
2633 struct mlx4_qp_path *path)
2634{
2635 struct mlx4_dev *dev = ibdev->dev;
2636 int is_eth;
2637
2638 memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
2639 ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1;
2640
2641 if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
2642 return;
2643
2644 is_eth = rdma_port_get_link_layer(&ibdev->ib_dev, ib_ah_attr->port_num) ==
2645 IB_LINK_LAYER_ETHERNET;
2646 if (is_eth)
2647 ib_ah_attr->sl = ((path->sched_queue >> 3) & 0x7) |
2648 ((path->sched_queue & 4) << 1);
2649 else
2650 ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf;
2651
2652 ib_ah_attr->dlid = be16_to_cpu(path->rlid);
2653 ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f;
2654 ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0;
2655 ib_ah_attr->ah_flags = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
2656 if (ib_ah_attr->ah_flags) {
2657 ib_ah_attr->grh.sgid_index = path->mgid_index;
2658 ib_ah_attr->grh.hop_limit = path->hop_limit;
2659 ib_ah_attr->grh.traffic_class =
2660 (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
2661 ib_ah_attr->grh.flow_label =
2662 be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
2663 memcpy(ib_ah_attr->grh.dgid.raw,
2664 path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
2665 }
2666}
2667
2668int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
2669 struct ib_qp_init_attr *qp_init_attr)
2670{
2671 struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
2672 struct mlx4_ib_qp *qp = to_mqp(ibqp);
2673 struct mlx4_qp_context context;
2674 int mlx4_state;
2675 int err = 0;
2676
2677 mutex_lock(&qp->mutex);
2678
2679 if (qp->state == IB_QPS_RESET) {
2680 qp_attr->qp_state = IB_QPS_RESET;
2681 goto done;
2682 }
2683
2684 err = mlx4_qp_query(dev->dev, &qp->mqp, &context);
2685 if (err) {
2686 err = -EINVAL;
2687 goto out;
2688 }
2689
2690 mlx4_state = be32_to_cpu(context.flags) >> 28;
2691
2692 qp->state = to_ib_qp_state(mlx4_state);
2693 qp_attr->qp_state = qp->state;
2694 qp_attr->path_mtu = context.mtu_msgmax >> 5;
2695 qp_attr->path_mig_state =
2696 to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3);
2697 qp_attr->qkey = be32_to_cpu(context.qkey);
2698 qp_attr->rq_psn = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff;
2699 qp_attr->sq_psn = be32_to_cpu(context.next_send_psn) & 0xffffff;
2700 qp_attr->dest_qp_num = be32_to_cpu(context.remote_qpn) & 0xffffff;
2701 qp_attr->qp_access_flags =
2702 to_ib_qp_access_flags(be32_to_cpu(context.params2));
2703
2704 if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
2705 to_ib_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path);
2706 to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path);
2707 qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
2708 qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
2709 }
2710
2711 qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f;
2712 if (qp_attr->qp_state == IB_QPS_INIT)
2713 qp_attr->port_num = qp->port;
2714 else
2715 qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1;
2716
	/* qp_attr->en_sqd_async_notify is only applicable in modify qp */
2718 qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING;
2719
2720 qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context.params1) >> 21) & 0x7);
2721
2722 qp_attr->max_dest_rd_atomic =
2723 1 << ((be32_to_cpu(context.params2) >> 21) & 0x7);
2724 qp_attr->min_rnr_timer =
2725 (be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f;
2726 qp_attr->timeout = context.pri_path.ackto >> 3;
2727 qp_attr->retry_cnt = (be32_to_cpu(context.params1) >> 16) & 0x7;
2728 qp_attr->rnr_retry = (be32_to_cpu(context.params1) >> 13) & 0x7;
2729 qp_attr->alt_timeout = context.alt_path.ackto >> 3;
2730
2731done:
2732 qp_attr->cur_qp_state = qp_attr->qp_state;
2733 qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt;
2734 qp_attr->cap.max_recv_sge = qp->rq.max_gs;
2735
2736 if (!ibqp->uobject) {
2737 qp_attr->cap.max_send_wr = qp->sq.wqe_cnt;
2738 qp_attr->cap.max_send_sge = qp->sq.max_gs;
2739 } else {
2740 qp_attr->cap.max_send_wr = 0;
2741 qp_attr->cap.max_send_sge = 0;
2742 }
2743
	/*
	 * We don't support inline sends for kernel QPs (yet), and we
	 * don't know what userspace's value should be.
	 */
2748 qp_attr->cap.max_inline_data = 0;
2749
2750 qp_init_attr->cap = qp_attr->cap;
2751
2752 qp_init_attr->create_flags = 0;
2753 if (qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)
2754 qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
2755
2756 if (qp->flags & MLX4_IB_QP_LSO)
2757 qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
2758
2759 qp_init_attr->sq_sig_type =
2760 qp->sq_signal_bits == cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) ?
2761 IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
2762
2763out:
2764 mutex_unlock(&qp->mutex);
2765 return err;
2766}
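
/*
 * Illustrative sketch (hypothetical names): callers reach
 * mlx4_ib_query_qp() through ib_query_qp(), e.g. to read back the
 * current state and path MTU of a QP:
 *
 *	struct ib_qp_attr attr;
 *	struct ib_qp_init_attr init_attr;
 *	int ret;
 *
 *	ret = ib_query_qp(qp, &attr, IB_QP_STATE | IB_QP_PATH_MTU, &init_attr);
 *	if (!ret)
 *		pr_debug("qp 0x%x state %d mtu %d\n",
 *			 qp->qp_num, attr.qp_state, attr.path_mtu);
 */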
2767
2768