#include <linux/dma-mapping.h>
#include <net/addrconf.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
#include "rxe_hw_counters.h"

static int rxe_query_device(struct ib_device *dev,
			    struct ib_device_attr *attr,
			    struct ib_udata *uhw)
{
	struct rxe_dev *rxe = to_rdev(dev);

	if (uhw->inlen || uhw->outlen)
		return -EINVAL;

	*attr = rxe->attr;
	return 0;
}

static int rxe_query_port(struct ib_device *dev,
			  u8 port_num, struct ib_port_attr *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_port *port;
	int rc = -EINVAL;

	if (unlikely(port_num != 1)) {
		pr_warn("invalid port_number %d\n", port_num);
		goto out;
	}

	port = &rxe->port;

	*attr = port->attr;

	mutex_lock(&rxe->usdev_lock);
	rc = ib_get_eth_speed(dev, port_num, &attr->active_speed,
			      &attr->active_width);
	mutex_unlock(&rxe->usdev_lock);

out:
	return rc;
}

static struct net_device *rxe_get_netdev(struct ib_device *device,
					 u8 port_num)
{
	struct rxe_dev *rxe = to_rdev(device);

	if (rxe->ndev) {
		dev_hold(rxe->ndev);
		return rxe->ndev;
	}

	return NULL;
}

static int rxe_query_pkey(struct ib_device *device,
			  u8 port_num, u16 index, u16 *pkey)
{
	struct rxe_dev *rxe = to_rdev(device);
	struct rxe_port *port;

	if (unlikely(port_num != 1)) {
		dev_warn(device->dev.parent, "invalid port_num = %d\n",
			 port_num);
		goto err1;
	}

	port = &rxe->port;

	if (unlikely(index >= port->attr.pkey_tbl_len)) {
		dev_warn(device->dev.parent, "invalid index = %d\n",
			 index);
		goto err1;
	}

	*pkey = port->pkey_tbl[index];
	return 0;

err1:
	return -EINVAL;
}

static int rxe_modify_device(struct ib_device *dev,
			     int mask, struct ib_device_modify *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);

	if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
		rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid);

	if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
		memcpy(rxe->ib_dev.node_desc,
		       attr->node_desc, sizeof(rxe->ib_dev.node_desc));
	}

	return 0;
}

static int rxe_modify_port(struct ib_device *dev,
			   u8 port_num, int mask, struct ib_port_modify *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_port *port;

	if (unlikely(port_num != 1)) {
		pr_warn("invalid port_num = %d\n", port_num);
		goto err1;
	}

	port = &rxe->port;

	port->attr.port_cap_flags |= attr->set_port_cap_mask;
	port->attr.port_cap_flags &= ~attr->clr_port_cap_mask;

	if (mask & IB_PORT_RESET_QKEY_CNTR)
		port->attr.qkey_viol_cntr = 0;

	return 0;

err1:
	return -EINVAL;
}

static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
					       u8 port_num)
{
	struct rxe_dev *rxe = to_rdev(dev);

	return rxe_link_layer(rxe, port_num);
}

static struct ib_ucontext *rxe_alloc_ucontext(struct ib_device *dev,
					      struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_ucontext *uc;

	uc = rxe_alloc(&rxe->uc_pool);
	return uc ? &uc->ibuc : ERR_PTR(-ENOMEM);
}

static int rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
{
	struct rxe_ucontext *uc = to_ruc(ibuc);

	rxe_drop_ref(uc);
	return 0;
}

static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
			      struct ib_port_immutable *immutable)
{
	int err;
	struct ib_port_attr attr;

	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;

	err = ib_query_port(dev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;

	return 0;
}

static struct ib_pd *rxe_alloc_pd(struct ib_device *dev,
				  struct ib_ucontext *context,
				  struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_pd *pd;

	pd = rxe_alloc(&rxe->pd_pool);
	return pd ? &pd->ibpd : ERR_PTR(-ENOMEM);
}

static int rxe_dealloc_pd(struct ib_pd *ibpd)
{
	struct rxe_pd *pd = to_rpd(ibpd);

	rxe_drop_ref(pd);
	return 0;
}

static int rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr,
		       struct rxe_av *av)
{
	int err;
	union ib_gid sgid;
	struct ib_gid_attr sgid_attr;

	err = ib_get_cached_gid(&rxe->ib_dev, rdma_ah_get_port_num(attr),
				rdma_ah_read_grh(attr)->sgid_index, &sgid,
				&sgid_attr);
	if (err) {
		pr_err("Failed to query sgid. err = %d\n", err);
		return err;
	}

	rxe_av_from_attr(rdma_ah_get_port_num(attr), av, attr);
	rxe_av_fill_ip_info(av, attr, &sgid_attr, &sgid);
	dev_put(sgid_attr.ndev);
	return 0;
}

static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
				   struct rdma_ah_attr *attr,
				   struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_ah *ah;

	err = rxe_av_chk_attr(rxe, attr);
	if (err)
		goto err1;

	ah = rxe_alloc(&rxe->ah_pool);
	if (!ah) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_ref(pd);
	ah->pd = pd;

	err = rxe_init_av(rxe, attr, &ah->av);
	if (err)
		goto err2;

	return &ah->ibah;

err2:
	rxe_drop_ref(pd);
	rxe_drop_ref(ah);
err1:
	return ERR_PTR(err);
}

static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibah->device);
	struct rxe_ah *ah = to_rah(ibah);

	err = rxe_av_chk_attr(rxe, attr);
	if (err)
		return err;

	err = rxe_init_av(rxe, attr, &ah->av);
	if (err)
		return err;

	return 0;
}

static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
	struct rxe_ah *ah = to_rah(ibah);

	memset(attr, 0, sizeof(*attr));
	attr->type = ibah->type;
	rxe_av_to_attr(&ah->av, attr);
	return 0;
}

static int rxe_destroy_ah(struct ib_ah *ibah)
{
	struct rxe_ah *ah = to_rah(ibah);

	rxe_drop_ref(ah->pd);
	rxe_drop_ref(ah);
	return 0;
}
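
/*
 * post_one_recv() copies a single ib_recv_wr into the next free slot of a
 * receive queue.  It serves both the per-QP receive queue (rxe_post_recv)
 * and SRQs (rxe_post_srq_recv); callers hold the queue's producer lock.
 */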

static int post_one_recv(struct rxe_rq *rq, struct ib_recv_wr *ibwr)
{
	int err;
	int i;
	u32 length;
	struct rxe_recv_wqe *recv_wqe;
	int num_sge = ibwr->num_sge;

	if (unlikely(queue_full(rq->queue))) {
		err = -ENOMEM;
		goto err1;
	}

	if (unlikely(num_sge > rq->max_sge)) {
		err = -EINVAL;
		goto err1;
	}

	length = 0;
	for (i = 0; i < num_sge; i++)
		length += ibwr->sg_list[i].length;

	recv_wqe = producer_addr(rq->queue);
	recv_wqe->wr_id = ibwr->wr_id;
	recv_wqe->num_sge = num_sge;

	memcpy(recv_wqe->dma.sge, ibwr->sg_list,
	       num_sge * sizeof(struct ib_sge));

	recv_wqe->dma.length = length;
	recv_wqe->dma.resid = length;
	recv_wqe->dma.num_sge = num_sge;
	recv_wqe->dma.cur_sge = 0;
	recv_wqe->dma.sge_offset = 0;

	/* make sure the WQE is completely written before the producer
	 * index is advanced and the WQE becomes visible to the consumer
	 */
	smp_wmb();

	advance_producer(rq->queue);
	return 0;

err1:
	return err;
}

static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd,
				     struct ib_srq_init_attr *init,
				     struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_srq *srq;
	struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;
	struct rxe_create_srq_resp __user *uresp = NULL;

	if (udata) {
		if (udata->outlen < sizeof(*uresp))
			return ERR_PTR(-EINVAL);
		uresp = udata->outbuf;
	}

	err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK);
	if (err)
		goto err1;

	srq = rxe_alloc(&rxe->srq_pool);
	if (!srq) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_index(srq);
	rxe_add_ref(pd);
	srq->pd = pd;

	err = rxe_srq_from_init(rxe, srq, init, context, uresp);
	if (err)
		goto err2;

	return &srq->ibsrq;

err2:
	rxe_drop_ref(pd);
	rxe_drop_index(srq);
	rxe_drop_ref(srq);
err1:
	return ERR_PTR(err);
}

static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
			  enum ib_srq_attr_mask mask,
			  struct ib_udata *udata)
{
	int err;
	struct rxe_srq *srq = to_rsrq(ibsrq);
	struct rxe_dev *rxe = to_rdev(ibsrq->device);
	struct rxe_modify_srq_cmd ucmd = {};

	if (udata) {
		if (udata->inlen < sizeof(ucmd))
			return -EINVAL;

		err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
		if (err)
			return err;
	}

	err = rxe_srq_chk_attr(rxe, srq, attr, mask);
	if (err)
		goto err1;

	err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd);
	if (err)
		goto err1;

	return 0;

err1:
	return err;
}

static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
{
	struct rxe_srq *srq = to_rsrq(ibsrq);

	if (srq->error)
		return -EINVAL;

	attr->max_wr = srq->rq.queue->buf->index_mask;
	attr->max_sge = srq->rq.max_sge;
	attr->srq_limit = srq->limit;
	return 0;
}

static int rxe_destroy_srq(struct ib_srq *ibsrq)
{
	struct rxe_srq *srq = to_rsrq(ibsrq);

	if (srq->rq.queue)
		rxe_queue_cleanup(srq->rq.queue);

	rxe_drop_ref(srq->pd);
	rxe_drop_index(srq);
	rxe_drop_ref(srq);

	return 0;
}

static int rxe_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
			     struct ib_recv_wr **bad_wr)
{
	int err = 0;
	unsigned long flags;
	struct rxe_srq *srq = to_rsrq(ibsrq);

	spin_lock_irqsave(&srq->rq.producer_lock, flags);

	while (wr) {
		err = post_one_recv(&srq->rq, wr);
		if (unlikely(err))
			break;
		wr = wr->next;
	}

	spin_unlock_irqrestore(&srq->rq.producer_lock, flags);

	if (err)
		*bad_wr = wr;

	return err;
}

static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
				   struct ib_qp_init_attr *init,
				   struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_qp *qp;
	struct rxe_create_qp_resp __user *uresp = NULL;

	if (udata) {
		if (udata->outlen < sizeof(*uresp))
			return ERR_PTR(-EINVAL);
		uresp = udata->outbuf;
	}

	err = rxe_qp_chk_init(rxe, init);
	if (err)
		goto err1;

	qp = rxe_alloc(&rxe->qp_pool);
	if (!qp) {
		err = -ENOMEM;
		goto err1;
	}

	if (udata) {
		if (udata->inlen) {
			err = -EINVAL;
			goto err2;
		}
		qp->is_user = 1;
	}

	rxe_add_index(qp);

	err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibpd);
	if (err)
		goto err3;

	return &qp->ibqp;

err3:
	rxe_drop_index(qp);
err2:
	rxe_drop_ref(qp);
err1:
	return ERR_PTR(err);
}

static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			 int mask, struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);

	err = rxe_qp_chk_attr(rxe, qp, attr, mask);
	if (err)
		goto err1;

	err = rxe_qp_from_attr(qp, attr, mask, udata);
	if (err)
		goto err1;

	return 0;

err1:
	return err;
}

static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			int mask, struct ib_qp_init_attr *init)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	rxe_qp_to_init(qp, init);
	rxe_qp_to_attr(qp, attr, mask);

	return 0;
}

static int rxe_destroy_qp(struct ib_qp *ibqp)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	rxe_qp_destroy(qp);
	rxe_drop_index(qp);
	rxe_drop_ref(qp);
	return 0;
}
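
/*
 * The helpers below translate an ib_send_wr posted by the consumer into the
 * rxe_send_wqe format consumed by the requester task: validate_send_wr()
 * enforces SGE, atomic and inline limits, init_send_wr()/init_send_wqe()
 * copy the request and either its SGE list or its inline data into the next
 * free slot of the send queue, and post_one_send() advances the producer
 * index under the send-queue lock.
 */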

static int validate_send_wr(struct rxe_qp *qp, struct ib_send_wr *ibwr,
			    unsigned int mask, unsigned int length)
{
	int num_sge = ibwr->num_sge;
	struct rxe_sq *sq = &qp->sq;

	if (unlikely(num_sge > sq->max_sge))
		goto err1;

	if (unlikely(mask & WR_ATOMIC_MASK)) {
		if (length < 8)
			goto err1;

		if (atomic_wr(ibwr)->remote_addr & 0x7)
			goto err1;
	}

	if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
		     (length > sq->max_inline)))
		goto err1;

	return 0;

err1:
	return -EINVAL;
}

static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
			 struct ib_send_wr *ibwr)
{
	wr->wr_id = ibwr->wr_id;
	wr->num_sge = ibwr->num_sge;
	wr->opcode = ibwr->opcode;
	wr->send_flags = ibwr->send_flags;

	if (qp_type(qp) == IB_QPT_UD ||
	    qp_type(qp) == IB_QPT_SMI ||
	    qp_type(qp) == IB_QPT_GSI) {
		wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn;
		wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey;
		if (qp_type(qp) == IB_QPT_GSI)
			wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index;
		if (wr->opcode == IB_WR_SEND_WITH_IMM)
			wr->ex.imm_data = ibwr->ex.imm_data;
	} else {
		switch (wr->opcode) {
		case IB_WR_RDMA_WRITE_WITH_IMM:
			wr->ex.imm_data = ibwr->ex.imm_data;
			/* fall through - also needs the rdma fields below */
		case IB_WR_RDMA_READ:
		case IB_WR_RDMA_WRITE:
			wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
			wr->wr.rdma.rkey = rdma_wr(ibwr)->rkey;
			break;
		case IB_WR_SEND_WITH_IMM:
			wr->ex.imm_data = ibwr->ex.imm_data;
			break;
		case IB_WR_SEND_WITH_INV:
			wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
			break;
		case IB_WR_ATOMIC_CMP_AND_SWP:
		case IB_WR_ATOMIC_FETCH_AND_ADD:
			wr->wr.atomic.remote_addr =
				atomic_wr(ibwr)->remote_addr;
			wr->wr.atomic.compare_add =
				atomic_wr(ibwr)->compare_add;
			wr->wr.atomic.swap = atomic_wr(ibwr)->swap;
			wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey;
			break;
		case IB_WR_LOCAL_INV:
			wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
			break;
		case IB_WR_REG_MR:
			wr->wr.reg.mr = reg_wr(ibwr)->mr;
			wr->wr.reg.key = reg_wr(ibwr)->key;
			wr->wr.reg.access = reg_wr(ibwr)->access;
			break;
		default:
			break;
		}
	}
}

static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr,
			 unsigned int mask, unsigned int length,
			 struct rxe_send_wqe *wqe)
{
	int num_sge = ibwr->num_sge;
	struct ib_sge *sge;
	int i;
	u8 *p;

	init_send_wr(qp, &wqe->wr, ibwr);

	if (qp_type(qp) == IB_QPT_UD ||
	    qp_type(qp) == IB_QPT_SMI ||
	    qp_type(qp) == IB_QPT_GSI)
		memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av));

	if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) {
		p = wqe->dma.inline_data;

		sge = ibwr->sg_list;
		for (i = 0; i < num_sge; i++, sge++) {
			memcpy(p, (void *)(uintptr_t)sge->addr,
			       sge->length);

			p += sge->length;
		}
	} else if (mask & WR_REG_MASK) {
		wqe->mask = mask;
		wqe->state = wqe_state_posted;
		return 0;
	} else
		memcpy(wqe->dma.sge, ibwr->sg_list,
		       num_sge * sizeof(struct ib_sge));

	wqe->iova = mask & WR_ATOMIC_MASK ? atomic_wr(ibwr)->remote_addr :
		mask & WR_READ_OR_WRITE_MASK ? rdma_wr(ibwr)->remote_addr : 0;
	wqe->mask = mask;
	wqe->dma.length = length;
	wqe->dma.resid = length;
	wqe->dma.num_sge = num_sge;
	wqe->dma.cur_sge = 0;
	wqe->dma.sge_offset = 0;
	wqe->state = wqe_state_posted;
	wqe->ssn = atomic_add_return(1, &qp->ssn);

	return 0;
}

static int post_one_send(struct rxe_qp *qp, struct ib_send_wr *ibwr,
			 unsigned int mask, u32 length)
{
	int err;
	struct rxe_sq *sq = &qp->sq;
	struct rxe_send_wqe *send_wqe;
	unsigned long flags;

	err = validate_send_wr(qp, ibwr, mask, length);
	if (err)
		return err;

	spin_lock_irqsave(&qp->sq.sq_lock, flags);

	if (unlikely(queue_full(sq->queue))) {
		err = -ENOMEM;
		goto err1;
	}

	send_wqe = producer_addr(sq->queue);

	err = init_send_wqe(qp, ibwr, mask, length, send_wqe);
	if (unlikely(err))
		goto err1;

	/*
	 * make sure all memory stores to the WQE are visible before the
	 * producer index is advanced
	 */
	smp_wmb();

	advance_producer(sq->queue);
	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);

	return 0;

err1:
	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
	return err;
}

static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr,
				struct ib_send_wr **bad_wr)
{
	int err = 0;
	unsigned int mask;
	unsigned int length = 0;
	int i;

	while (wr) {
		mask = wr_opcode_mask(wr->opcode, qp);
		if (unlikely(!mask)) {
			err = -EINVAL;
			*bad_wr = wr;
			break;
		}

		if (unlikely((wr->send_flags & IB_SEND_INLINE) &&
			     !(mask & WR_INLINE_MASK))) {
			err = -EINVAL;
			*bad_wr = wr;
			break;
		}

		length = 0;
		for (i = 0; i < wr->num_sge; i++)
			length += wr->sg_list[i].length;

		err = post_one_send(qp, wr, mask, length);

		if (err) {
			*bad_wr = wr;
			break;
		}
		wr = wr->next;
	}

	rxe_run_task(&qp->req.task, 1);
	if (unlikely(qp->req.state == QP_STATE_ERROR))
		rxe_run_task(&qp->comp.task, 1);

	return err;
}

static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
			 struct ib_send_wr **bad_wr)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	if (unlikely(!qp->valid)) {
		*bad_wr = wr;
		return -EINVAL;
	}

	if (unlikely(qp->req.state < QP_STATE_READY)) {
		*bad_wr = wr;
		return -EINVAL;
	}

	if (qp->is_user) {
		/* Utilize process context to do protocol processing */
		rxe_run_task(&qp->req.task, 0);
		return 0;
	} else
		return rxe_post_send_kernel(qp, wr, bad_wr);
}

static int rxe_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
			 struct ib_recv_wr **bad_wr)
{
	int err = 0;
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_rq *rq = &qp->rq;
	unsigned long flags;

	if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
		*bad_wr = wr;
		err = -EINVAL;
		goto err1;
	}

	if (unlikely(qp->srq)) {
		*bad_wr = wr;
		err = -EINVAL;
		goto err1;
	}

	spin_lock_irqsave(&rq->producer_lock, flags);

	while (wr) {
		err = post_one_recv(rq, wr);
		if (unlikely(err)) {
			*bad_wr = wr;
			break;
		}
		wr = wr->next;
	}

	spin_unlock_irqrestore(&rq->producer_lock, flags);

	if (qp->resp.state == QP_STATE_ERROR)
		rxe_run_task(&qp->resp.task, 1);

err1:
	return err;
}

static struct ib_cq *rxe_create_cq(struct ib_device *dev,
				   const struct ib_cq_init_attr *attr,
				   struct ib_ucontext *context,
				   struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_cq *cq;
	struct rxe_create_cq_resp __user *uresp = NULL;

	if (udata) {
		if (udata->outlen < sizeof(*uresp))
			return ERR_PTR(-EINVAL);
		uresp = udata->outbuf;
	}

	if (attr->flags)
		return ERR_PTR(-EINVAL);

	err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector);
	if (err)
		goto err1;

	cq = rxe_alloc(&rxe->cq_pool);
	if (!cq) {
		err = -ENOMEM;
		goto err1;
	}

	err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector,
			       context, uresp);
	if (err)
		goto err2;

	return &cq->ibcq;

err2:
	rxe_drop_ref(cq);
err1:
	return ERR_PTR(err);
}

static int rxe_destroy_cq(struct ib_cq *ibcq)
{
	struct rxe_cq *cq = to_rcq(ibcq);

	rxe_cq_disable(cq);

	rxe_drop_ref(cq);
	return 0;
}

static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
	int err;
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_dev *rxe = to_rdev(ibcq->device);
	struct rxe_resize_cq_resp __user *uresp = NULL;

	if (udata) {
		if (udata->outlen < sizeof(*uresp))
			return -EINVAL;
		uresp = udata->outbuf;
	}

	err = rxe_cq_chk_attr(rxe, cq, cqe, 0);
	if (err)
		goto err1;

	err = rxe_cq_resize_queue(cq, cqe, uresp);
	if (err)
		goto err1;

	return 0;

err1:
	return err;
}

static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	int i;
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_cqe *cqe;
	unsigned long flags;

	spin_lock_irqsave(&cq->cq_lock, flags);
	for (i = 0; i < num_entries; i++) {
		cqe = queue_head(cq->queue);
		if (!cqe)
			break;

		memcpy(wc++, &cqe->ibwc, sizeof(*wc));
		advance_consumer(cq->queue);
	}
	spin_unlock_irqrestore(&cq->cq_lock, flags);

	return i;
}

static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
{
	struct rxe_cq *cq = to_rcq(ibcq);
	int count = queue_count(cq->queue);

	return (count > wc_cnt) ? wc_cnt : count;
}

static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct rxe_cq *cq = to_rcq(ibcq);
	unsigned long irq_flags;
	int ret = 0;

	spin_lock_irqsave(&cq->cq_lock, irq_flags);
	if (cq->notify != IB_CQ_NEXT_COMP)
		cq->notify = flags & IB_CQ_SOLICITED_MASK;

	if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !queue_empty(cq->queue))
		ret = 1;

	spin_unlock_irqrestore(&cq->cq_lock, irq_flags);

	return ret;
}
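
/*
 * Memory region verbs: rxe_get_dma_mr() creates an MR whose addresses are
 * used without translation, rxe_reg_user_mr() registers a pinned user
 * buffer, and rxe_alloc_mr() creates a fast-registration MR that is filled
 * in later through rxe_map_mr_sg()/rxe_set_page().
 */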

static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
{
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;
	int err;

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_index(mr);

	rxe_add_ref(pd);

	err = rxe_mem_init_dma(pd, access, mr);
	if (err)
		goto err2;

	return &mr->ibmr;

err2:
	rxe_drop_ref(pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
err1:
	return ERR_PTR(err);
}

static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
				     u64 start,
				     u64 length,
				     u64 iova,
				     int access, struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err2;
	}

	rxe_add_index(mr);

	rxe_add_ref(pd);

	err = rxe_mem_init_user(pd, start, length, iova,
				access, udata, mr);
	if (err)
		goto err3;

	return &mr->ibmr;

err3:
	rxe_drop_ref(pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
err2:
	return ERR_PTR(err);
}

static int rxe_dereg_mr(struct ib_mr *ibmr)
{
	struct rxe_mem *mr = to_rmr(ibmr);

	mr->state = RXE_MEM_STATE_ZOMBIE;
	rxe_drop_ref(mr->pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
	return 0;
}

static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd,
				  enum ib_mr_type mr_type,
				  u32 max_num_sg)
{
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;
	int err;

	if (mr_type != IB_MR_TYPE_MEM_REG)
		return ERR_PTR(-EINVAL);

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_index(mr);

	rxe_add_ref(pd);

	err = rxe_mem_init_fast(pd, max_num_sg, mr);
	if (err)
		goto err2;

	return &mr->ibmr;

err2:
	rxe_drop_ref(pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
err1:
	return ERR_PTR(err);
}

static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct rxe_mem *mr = to_rmr(ibmr);
	struct rxe_map *map;
	struct rxe_phys_buf *buf;

	if (unlikely(mr->nbuf == mr->num_buf))
		return -ENOMEM;

	map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
	buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];

	buf->addr = addr;
	buf->size = ibmr->page_size;
	mr->nbuf++;

	return 0;
}

static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
			 int sg_nents, unsigned int *sg_offset)
{
	struct rxe_mem *mr = to_rmr(ibmr);
	int n;

	mr->nbuf = 0;

	n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page);

	mr->va = ibmr->iova;
	mr->iova = ibmr->iova;
	mr->length = ibmr->length;
	mr->page_shift = ilog2(ibmr->page_size);
	mr->page_mask = ibmr->page_size - 1;
	mr->offset = mr->iova & mr->page_mask;

	return n;
}

static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_mc_grp *grp;

	/* takes a ref on grp if successful */
	err = rxe_mcast_get_grp(rxe, mgid, &grp);
	if (err)
		return err;

	err = rxe_mcast_add_grp_elem(rxe, qp, grp);

	rxe_drop_ref(grp);
	return err;
}

static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);

	return rxe_mcast_drop_grp_elem(rxe, qp, mgid);
}

static ssize_t parent_show(struct device *device,
			   struct device_attribute *attr, char *buf)
{
	struct rxe_dev *rxe = container_of(device, struct rxe_dev,
					   ib_dev.dev);

	return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1));
}

static DEVICE_ATTR_RO(parent);

static struct device_attribute *rxe_dev_attributes[] = {
	&dev_attr_parent,
};
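
/*
 * rxe_register_device() wires the soft-RoCE provider into the IB core: it
 * fills in the ib_device attributes and verbs callbacks, allocates the crc32
 * shash used for ICRC calculation, registers the device and creates the
 * sysfs attributes listed above.
 */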

int rxe_register_device(struct rxe_dev *rxe)
{
	int err;
	int i;
	struct ib_device *dev = &rxe->ib_dev;
	struct crypto_shash *tfm;

	strlcpy(dev->name, "rxe%d", IB_DEVICE_NAME_MAX);
	strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));

	dev->owner = THIS_MODULE;
	dev->node_type = RDMA_NODE_IB_CA;
	dev->phys_port_cnt = 1;
	dev->num_comp_vectors = num_possible_cpus();
	dev->dev.parent = rxe_dma_device(rxe);
	dev->local_dma_lkey = 0;
	addrconf_addr_eui48((unsigned char *)&dev->node_guid,
			    rxe->ndev->dev_addr);
	dev->dev.dma_ops = &dma_virt_ops;
	dma_coerce_mask_and_coherent(&dev->dev,
				     dma_get_required_mask(&dev->dev));

	dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION;
	dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT)
	    | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD)
	    | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_REG_MR)
	    | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST)
	    | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST)
	    ;

	dev->query_device = rxe_query_device;
	dev->modify_device = rxe_modify_device;
	dev->query_port = rxe_query_port;
	dev->modify_port = rxe_modify_port;
	dev->get_link_layer = rxe_get_link_layer;
	dev->get_netdev = rxe_get_netdev;
	dev->query_pkey = rxe_query_pkey;
	dev->alloc_ucontext = rxe_alloc_ucontext;
	dev->dealloc_ucontext = rxe_dealloc_ucontext;
	dev->mmap = rxe_mmap;
	dev->get_port_immutable = rxe_port_immutable;
	dev->alloc_pd = rxe_alloc_pd;
	dev->dealloc_pd = rxe_dealloc_pd;
	dev->create_ah = rxe_create_ah;
	dev->modify_ah = rxe_modify_ah;
	dev->query_ah = rxe_query_ah;
	dev->destroy_ah = rxe_destroy_ah;
	dev->create_srq = rxe_create_srq;
	dev->modify_srq = rxe_modify_srq;
	dev->query_srq = rxe_query_srq;
	dev->destroy_srq = rxe_destroy_srq;
	dev->post_srq_recv = rxe_post_srq_recv;
	dev->create_qp = rxe_create_qp;
	dev->modify_qp = rxe_modify_qp;
	dev->query_qp = rxe_query_qp;
	dev->destroy_qp = rxe_destroy_qp;
	dev->post_send = rxe_post_send;
	dev->post_recv = rxe_post_recv;
	dev->create_cq = rxe_create_cq;
	dev->destroy_cq = rxe_destroy_cq;
	dev->resize_cq = rxe_resize_cq;
	dev->poll_cq = rxe_poll_cq;
	dev->peek_cq = rxe_peek_cq;
	dev->req_notify_cq = rxe_req_notify_cq;
	dev->get_dma_mr = rxe_get_dma_mr;
	dev->reg_user_mr = rxe_reg_user_mr;
	dev->dereg_mr = rxe_dereg_mr;
	dev->alloc_mr = rxe_alloc_mr;
	dev->map_mr_sg = rxe_map_mr_sg;
	dev->attach_mcast = rxe_attach_mcast;
	dev->detach_mcast = rxe_detach_mcast;
	dev->get_hw_stats = rxe_ib_get_hw_stats;
	dev->alloc_hw_stats = rxe_ib_alloc_hw_stats;

	tfm = crypto_alloc_shash("crc32", 0, 0);
	if (IS_ERR(tfm)) {
		pr_err("failed to allocate crc algorithm err:%ld\n",
		       PTR_ERR(tfm));
		return PTR_ERR(tfm);
	}
	rxe->tfm = tfm;

	dev->driver_id = RDMA_DRIVER_RXE;
	err = ib_register_device(dev, NULL);
	if (err) {
		pr_warn("%s failed with error %d\n", __func__, err);
		goto err1;
	}

	for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) {
		err = device_create_file(&dev->dev, rxe_dev_attributes[i]);
		if (err) {
			pr_warn("%s failed with error %d for attr number %d\n",
				__func__, err, i);
			goto err2;
		}
	}

	return 0;

err2:
	ib_unregister_device(dev);
err1:
	crypto_free_shash(rxe->tfm);

	return err;
}

int rxe_unregister_device(struct rxe_dev *rxe)
{
	int i;
	struct ib_device *dev = &rxe->ib_dev;

	for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i)
		device_remove_file(&dev->dev, rxe_dev_attributes[i]);

	ib_unregister_device(dev);

	return 0;
}