// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause

/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/* Copyright (c) 2008-2019, IBM Corporation */

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/net.h>
#include <linux/scatterlist.h>
#include <linux/highmem.h>

#include <rdma/iw_cm.h>
#include <rdma/ib_verbs.h>

#include "siw.h"
#include "siw_verbs.h"
#include "siw_mem.h"

/*
 * siw_rx_umem()
 *
 * Receive data of @len into target referenced by @dest_addr.
 *
 * @srx:	Receive Stream
 * @umem:	siw representation of target memory
 * @dest_addr:	user virtual address
 * @len:	number of bytes to place
 */
static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem,
		       u64 dest_addr, int len)
{
	int copied = 0;

	while (len) {
		struct page *p;
		int pg_off, bytes, rv;
		void *dest;

		p = siw_get_upage(umem, dest_addr);
		if (unlikely(!p)) {
			pr_warn("siw: %s: [QP %u]: bogus addr: %pK, %pK\n",
				__func__, qp_id(rx_qp(srx)),
				(void *)(uintptr_t)dest_addr,
				(void *)(uintptr_t)umem->fp_addr);
			/* siw internal error */
			srx->skb_copied += copied;
			srx->skb_new -= copied;

			return -EFAULT;
		}
		pg_off = dest_addr & ~PAGE_MASK;
		bytes = min(len, (int)PAGE_SIZE - pg_off);

		siw_dbg_qp(rx_qp(srx), "page %pK, bytes=%u\n", p, bytes);

		dest = kmap_atomic(p);
		rv = skb_copy_bits(srx->skb, srx->skb_offset, dest + pg_off,
				   bytes);

		if (unlikely(rv)) {
			kunmap_atomic(dest);
			srx->skb_copied += copied;
			srx->skb_new -= copied;

			pr_warn("siw: [QP %u]: %s, len %d, page %p, rv %d\n",
				qp_id(rx_qp(srx)), __func__, len, p, rv);

			return -EFAULT;
		}
		if (srx->mpa_crc_hd) {
			if (rdma_is_kernel_res(&rx_qp(srx)->base_qp.res)) {
				crypto_shash_update(srx->mpa_crc_hd,
					(u8 *)(dest + pg_off), bytes);
				kunmap_atomic(dest);
			} else {
				kunmap_atomic(dest);
				/*
				 * Do CRC on original, not target buffer.
				 * Some user land applications may
				 * concurrently write the target buffer,
				 * which would yield a broken CRC.
				 * Walking the skb twice is very
				 * inefficient. Folding the CRC into
				 * skb_copy_bits() would be much better,
				 * but is currently not supported.
				 */
				siw_crc_skb(srx, bytes);
			}
		} else {
			kunmap_atomic(dest);
		}
		srx->skb_offset += bytes;
		copied += bytes;
		len -= bytes;
		dest_addr += bytes;
		pg_off = 0;
	}
	srx->skb_copied += copied;
	srx->skb_new -= copied;

	return copied;
}

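/*
 * siw_rx_kva()
 *
 * Copy @len bytes of the current skb into the kernel virtual address
 * range starting at @kva, updating CRC computation and receive
 * stream state.
 */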
static int siw_rx_kva(struct siw_rx_stream *srx, void *kva, int len)
{
	int rv;

	siw_dbg_qp(rx_qp(srx), "kva: 0x%pK, len: %u\n", kva, len);

	rv = skb_copy_bits(srx->skb, srx->skb_offset, kva, len);
	if (unlikely(rv)) {
		pr_warn("siw: [QP %u]: %s, len %d, kva 0x%pK, rv %d\n",
			qp_id(rx_qp(srx)), __func__, len, kva, rv);

		return rv;
	}
	if (srx->mpa_crc_hd)
		crypto_shash_update(srx->mpa_crc_hd, (u8 *)kva, len);

	srx->skb_offset += len;
	srx->skb_copied += len;
	srx->skb_new -= len;

	return len;
}

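/*
 * siw_rx_pbl()
 *
 * Resolve the physical buffer list of @mem for target address @addr
 * and receive up to @len bytes into the resolved buffers. May place
 * fewer bytes than requested if a buffer lookup fails.
 */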
static int siw_rx_pbl(struct siw_rx_stream *srx, int *pbl_idx,
		      struct siw_mem *mem, u64 addr, int len)
{
	struct siw_pbl *pbl = mem->pbl;
	u64 offset = addr - mem->va;
	int copied = 0;

	while (len) {
		int bytes;
		dma_addr_t buf_addr =
			siw_pbl_get_buffer(pbl, offset, &bytes, pbl_idx);
		if (!buf_addr)
			break;

		bytes = min(bytes, len);
		if (siw_rx_kva(srx, (void *)buf_addr, bytes) == bytes) {
			copied += bytes;
			offset += bytes;
			len -= bytes;
		} else {
			break;
		}
	}
	return copied;
}

/*
 * siw_rresp_check_ntoh()
 *
 * Check incoming RRESP fragment header against expected
 * header values and update expected values for potential next
 * fragment.
 *
 * NOTE: This function must be called only if a RRESP DDP segment
 *       starts but not for fragmented consecutive pieces of an
 *       already started DDP segment.
 */
static int siw_rresp_check_ntoh(struct siw_rx_stream *srx,
				struct siw_rx_fpdu *frx)
{
	struct iwarp_rdma_rresp *rresp = &srx->hdr.rresp;
	struct siw_wqe *wqe = &frx->wqe_active;
	enum ddp_ecode ecode;

	u32 sink_stag = be32_to_cpu(rresp->sink_stag);
	u64 sink_to = be64_to_cpu(rresp->sink_to);

	if (frx->first_ddp_seg) {
		srx->ddp_stag = wqe->sqe.sge[0].lkey;
		srx->ddp_to = wqe->sqe.sge[0].laddr;
		frx->pbl_idx = 0;
	}
	/* Below checks extend beyond the semantics of DDP, and
	 * into RDMAP:
	 * We check if the read response matches exactly the
	 * read request which was sent to the remote peer to
	 * trigger this read response. RFC 5040/5041 do not
	 * always have a proper error code for the detected
	 * failure cases. We keep going with BASE_BOUNDS error
	 * codes instead.
	 */
	if (unlikely(srx->ddp_stag != sink_stag)) {
		pr_warn("siw: [QP %u]: rresp stag: %08x != %08x\n",
			qp_id(rx_qp(srx)), sink_stag, srx->ddp_stag);
		ecode = DDP_ECODE_T_INVALID_STAG;
		goto error;
	}
	if (unlikely(srx->ddp_to != sink_to)) {
		pr_warn("siw: [QP %u]: rresp off: %016llx != %016llx\n",
			qp_id(rx_qp(srx)), (unsigned long long)sink_to,
			(unsigned long long)srx->ddp_to);
		ecode = DDP_ECODE_T_BASE_BOUNDS;
		goto error;
	}
	if (unlikely(!frx->more_ddp_segs &&
		     (wqe->processed + srx->fpdu_part_rem != wqe->bytes))) {
		pr_warn("siw: [QP %u]: rresp len: %d != %d\n",
			qp_id(rx_qp(srx)),
			wqe->processed + srx->fpdu_part_rem, wqe->bytes);
		ecode = DDP_ECODE_T_BASE_BOUNDS;
		goto error;
	}
	return 0;
error:
	siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
			   DDP_ETYPE_TAGGED_BUF, ecode, 0);
	return -EINVAL;
}

/*
 * siw_write_check_ntoh()
 *
 * Check incoming WRITE fragment header against expected
 * header values and update expected values for potential next
 * fragment.
 *
 * NOTE: This function must be called only if a WRITE DDP segment
 *       starts but not for fragmented consecutive pieces of an
 *       already started DDP segment.
 */
static int siw_write_check_ntoh(struct siw_rx_stream *srx,
				struct siw_rx_fpdu *frx)
{
	struct iwarp_rdma_write *write = &srx->hdr.rwrite;
	enum ddp_ecode ecode;

	u32 sink_stag = be32_to_cpu(write->sink_stag);
	u64 sink_to = be64_to_cpu(write->sink_to);

	if (frx->first_ddp_seg) {
		srx->ddp_stag = sink_stag;
		srx->ddp_to = sink_to;
		frx->pbl_idx = 0;
	} else {
		if (unlikely(srx->ddp_stag != sink_stag)) {
			pr_warn("siw: [QP %u]: write stag: %08x != %08x\n",
				qp_id(rx_qp(srx)), sink_stag,
				srx->ddp_stag);
			ecode = DDP_ECODE_T_INVALID_STAG;
			goto error;
		}
		if (unlikely(srx->ddp_to != sink_to)) {
			pr_warn("siw: [QP %u]: write off: %016llx != %016llx\n",
				qp_id(rx_qp(srx)),
				(unsigned long long)sink_to,
				(unsigned long long)srx->ddp_to);
			ecode = DDP_ECODE_T_BASE_BOUNDS;
			goto error;
		}
	}
	return 0;
error:
	siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
			   DDP_ETYPE_TAGGED_BUF, ecode, 0);
	return -EINVAL;
}

/*
 * siw_send_check_ntoh()
 *
 * Check incoming SEND fragment header against expected
 * header values and update expected MSN if no next fragment expected.
 *
 * NOTE: This function must be called only if a SEND DDP segment
 *       starts but not for fragmented consecutive pieces of an
 *       already started DDP segment.
 */
static int siw_send_check_ntoh(struct siw_rx_stream *srx,
			       struct siw_rx_fpdu *frx)
{
	struct iwarp_send_inv *send = &srx->hdr.send_inv;
	struct siw_wqe *wqe = &frx->wqe_active;
	enum ddp_ecode ecode;

	u32 ddp_msn = be32_to_cpu(send->ddp_msn);
	u32 ddp_mo = be32_to_cpu(send->ddp_mo);
	u32 ddp_qn = be32_to_cpu(send->ddp_qn);

	if (unlikely(ddp_qn != RDMAP_UNTAGGED_QN_SEND)) {
		pr_warn("siw: [QP %u]: invalid ddp qn %d for send\n",
			qp_id(rx_qp(srx)), ddp_qn);
		ecode = DDP_ECODE_UT_INVALID_QN;
		goto error;
	}
	if (unlikely(ddp_msn != srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND])) {
		pr_warn("siw: [QP %u]: send msn: %u != %u\n",
			qp_id(rx_qp(srx)), ddp_msn,
			srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]);
		ecode = DDP_ECODE_UT_INVALID_MSN_RANGE;
		goto error;
	}
	if (unlikely(ddp_mo != wqe->processed)) {
		pr_warn("siw: [QP %u], send mo: %u != %u\n",
			qp_id(rx_qp(srx)), ddp_mo, wqe->processed);
		ecode = DDP_ECODE_UT_INVALID_MO;
		goto error;
	}
	if (frx->first_ddp_seg) {
		/* initialize user memory write position */
		frx->sge_idx = 0;
		frx->sge_off = 0;
		frx->pbl_idx = 0;

		/* only valid for SEND_INV and SEND_SE_INV operations */
		srx->inval_stag = be32_to_cpu(send->inval_stag);
	}
	if (unlikely(wqe->bytes < wqe->processed + srx->fpdu_part_rem)) {
		siw_dbg_qp(rx_qp(srx), "receive space short: %d - %d < %d\n",
			   wqe->bytes, wqe->processed, srx->fpdu_part_rem);
		wqe->wc_status = SIW_WC_LOC_LEN_ERR;
		ecode = DDP_ECODE_UT_INVALID_MSN_NOBUF;
		goto error;
	}
	return 0;
error:
	siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
			   DDP_ETYPE_UNTAGGED_BUF, ecode, 0);
	return -EINVAL;
}

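/*
 * siw_rqe_get()
 *
 * Get the next RQE from the QP's receive queue, or from the attached
 * SRQ if present. Initializes the untagged rx context's WQE from the
 * fetched RQE and invalidates the queue slot for application reuse.
 * For an armed SRQ, an SRQ limit event is generated if the fill
 * level dropped below the limit.
 */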
static struct siw_wqe *siw_rqe_get(struct siw_qp *qp)
{
	struct siw_rqe *rqe;
	struct siw_srq *srq;
	struct siw_wqe *wqe = NULL;
	bool srq_event = false;
	unsigned long flags;

	srq = qp->srq;
	if (srq) {
		spin_lock_irqsave(&srq->lock, flags);
		if (unlikely(!srq->num_rqe))
			goto out;

		rqe = &srq->recvq[srq->rq_get % srq->num_rqe];
	} else {
		if (unlikely(!qp->recvq))
			goto out;

		rqe = &qp->recvq[qp->rq_get % qp->attrs.rq_size];
	}
	if (likely(rqe->flags == SIW_WQE_VALID)) {
		int num_sge = rqe->num_sge;

		if (likely(num_sge <= SIW_MAX_SGE)) {
			int i = 0;

			wqe = rx_wqe(&qp->rx_untagged);
			rx_type(wqe) = SIW_OP_RECEIVE;
			wqe->wr_status = SIW_WR_INPROGRESS;
			wqe->bytes = 0;
			wqe->processed = 0;

			wqe->rqe.id = rqe->id;
			wqe->rqe.num_sge = num_sge;

			while (i < num_sge) {
				wqe->rqe.sge[i].laddr = rqe->sge[i].laddr;
				wqe->rqe.sge[i].lkey = rqe->sge[i].lkey;
				wqe->rqe.sge[i].length = rqe->sge[i].length;
				wqe->bytes += wqe->rqe.sge[i].length;
				wqe->mem[i] = NULL;
				i++;
			}
			/* can be re-used by appl */
			smp_store_mb(rqe->flags, 0);
		} else {
			siw_dbg_qp(qp, "too many sge's: %d\n", rqe->num_sge);
			if (srq)
				spin_unlock_irqrestore(&srq->lock, flags);
			return NULL;
		}
		if (!srq) {
			qp->rq_get++;
		} else {
			if (srq->armed) {
				/* Test SRQ limit */
				u32 off = (srq->rq_get + srq->limit) %
					  srq->num_rqe;
				struct siw_rqe *rqe2 = &srq->recvq[off];

				if (!(rqe2->flags & SIW_WQE_VALID)) {
					srq->armed = false;
					srq_event = true;
				}
			}
			srq->rq_get++;
		}
	}
out:
	if (srq) {
		spin_unlock_irqrestore(&srq->lock, flags);
		if (srq_event)
			siw_srq_event(srq, IB_EVENT_SRQ_LIMIT_REACHED);
	}
	return wqe;
}

/*
 * siw_proc_send:
 *
 * Process one incoming SEND and place data into memory referenced by
 * receive wqe.
 *
 * Function supports partially received sends (suspending/resuming
 * current receive wqe processing)
 *
 * return value:
 *	0:       reached the end of a DDP segment
 *	-EAGAIN: to be called again to finish the DDP segment
 */
int siw_proc_send(struct siw_qp *qp)
{
	struct siw_rx_stream *srx = &qp->rx_stream;
	struct siw_rx_fpdu *frx = &qp->rx_untagged;
	struct siw_wqe *wqe;
	u32 data_bytes;
	u32 rcvd_bytes;
	int rv = 0;

	if (frx->first_ddp_seg) {
		wqe = siw_rqe_get(qp);
		if (unlikely(!wqe)) {
			siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
					   DDP_ETYPE_UNTAGGED_BUF,
					   DDP_ECODE_UT_INVALID_MSN_NOBUF, 0);
			return -ENOENT;
		}
	} else {
		wqe = rx_wqe(frx);
	}
	if (srx->state == SIW_GET_DATA_START) {
		rv = siw_send_check_ntoh(srx, frx);
		if (unlikely(rv)) {
			siw_qp_event(qp, IB_EVENT_QP_FATAL);
			return rv;
		}
		if (!srx->fpdu_part_rem) /* zero length SEND */
			return 0;
	}
	data_bytes = min(srx->fpdu_part_rem, srx->skb_new);
	rcvd_bytes = 0;

	/* A zero length SEND will skip below loop */
	while (data_bytes) {
		struct ib_pd *pd;
		struct siw_mem **mem, *mem_p;
		struct siw_sge *sge;
		u32 sge_bytes; /* data bytes avail for SGE */

		sge = &wqe->rqe.sge[frx->sge_idx];

		if (!sge->length) {
			/* just skip empty sge's */
			frx->sge_idx++;
			frx->sge_off = 0;
			frx->pbl_idx = 0;
			continue;
		}
		sge_bytes = min(data_bytes, sge->length - frx->sge_off);
		mem = &wqe->mem[frx->sge_idx];

		/*
		 * check with QP's PD if no SRQ present, SRQ's PD otherwise
		 */
		pd = qp->srq == NULL ? qp->pd : qp->srq->base_srq.pd;

		rv = siw_check_sge(pd, sge, mem, IB_ACCESS_LOCAL_WRITE,
				   frx->sge_off, sge_bytes);
		if (unlikely(rv)) {
			siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
					   DDP_ETYPE_CATASTROPHIC,
					   DDP_ECODE_CATASTROPHIC, 0);

			siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);
			break;
		}
		mem_p = *mem;
		if (mem_p->mem_obj == NULL)
			rv = siw_rx_kva(srx,
				(void *)(uintptr_t)(sge->laddr + frx->sge_off),
				sge_bytes);
		else if (!mem_p->is_pbl)
			rv = siw_rx_umem(srx, mem_p->umem,
					 sge->laddr + frx->sge_off, sge_bytes);
		else
			rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p,
					sge->laddr + frx->sge_off, sge_bytes);

		if (unlikely(rv != sge_bytes)) {
			wqe->processed += rcvd_bytes;

			siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
					   DDP_ETYPE_CATASTROPHIC,
					   DDP_ECODE_CATASTROPHIC, 0);
			return -EINVAL;
		}
		frx->sge_off += rv;

		if (frx->sge_off == sge->length) {
			frx->sge_idx++;
			frx->sge_off = 0;
			frx->pbl_idx = 0;
		}
		data_bytes -= rv;
		rcvd_bytes += rv;

		srx->fpdu_part_rem -= rv;
		srx->fpdu_part_rcvd += rv;
	}
	wqe->processed += rcvd_bytes;

	if (!srx->fpdu_part_rem)
		return 0;

	return (rv < 0) ? rv : -EAGAIN;
}

/*
 * siw_proc_write:
 *
 * Place incoming WRITE after referencing and checking target buffer.
 *
 * Function supports partially received WRITEs (suspending/resuming
 * current receive processing)
 *
 * return value:
 *	0:       reached the end of a DDP segment
 *	-EAGAIN: to be called again to finish the DDP segment
 */
int siw_proc_write(struct siw_qp *qp)
{
	struct siw_rx_stream *srx = &qp->rx_stream;
	struct siw_rx_fpdu *frx = &qp->rx_tagged;
	struct siw_mem *mem;
	int bytes, rv;

	if (srx->state == SIW_GET_DATA_START) {
		if (!srx->fpdu_part_rem) /* zero length WRITE */
			return 0;

		rv = siw_write_check_ntoh(srx, frx);
		if (unlikely(rv)) {
			siw_qp_event(qp, IB_EVENT_QP_FATAL);
			return rv;
		}
	}
	bytes = min(srx->fpdu_part_rem, srx->skb_new);

	if (frx->first_ddp_seg) {
		struct siw_wqe *wqe = rx_wqe(frx);

		rx_mem(frx) = siw_mem_id2obj(qp->sdev, srx->ddp_stag >> 8);
		if (unlikely(!rx_mem(frx))) {
			siw_dbg_qp(qp,
				   "sink stag not found/invalid, stag 0x%08x\n",
				   srx->ddp_stag);

			siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
					   DDP_ETYPE_TAGGED_BUF,
					   DDP_ECODE_T_INVALID_STAG, 0);
			return -EINVAL;
		}
		wqe->rqe.num_sge = 1;
		rx_type(wqe) = SIW_OP_WRITE;
		wqe->wr_status = SIW_WR_INPROGRESS;
	}
	mem = rx_mem(frx);

	/*
	 * Check if application re-registered memory with different
	 * key field of STag.
	 */
	if (unlikely(mem->stag != srx->ddp_stag)) {
		siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
				   DDP_ETYPE_TAGGED_BUF,
				   DDP_ECODE_T_INVALID_STAG, 0);
		return -EINVAL;
	}
	rv = siw_check_mem(qp->pd, mem, srx->ddp_to + srx->fpdu_part_rcvd,
			   IB_ACCESS_REMOTE_WRITE, bytes);
	if (unlikely(rv)) {
		siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
				   DDP_ETYPE_TAGGED_BUF, siw_tagged_error(-rv),
				   0);

		siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);

		return -EINVAL;
	}

	if (mem->mem_obj == NULL)
		rv = siw_rx_kva(srx,
			(void *)(uintptr_t)(srx->ddp_to + srx->fpdu_part_rcvd),
			bytes);
	else if (!mem->is_pbl)
		rv = siw_rx_umem(srx, mem->umem,
				 srx->ddp_to + srx->fpdu_part_rcvd, bytes);
	else
		rv = siw_rx_pbl(srx, &frx->pbl_idx, mem,
				srx->ddp_to + srx->fpdu_part_rcvd, bytes);

	if (unlikely(rv != bytes)) {
		siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
				   DDP_ETYPE_CATASTROPHIC,
				   DDP_ECODE_CATASTROPHIC, 0);
		return -EINVAL;
	}
	srx->fpdu_part_rem -= rv;
	srx->fpdu_part_rcvd += rv;

	if (!srx->fpdu_part_rem) {
		srx->ddp_to += srx->fpdu_part_rcvd;
		return 0;
	}
	return -EAGAIN;
}

/*
 * Inbound RREQ's cannot carry user data.
 */
int siw_proc_rreq(struct siw_qp *qp)
{
	struct siw_rx_stream *srx = &qp->rx_stream;

	if (!srx->fpdu_part_rem)
		return 0;

	pr_warn("siw: [QP %u]: rreq with mpa len %d\n", qp_id(qp),
		be16_to_cpu(srx->hdr.ctrl.mpa_len));

	return -EPROTO;
}

/*
 * siw_init_rresp:
 *
 * Process inbound RDMA READ REQ. Produce a pseudo READ RESPONSE WQE.
 * Put it at the tail of the IRQ, if there is another WQE currently in
 * transmit processing. If not, make it the current WQE to be processed
 * and schedule transmit processing.
 *
 * Can be called from softirq context and from process
 * context (RREAD socket loopback case!)
 *
 * return value:
 *	0:      success,
 *		failure code otherwise
 */
static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx)
{
	struct siw_wqe *tx_work = tx_wqe(qp);
	struct siw_sqe *resp;

	uint64_t raddr = be64_to_cpu(srx->hdr.rreq.sink_to),
		 laddr = be64_to_cpu(srx->hdr.rreq.source_to);
	uint32_t length = be32_to_cpu(srx->hdr.rreq.read_size),
		 lkey = be32_to_cpu(srx->hdr.rreq.source_stag),
		 rkey = be32_to_cpu(srx->hdr.rreq.sink_stag),
		 msn = be32_to_cpu(srx->hdr.rreq.ddp_msn);

	int run_sq = 1, rv = 0;
	unsigned long flags;

	if (unlikely(msn != srx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ])) {
		siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
				   DDP_ETYPE_UNTAGGED_BUF,
				   DDP_ECODE_UT_INVALID_MSN_RANGE, 0);
		return -EPROTO;
	}
	spin_lock_irqsave(&qp->sq_lock, flags);

	if (tx_work->wr_status == SIW_WR_IDLE) {
		/*
		 * immediately schedule READ response w/o
		 * consuming IRQ entry: IRQ must be empty.
		 */
		tx_work->processed = 0;
		tx_work->mem[0] = NULL;
		tx_work->wr_status = SIW_WR_QUEUED;
		resp = &tx_work->sqe;
	} else {
		resp = irq_alloc_free(qp);
		run_sq = 0;
	}
	if (likely(resp)) {
		resp->opcode = SIW_OP_READ_RESPONSE;

		resp->sge[0].length = length;
		resp->sge[0].laddr = laddr;
		resp->sge[0].lkey = lkey;

		/* Keep aside message sequence number for potential
		 * error reporting during Read Response generation.
		 */
		resp->sge[1].length = msn;

		resp->raddr = raddr;
		resp->rkey = rkey;
		resp->num_sge = length ? 1 : 0;

		/* RRESP now valid as current TX wqe or placed into IRQ */
		smp_store_mb(resp->flags, SIW_WQE_VALID);
	} else {
		pr_warn("siw: [QP %u]: irq %d exceeded %d\n", qp_id(qp),
			qp->irq_put % qp->attrs.irq_size, qp->attrs.irq_size);

		siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
				   RDMAP_ETYPE_REMOTE_OPERATION,
				   RDMAP_ECODE_CATASTROPHIC_STREAM, 0);
		rv = -EPROTO;
	}

	spin_unlock_irqrestore(&qp->sq_lock, flags);

	if (run_sq)
		rv = siw_sq_start(qp);

	return rv;
}

/*
 * Only called at start of Read.Response processing.
 * Transfer pending Read from tip of ORQ into current rx wqe,
 * but keep ORQ entry valid until Read.Response processing done.
 * No Queue locking needed.
 */
static int siw_orqe_start_rx(struct siw_qp *qp)
{
	struct siw_sqe *orqe;
	struct siw_wqe *wqe = NULL;

	/* make sure ORQ indices are current */
	smp_mb();

	orqe = orq_get_current(qp);
	if (READ_ONCE(orqe->flags) & SIW_WQE_VALID) {
		/* RRESP is a TAGGED RDMAP operation */
		wqe = rx_wqe(&qp->rx_tagged);
		wqe->sqe.id = orqe->id;
		wqe->sqe.opcode = orqe->opcode;
		wqe->sqe.sge[0].laddr = orqe->sge[0].laddr;
		wqe->sqe.sge[0].lkey = orqe->sge[0].lkey;
		wqe->sqe.sge[0].length = orqe->sge[0].length;
		wqe->sqe.flags = orqe->flags;
		wqe->sqe.num_sge = 1;
		wqe->bytes = orqe->sge[0].length;
		wqe->processed = 0;
		wqe->mem[0] = NULL;
		/* make sure WQE is completely written before valid */
		smp_wmb();
		wqe->wr_status = SIW_WR_INPROGRESS;

		return 0;
	}
	return -EPROTO;
}

/*
 * siw_proc_rresp:
 *
 * Place incoming RRESP data into memory referenced by RREQ WQE
 * which is at the tip of the ORQ
 *
 * Function supports partially received RRESP's (suspending/resuming
 * current receive processing)
 */
int siw_proc_rresp(struct siw_qp *qp)
{
	struct siw_rx_stream *srx = &qp->rx_stream;
	struct siw_rx_fpdu *frx = &qp->rx_tagged;
	struct siw_wqe *wqe = rx_wqe(frx);
	struct siw_mem **mem, *mem_p;
	struct siw_sge *sge;
	int bytes, rv;

	if (frx->first_ddp_seg) {
		if (unlikely(wqe->wr_status != SIW_WR_IDLE)) {
			pr_warn("siw: [QP %u]: proc RRESP: status %d, op %d\n",
				qp_id(qp), wqe->wr_status, wqe->sqe.opcode);
			rv = -EPROTO;
			goto error_term;
		}
		/*
		 * fetch pending RREQ from orq
		 */
		rv = siw_orqe_start_rx(qp);
		if (rv) {
			pr_warn("siw: [QP %u]: ORQ empty at idx %d\n",
				qp_id(qp), qp->orq_get % qp->attrs.orq_size);
			goto error_term;
		}
		rv = siw_rresp_check_ntoh(srx, frx);
		if (unlikely(rv)) {
			siw_qp_event(qp, IB_EVENT_QP_FATAL);
			return rv;
		}
	} else {
		if (unlikely(wqe->wr_status != SIW_WR_INPROGRESS)) {
			pr_warn("siw: [QP %u]: resume RRESP: status %d\n",
				qp_id(qp), wqe->wr_status);
			rv = -EPROTO;
			goto error_term;
		}
	}
	if (!srx->fpdu_part_rem) /* zero length RRESPONSE */
		return 0;

	sge = wqe->sqe.sge; /* there is only one */
	mem = &wqe->mem[0];

	if (!(*mem)) {
		/*
		 * check target memory which resolves memory on first fragment
		 */
		rv = siw_check_sge(qp->pd, sge, mem, IB_ACCESS_LOCAL_WRITE, 0,
				   wqe->bytes);
		if (unlikely(rv)) {
			siw_dbg_qp(qp, "target mem check: %d\n", rv);
			wqe->wc_status = SIW_WC_LOC_PROT_ERR;

			siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
					   DDP_ETYPE_TAGGED_BUF,
					   siw_tagged_error(-rv), 0);

			siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);

			return -EINVAL;
		}
	}
	mem_p = *mem;

	bytes = min(srx->fpdu_part_rem, srx->skb_new);

	if (mem_p->mem_obj == NULL)
		rv = siw_rx_kva(srx,
			(void *)(uintptr_t)(sge->laddr + wqe->processed),
			bytes);
	else if (!mem_p->is_pbl)
		rv = siw_rx_umem(srx, mem_p->umem, sge->laddr + wqe->processed,
				 bytes);
	else
		rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p,
				sge->laddr + wqe->processed, bytes);
	if (rv != bytes) {
		wqe->wc_status = SIW_WC_GENERAL_ERR;
		rv = -EINVAL;
		goto error_term;
	}
	srx->fpdu_part_rem -= rv;
	srx->fpdu_part_rcvd += rv;
	wqe->processed += rv;

	if (!srx->fpdu_part_rem) {
		srx->ddp_to += srx->fpdu_part_rcvd;
		return 0;
	}
	return -EAGAIN;

error_term:
	siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, DDP_ETYPE_CATASTROPHIC,
			   DDP_ECODE_CATASTROPHIC, 0);
	return rv;
}

int siw_proc_terminate(struct siw_qp *qp)
{
	struct siw_rx_stream *srx = &qp->rx_stream;
	struct sk_buff *skb = srx->skb;
	struct iwarp_terminate *term = &srx->hdr.terminate;
	union iwarp_hdr term_info;
	u8 *infop = (u8 *)&term_info;
	enum rdma_opcode op;
	u16 to_copy = sizeof(struct iwarp_ctrl);

	pr_warn("siw: got TERMINATE. layer %d, type %d, code %d\n",
		__rdmap_term_layer(term), __rdmap_term_etype(term),
		__rdmap_term_ecode(term));

	if (be32_to_cpu(term->ddp_qn) != RDMAP_UNTAGGED_QN_TERMINATE ||
	    be32_to_cpu(term->ddp_msn) !=
		    qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] ||
	    be32_to_cpu(term->ddp_mo) != 0) {
		pr_warn("siw: rx bogus TERM [QN x%08x, MSN x%08x, MO x%08x]\n",
			be32_to_cpu(term->ddp_qn), be32_to_cpu(term->ddp_msn),
			be32_to_cpu(term->ddp_mo));
		return -ECONNRESET;
	}
	/*
	 * Receive remaining pieces of TERM if indicated
	 */
	if (!term->flag_m)
		return -ECONNRESET;

	/*
	 * Do not take the effort to reassemble a network fragmented
	 * TERM message
	 */
	if (srx->skb_new < sizeof(struct iwarp_ctrl_tagged))
		return -ECONNRESET;

	memset(infop, 0, sizeof(term_info));

	skb_copy_bits(skb, srx->skb_offset, infop, to_copy);

	op = __rdmap_get_opcode(&term_info.ctrl);
	if (op >= RDMAP_TERMINATE)
		goto out;

	infop += to_copy;
	srx->skb_offset += to_copy;
	srx->skb_new -= to_copy;
	srx->skb_copied += to_copy;
	srx->fpdu_part_rcvd += to_copy;
	srx->fpdu_part_rem -= to_copy;

	to_copy = iwarp_pktinfo[op].hdr_len - to_copy;

	/* Again, no network fragmented TERM's */
	if (to_copy + MPA_CRC_SIZE > srx->skb_new)
		return -ECONNRESET;

	skb_copy_bits(skb, srx->skb_offset, infop, to_copy);

	if (term->flag_r) {
		siw_dbg_qp(qp, "TERM reports RDMAP hdr type %u, len %u (%s)\n",
			   op, be16_to_cpu(term_info.ctrl.mpa_len),
			   term->flag_m ? "valid" : "invalid");
	} else if (term->flag_d) {
		siw_dbg_qp(qp, "TERM reports DDP hdr type %u, len %u (%s)\n",
			   op, be16_to_cpu(term_info.ctrl.mpa_len),
			   term->flag_m ? "valid" : "invalid");
	}
out:
	srx->skb_new -= to_copy;
	srx->skb_offset += to_copy;
	srx->skb_copied += to_copy;
	srx->fpdu_part_rcvd += to_copy;
	srx->fpdu_part_rem -= to_copy;

	return -ECONNRESET;
}

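/*
 * siw_get_trailer()
 *
 * Read pad bytes and CRC of the current FPDU and, if CRC checking
 * is enabled, verify the received CRC against the locally
 * computed one.
 */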
static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx)
{
	struct sk_buff *skb = srx->skb;
	u8 *tbuf = (u8 *)&srx->trailer.crc - srx->pad;
	__wsum crc_in, crc_own = 0;

	siw_dbg_qp(qp, "expected %d, available %d, pad %u\n",
		   srx->fpdu_part_rem, srx->skb_new, srx->pad);

	if (srx->skb_new < srx->fpdu_part_rem)
		return -EAGAIN;

	skb_copy_bits(skb, srx->skb_offset, tbuf, srx->fpdu_part_rem);

	if (srx->mpa_crc_hd && srx->pad)
		crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad);

	srx->skb_new -= srx->fpdu_part_rem;
	srx->skb_offset += srx->fpdu_part_rem;
	srx->skb_copied += srx->fpdu_part_rem;

	if (!srx->mpa_crc_hd)
		return 0;

	/*
	 * CRC32 is computed, transmitted and received directly in NBO,
	 * so there's never a reason to convert it between endianness.
	 */
	crypto_shash_final(srx->mpa_crc_hd, (u8 *)&crc_own);
	crc_in = (__force __wsum)srx->trailer.crc;

	if (unlikely(crc_in != crc_own)) {
		pr_warn("siw: crc error. in: %08x, own %08x, op %u\n",
			crc_in, crc_own, qp->rx_stream.rdmap_op);

		siw_init_terminate(qp, TERM_ERROR_LAYER_LLP,
				   LLP_ETYPE_MPA,
				   LLP_ECODE_RECEIVED_CRC, 0);
		return -EINVAL;
	}
	return 0;
}

#define MIN_DDP_HDR sizeof(struct iwarp_ctrl_tagged)

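/*
 * siw_get_hdr()
 *
 * Read the complete DDP/RDMAP header of the current FPDU, checking
 * DDP and RDMAP version and opcode. Sets up the per-opcode FPDU
 * receive context and detects illegal intersection of RDMAP messages.
 */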
static int siw_get_hdr(struct siw_rx_stream *srx)
{
	struct sk_buff *skb = srx->skb;
	struct siw_qp *qp = rx_qp(srx);
	struct iwarp_ctrl *c_hdr = &srx->hdr.ctrl;
	struct siw_rx_fpdu *frx;
	u8 opcode;
	int bytes;

	if (srx->fpdu_part_rcvd < MIN_DDP_HDR) {
		/*
		 * copy a minimum sized (tagged) DDP frame control part
		 */
		bytes = min_t(int, srx->skb_new,
			      MIN_DDP_HDR - srx->fpdu_part_rcvd);

		skb_copy_bits(skb, srx->skb_offset,
			      (char *)c_hdr + srx->fpdu_part_rcvd, bytes);

		srx->fpdu_part_rcvd += bytes;

		srx->skb_new -= bytes;
		srx->skb_offset += bytes;
		srx->skb_copied += bytes;

		if (srx->fpdu_part_rcvd < MIN_DDP_HDR)
			return -EAGAIN;

		if (unlikely(__ddp_get_version(c_hdr) != DDP_VERSION)) {
			enum ddp_etype etype;
			enum ddp_ecode ecode;

			pr_warn("siw: received ddp version unsupported %d\n",
				__ddp_get_version(c_hdr));

			if (c_hdr->ddp_rdmap_ctrl & DDP_FLAG_TAGGED) {
				etype = DDP_ETYPE_TAGGED_BUF;
				ecode = DDP_ECODE_T_VERSION;
			} else {
				etype = DDP_ETYPE_UNTAGGED_BUF;
				ecode = DDP_ECODE_UT_VERSION;
			}
			siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
					   etype, ecode, 0);
			return -EINVAL;
		}
		if (unlikely(__rdmap_get_version(c_hdr) != RDMAP_VERSION)) {
			pr_warn("siw: received rdmap version unsupported %d\n",
				__rdmap_get_version(c_hdr));

			siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_RDMAP,
					   RDMAP_ETYPE_REMOTE_OPERATION,
					   RDMAP_ECODE_VERSION, 0);
			return -EINVAL;
		}
		opcode = __rdmap_get_opcode(c_hdr);

		if (opcode > RDMAP_TERMINATE) {
			pr_warn("siw: received unknown packet type %u\n",
				opcode);

			siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_RDMAP,
					   RDMAP_ETYPE_REMOTE_OPERATION,
					   RDMAP_ECODE_OPCODE, 0);
			return -EINVAL;
		}
		siw_dbg_qp(rx_qp(srx), "new header, opcode %u\n", opcode);
	} else {
		opcode = __rdmap_get_opcode(c_hdr);
	}
	set_rx_fpdu_context(qp, opcode);
	frx = qp->rx_fpdu;

	/*
	 * Figure out len of current hdr: variable length of
	 * iwarp hdr may add more header bytes
	 */
	if (iwarp_pktinfo[opcode].hdr_len > sizeof(struct iwarp_ctrl_tagged)) {
		bytes = iwarp_pktinfo[opcode].hdr_len - MIN_DDP_HDR;

		if (srx->skb_new < bytes)
			return -EAGAIN;

		skb_copy_bits(skb, srx->skb_offset,
			      (char *)c_hdr + srx->fpdu_part_rcvd, bytes);

		srx->fpdu_part_rcvd += bytes;

		srx->skb_new -= bytes;
		srx->skb_offset += bytes;
		srx->skb_copied += bytes;
	}

	/*
	 * DDP/RDMAP header receive completed. Check if the current
	 * DDP segment starts a new RDMAP message or continues a previously
	 * started RDMAP message.
	 *
	 * Alternating reception of DDP segments (or FPDUs) from incomplete
	 * tagged and untagged RDMAP messages is supported, as long as
	 * the current tagged or untagged message gets eventually completed
	 * w/o intersection from other message types. E.g., a WRITE can get
	 * intersected by a SEND message, as long as the WRITE message
	 * either gets completed before the SEND message resumes, or the
	 * SEND message gets completed before the WRITE message resumes.
	 */
	if (srx->mpa_crc_hd) {
		/*
		 * Restart CRC computation
		 */
		crypto_shash_init(srx->mpa_crc_hd);
		crypto_shash_update(srx->mpa_crc_hd, (u8 *)c_hdr,
				    srx->fpdu_part_rcvd);
	}
	if (frx->more_ddp_segs) {
		frx->first_ddp_seg = 0;
		if (frx->prev_rdmap_op != opcode) {
			pr_warn("siw: packet intersection: %u : %u\n",
				frx->prev_rdmap_op, opcode);
			/*
			 * The last inbound RDMA operation of the same type
			 * (tagged or untagged) is left unfinished.
			 * To complete it in error, make it the current
			 * operation again, even with the header already
			 * overwritten. For error handling, only the opcode
			 * and current rx context are relevant.
			 */
			set_rx_fpdu_context(qp, frx->prev_rdmap_op);
			__rdmap_set_opcode(c_hdr, frx->prev_rdmap_op);
			return -EPROTO;
		}
	} else {
		frx->prev_rdmap_op = opcode;
		frx->first_ddp_seg = 1;
	}
	frx->more_ddp_segs = c_hdr->ddp_rdmap_ctrl & DDP_FLAG_LAST ? 0 : 1;

	return 0;
}

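/*
 * siw_check_tx_fence()
 *
 * Release the ORQ entry of the completed inbound Read Response and,
 * if transmission was fenced on that outstanding Read, resume SQ
 * processing.
 */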
static int siw_check_tx_fence(struct siw_qp *qp)
{
	struct siw_wqe *tx_waiting = tx_wqe(qp);
	struct siw_sqe *rreq;
	int resume_tx = 0, rv = 0;
	unsigned long flags;

	spin_lock_irqsave(&qp->orq_lock, flags);

	rreq = orq_get_current(qp);

	/* free current orq entry */
	WRITE_ONCE(rreq->flags, 0);

	if (qp->tx_ctx.orq_fence) {
		if (unlikely(tx_waiting->wr_status != SIW_WR_QUEUED)) {
			pr_warn("siw: [QP %u]: fence resume: bad status %d\n",
				qp_id(qp), tx_waiting->wr_status);
			rv = -EPROTO;
			goto out;
		}
		/* resume SQ processing */
		if (tx_waiting->sqe.opcode == SIW_OP_READ ||
		    tx_waiting->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
			rreq = orq_get_tail(qp);
			if (unlikely(!rreq)) {
				pr_warn("siw: [QP %u]: no ORQE\n", qp_id(qp));
				rv = -EPROTO;
				goto out;
			}
			siw_read_to_orq(rreq, &tx_waiting->sqe);

			qp->orq_put++;
			qp->tx_ctx.orq_fence = 0;
			resume_tx = 1;

		} else if (siw_orq_empty(qp)) {
			qp->tx_ctx.orq_fence = 0;
			resume_tx = 1;
		} else {
			pr_warn("siw: [QP %u]: fence resume: orq idx: %d:%d\n",
				qp_id(qp), qp->orq_get, qp->orq_put);
			rv = -EPROTO;
		}
	}
	qp->orq_get++;
out:
	spin_unlock_irqrestore(&qp->orq_lock, flags);

	if (resume_tx)
		rv = siw_sq_start(qp);

	return rv;
}

/*
 * siw_rdmap_complete()
 *
 * Complete processing of an RDMA message after receiving all
 * DDP segments or abort processing after encountering an error case.
 *
 *   o SENDs + RRESPs need receive completion generation,
 *   o RREQs make the tx path produce a pseudo READ response WQE,
 *   o WRITEs need memory dereferencing
 *
 * TODO: Failed WRITEs need local error to be surfaced.
 */
static int siw_rdmap_complete(struct siw_qp *qp, int error)
{
	struct siw_rx_stream *srx = &qp->rx_stream;
	struct siw_wqe *wqe = rx_wqe(qp->rx_fpdu);
	enum siw_wc_status wc_status = wqe->wc_status;
	u8 opcode = __rdmap_get_opcode(&srx->hdr.ctrl);
	int rv = 0;

	switch (opcode) {
	case RDMAP_SEND_SE:
	case RDMAP_SEND_SE_INVAL:
		wqe->rqe.flags |= SIW_WQE_SOLICITED;
		/* Fall through */

	case RDMAP_SEND:
	case RDMAP_SEND_INVAL:
		if (wqe->wr_status == SIW_WR_IDLE)
			break;

		srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]++;

		if (error != 0 && wc_status == SIW_WC_SUCCESS)
			wc_status = SIW_WC_GENERAL_ERR;
		/*
		 * Handle STag invalidation request
		 */
		if (wc_status == SIW_WC_SUCCESS &&
		    (opcode == RDMAP_SEND_INVAL ||
		     opcode == RDMAP_SEND_SE_INVAL)) {
			rv = siw_invalidate_stag(qp->pd, srx->inval_stag);
			if (rv) {
				siw_init_terminate(
					qp, TERM_ERROR_LAYER_RDMAP,
					rv == -EACCES ?
						RDMAP_ETYPE_REMOTE_PROTECTION :
						RDMAP_ETYPE_REMOTE_OPERATION,
					RDMAP_ECODE_CANNOT_INVALIDATE, 0);

				wc_status = SIW_WC_REM_INV_REQ_ERR;
			}
			rv = siw_rqe_complete(qp, &wqe->rqe, wqe->processed,
					      rv ? 0 : srx->inval_stag,
					      wc_status);
		} else {
			rv = siw_rqe_complete(qp, &wqe->rqe, wqe->processed,
					      0, wc_status);
		}
		siw_wqe_put_mem(wqe, SIW_OP_RECEIVE);
		break;

	case RDMAP_RDMA_READ_RESP:
		if (wqe->wr_status == SIW_WR_IDLE)
			break;

		if (error != 0) {
			if ((srx->state == SIW_GET_HDR &&
			     qp->rx_fpdu->first_ddp_seg) || error == -ENODATA)
				/* possible RREQ in ORQ left untouched */
				break;

			if (wc_status == SIW_WC_SUCCESS)
				wc_status = SIW_WC_GENERAL_ERR;
		} else if (rdma_is_kernel_res(&qp->base_qp.res) &&
			   rx_type(wqe) == SIW_OP_READ_LOCAL_INV) {
			/*
			 * Handle any STag invalidation request
			 */
			rv = siw_invalidate_stag(qp->pd, wqe->sqe.sge[0].lkey);
			if (rv) {
				siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
						   RDMAP_ETYPE_CATASTROPHIC,
						   RDMAP_ECODE_UNSPECIFIED, 0);

				if (wc_status == SIW_WC_SUCCESS) {
					wc_status = SIW_WC_GENERAL_ERR;
					error = rv;
				}
			}
		}
		/*
		 * All errors turn the wqe into signalled.
		 */
		if ((wqe->sqe.flags & SIW_WQE_SIGNALLED) || error != 0)
			rv = siw_sqe_complete(qp, &wqe->sqe, wqe->processed,
					      wc_status);
		siw_wqe_put_mem(wqe, SIW_OP_READ);

		if (!error)
			rv = siw_check_tx_fence(qp);
		else
			/* Disable current ORQ element */
			WRITE_ONCE(orq_get_current(qp)->flags, 0);
		break;

	case RDMAP_RDMA_READ_REQ:
		if (!error) {
			rv = siw_init_rresp(qp, srx);
			srx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ]++;
		}
		break;

	case RDMAP_RDMA_WRITE:
		if (wqe->wr_status == SIW_WR_IDLE)
			break;

		/*
		 * Free References from memory object if
		 * attached to receive context (inbound WRITE).
		 * While a zero-length WRITE is allowed,
		 * no memory reference got created.
		 */
		if (rx_mem(&qp->rx_tagged)) {
			siw_mem_put(rx_mem(&qp->rx_tagged));
			rx_mem(&qp->rx_tagged) = NULL;
		}
		break;

	default:
		break;
	}
	wqe->wr_status = SIW_WR_IDLE;

	return rv;
}

/*
 * siw_tcp_rx_data()
 *
 * Main routine to consume inbound TCP payload
 *
 * @rd_desc:	read descriptor
 * @skb:	socket buffer
 * @off:	offset in skb
 * @len:	skb->len - offset from skb->data pointer
 */
int siw_tcp_rx_data(read_descriptor_t *rd_desc, struct sk_buff *skb,
		    unsigned int off, size_t len)
{
	struct siw_qp *qp = rd_desc->arg.data;
	struct siw_rx_stream *srx = &qp->rx_stream;
	int rv;

	srx->skb = skb;
	srx->skb_new = skb->len - off;
	srx->skb_offset = off;
	srx->skb_copied = 0;

	siw_dbg_qp(qp, "new data, len %d\n", srx->skb_new);

	while (srx->skb_new) {
		int run_completion = 1;

		if (unlikely(srx->rx_suspend)) {
			/* Do not process any more data */
			srx->skb_copied += srx->skb_new;
			break;
		}
		switch (srx->state) {
		case SIW_GET_HDR:
			rv = siw_get_hdr(srx);
			if (!rv) {
				srx->fpdu_part_rem =
					be16_to_cpu(srx->hdr.ctrl.mpa_len) -
					srx->fpdu_part_rcvd + MPA_HDR_SIZE;

				if (srx->fpdu_part_rem)
					srx->pad = -srx->fpdu_part_rem & 0x3;
				else
					srx->pad = 0;

				srx->state = SIW_GET_DATA_START;
				srx->fpdu_part_rcvd = 0;
			}
			break;

		case SIW_GET_DATA_MORE:
			/*
			 * Another data fragment of the same DDP segment.
			 * Setting first_ddp_seg = 0 avoids repeating
			 * initializations that shall occur only once per
			 * DDP segment.
			 */
			qp->rx_fpdu->first_ddp_seg = 0;
			/* Fall through */

		case SIW_GET_DATA_START:
			/*
			 * Headers will be checked by the opcode-specific
			 * data receive function below.
			 */
			rv = iwarp_pktinfo[qp->rx_stream.rdmap_op].rx_data(qp);
			if (!rv) {
				int mpa_len =
					be16_to_cpu(srx->hdr.ctrl.mpa_len)
					+ MPA_HDR_SIZE;

				srx->fpdu_part_rem = (-mpa_len & 0x3)
						      + MPA_CRC_SIZE;
				srx->fpdu_part_rcvd = 0;
				srx->state = SIW_GET_TRAILER;
			} else {
				if (unlikely(rv == -ECONNRESET))
					run_completion = 0;
				else
					srx->state = SIW_GET_DATA_MORE;
			}
			break;

		case SIW_GET_TRAILER:
			/*
			 * read CRC + any padding
			 */
			rv = siw_get_trailer(qp, srx);
			if (likely(!rv)) {
				/*
				 * FPDU completed.
				 * complete RDMAP message if last fragment
				 */
				srx->state = SIW_GET_HDR;
				srx->fpdu_part_rcvd = 0;

				if (!(srx->hdr.ctrl.ddp_rdmap_ctrl &
				      DDP_FLAG_LAST))
					/* more frags */
					break;

				rv = siw_rdmap_complete(qp, 0);
				run_completion = 0;
			}
			break;

		default:
			pr_warn("QP[%u]: RX out of state\n", qp_id(qp));
			rv = -EPROTO;
			run_completion = 0;
		}
		if (unlikely(rv != 0 && rv != -EAGAIN)) {
			if ((srx->state > SIW_GET_HDR ||
			     qp->rx_fpdu->more_ddp_segs) && run_completion)
				siw_rdmap_complete(qp, rv);

			siw_dbg_qp(qp, "rx error %d, rx state %d\n", rv,
				   srx->state);

			siw_qp_cm_drop(qp, 1);

			break;
		}
		if (rv) {
			siw_dbg_qp(qp, "fpdu fragment, state %d, missing %d\n",
				   srx->state, srx->fpdu_part_rem);
			break;
		}
	}
	return srx->skb_copied;
}