#include <linux/kernel.h>
#include <linux/in.h>
#include <linux/device.h>
#include <linux/dmapool.h>
#include <linux/ratelimit.h>

#include "rds.h"
#include "iw.h"

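/*
 * Translate a send work-completion status into an RDS RDMA notification
 * and hand it to the core.  Flush errors (connection teardown) are not
 * reported to the application.
 */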
static void rds_iw_send_rdma_complete(struct rds_message *rm,
				      int wc_status)
{
	int notify_status;

	switch (wc_status) {
	case IB_WC_WR_FLUSH_ERR:
		return;

	case IB_WC_SUCCESS:
		notify_status = RDS_RDMA_SUCCESS;
		break;

	case IB_WC_REM_ACCESS_ERR:
		notify_status = RDS_RDMA_REMOTE_ERROR;
		break;

	default:
		notify_status = RDS_RDMA_OTHER_ERROR;
		break;
	}
	rds_rdma_send_complete(rm, notify_status);
}

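/* Unmap the DMA mapping of an RDMA operation, if it is still mapped. */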
static void rds_iw_send_unmap_rdma(struct rds_iw_connection *ic,
				   struct rm_rdma_op *op)
{
	if (op->op_mapped) {
		ib_dma_unmap_sg(ic->i_cm_id->device,
				op->op_sg, op->op_nents,
				op->op_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
		op->op_mapped = 0;
	}
}

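/*
 * Tear down the state attached to a completed send work request: unmap
 * the data payload (and any RDMA operation that rode along with it),
 * raise the RDMA completion notification, and drop our reference on the
 * message.
 */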
static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic,
				 struct rds_iw_send_work *send,
				 int wc_status)
{
	struct rds_message *rm = send->s_rm;

	rdsdebug("ic %p send %p rm %p\n", ic, send, rm);

	ib_dma_unmap_sg(ic->i_cm_id->device,
			rm->data.op_sg, rm->data.op_nents,
			DMA_TO_DEVICE);

	if (rm->rdma.op_active) {
		rds_iw_send_unmap_rdma(ic, &rm->rdma);

		/* Raise the RDMA completion notification now that the SEND
		 * carrying this message has completed. */
		rds_iw_send_rdma_complete(rm, wc_status);

		if (rm->rdma.op_write)
			rds_stats_add(s_send_rdma_bytes, rm->rdma.op_bytes);
		else
			rds_stats_add(s_recv_rdma_bytes, rm->rdma.op_bytes);
	}

	/* Tell the core the message is no longer mapped for DMA. */
	rds_message_unmapped(rm);

	rds_message_put(rm);
	send->s_rm = NULL;
}

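/*
 * Initialize the send ring: point each work request at its scatter/gather
 * list and at its slot in the ring's header array, and allocate the
 * fast-registration MR and page list each entry may need for RDMA work.
 */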
void rds_iw_send_init_ring(struct rds_iw_connection *ic)
{
	struct rds_iw_send_work *send;
	u32 i;

	for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
		struct ib_sge *sge;

		send->s_rm = NULL;
		send->s_op = NULL;
		send->s_mapping = NULL;

		send->s_wr.next = NULL;
		send->s_wr.wr_id = i;
		send->s_wr.sg_list = send->s_sge;
		send->s_wr.num_sge = 1;
		send->s_wr.opcode = IB_WR_SEND;
		send->s_wr.send_flags = 0;
		send->s_wr.ex.imm_data = 0;

		sge = rds_iw_data_sge(ic, send->s_sge);
		sge->lkey = 0;

		sge = rds_iw_header_sge(ic, send->s_sge);
		sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header));
		sge->length = sizeof(struct rds_header);
		sge->lkey = 0;

		send->s_mr = ib_alloc_fast_reg_mr(ic->i_pd, fastreg_message_size);
		if (IS_ERR(send->s_mr)) {
			printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_mr failed\n");
			break;
		}

		send->s_page_list = ib_alloc_fast_reg_page_list(
			ic->i_cm_id->device, fastreg_message_size);
		if (IS_ERR(send->s_page_list)) {
			printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed\n");
			break;
		}
	}
}

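/*
 * Release everything the send ring holds: deregister the fast-reg MRs and
 * page lists, and unmap any messages or RDMA operations still attached to
 * ring entries.
 */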
void rds_iw_send_clear_ring(struct rds_iw_connection *ic)
{
	struct rds_iw_send_work *send;
	u32 i;

	for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
		BUG_ON(!send->s_mr);
		ib_dereg_mr(send->s_mr);
		BUG_ON(!send->s_page_list);
		ib_free_fast_reg_page_list(send->s_page_list);
		if (send->s_wr.opcode == 0xdead)
			continue;
		if (send->s_rm)
			rds_iw_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
		if (send->s_op)
			rds_iw_send_unmap_rdma(ic, send->s_op);
	}
}

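/*
 * Send completion handler.  Drains the send completion queue, releases the
 * ring entries covered by each completion, and restarts the send worker if
 * the connection had stalled waiting for ring space.
 */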
void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
{
	struct rds_connection *conn = context;
	struct rds_iw_connection *ic = conn->c_transport_data;
	struct ib_wc wc;
	struct rds_iw_send_work *send;
	u32 completed;
	u32 oldest;
	u32 i;
	int ret;

	rdsdebug("cq %p conn %p\n", cq, conn);
	rds_iw_stats_inc(s_iw_tx_cq_call);
	ret = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
	if (ret)
		rdsdebug("ib_req_notify_cq send failed: %d\n", ret);

	while (ib_poll_cq(cq, 1, &wc) > 0) {
		rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
			 (unsigned long long)wc.wr_id, wc.status, wc.byte_len,
			 be32_to_cpu(wc.ex.imm_data));
		rds_iw_stats_inc(s_iw_tx_cq_event);

		if (wc.status != IB_WC_SUCCESS) {
			printk(KERN_ERR "WC Error: status = %d opcode = %d\n", wc.status, wc.opcode);
			break;
		}

		if (wc.opcode == IB_WC_LOCAL_INV && wc.wr_id == RDS_IW_LOCAL_INV_WR_ID) {
			ic->i_fastreg_posted = 0;
			continue;
		}

		if (wc.opcode == IB_WC_FAST_REG_MR && wc.wr_id == RDS_IW_FAST_REG_WR_ID) {
			ic->i_fastreg_posted = 1;
			continue;
		}

		if (wc.wr_id == RDS_IW_ACK_WR_ID) {
			if (time_after(jiffies, ic->i_ack_queued + HZ/2))
				rds_iw_stats_inc(s_iw_tx_stalled);
			rds_iw_ack_send_complete(ic);
			continue;
		}

		oldest = rds_iw_ring_oldest(&ic->i_send_ring);

		completed = rds_iw_ring_completed(&ic->i_send_ring, wc.wr_id, oldest);

		for (i = 0; i < completed; i++) {
			send = &ic->i_sends[oldest];

			/* Release the resources held by the completed work request. */
			switch (send->s_wr.opcode) {
			case IB_WR_SEND:
				if (send->s_rm)
					rds_iw_send_unmap_rm(ic, send, wc.status);
				break;
			case IB_WR_FAST_REG_MR:
			case IB_WR_RDMA_WRITE:
			case IB_WR_RDMA_READ:
			case IB_WR_RDMA_READ_WITH_INV:
				/* Nothing to unmap here; the message's mappings are
				 * torn down when the SEND for the same message
				 * completes. */
				break;
			default:
				printk_ratelimited(KERN_NOTICE
					"RDS/IW: %s: unexpected opcode 0x%x in WR!\n",
					__func__, send->s_wr.opcode);
				break;
			}

			send->s_wr.opcode = 0xdead;
			send->s_wr.num_sge = 1;
			if (time_after(jiffies, send->s_queued + HZ/2))
				rds_iw_stats_inc(s_iw_tx_stalled);

			/* If an RDMA operation produced a remote access error,
			 * signal it to the sender right away. */
			if (unlikely(wc.status == IB_WC_REM_ACCESS_ERR && send->s_op)) {
				struct rds_message *rm;

				rm = rds_send_get_message(conn, send->s_op);
				if (rm)
					rds_iw_send_rdma_complete(rm, wc.status);
			}

			oldest = (oldest + 1) % ic->i_send_ring.w_nr;
		}

		rds_iw_ring_free(&ic->i_send_ring, completed);

		if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
		    test_bit(0, &conn->c_map_queued))
			queue_delayed_work(rds_wq, &conn->c_send_w, 0);

		/* A failed send completion takes the connection down so it can
		 * reconnect. */
		if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) {
			rds_iw_conn_error(conn,
				"send completion on %pI4 "
				"had status %u, disconnecting and reconnecting\n",
				&conn->c_faddr, wc.status);
		}
	}
}

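/*
 * Credit-based flow control.
 *
 * ic->i_credits packs two counters: the send credits we may consume (one
 * per send work request we post) and the number of receive buffers we have
 * posted locally but not yet advertised to the peer.
 * rds_iw_send_grab_credits() atomically claims up to @wanted send credits
 * and decides how many newly posted buffers to advertise (bounded by
 * @max_posted); it returns the number of send credits obtained and stores
 * the advertisement in @adv_credits.  When the connection runs out of
 * credits, RDS_LL_SEND_FULL is set and the send worker backs off until the
 * peer advertises more.
 */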
int rds_iw_send_grab_credits(struct rds_iw_connection *ic,
			     u32 wanted, u32 *adv_credits, int need_posted, int max_posted)
{
	unsigned int avail, posted, got = 0, advertise;
	long oldval, newval;

	*adv_credits = 0;
	if (!ic->i_flowctl)
		return wanted;

try_again:
	advertise = 0;
	oldval = newval = atomic_read(&ic->i_credits);
	posted = IB_GET_POST_CREDITS(oldval);
	avail = IB_GET_SEND_CREDITS(oldval);

	rdsdebug("wanted=%u credits=%u posted=%u\n",
		 wanted, avail, posted);

	/* Hold back the last send credit unless we also have freshly posted
	 * receive buffers to advertise; it is needed for a credit update. */
	if (avail && !posted)
		avail--;

	if (avail < wanted) {
		struct rds_connection *conn = ic->i_cm_id->context;

		/* Not enough credits: take what is there and stall the send worker. */
		set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
		got = avail;
	} else {
		got = wanted;
	}
	newval -= IB_SET_SEND_CREDITS(got);

	/* Advertise newly posted receive buffers if we are sending anyway, or
	 * if the caller explicitly asked for it (need_posted). */
	if (posted && (got || need_posted)) {
		advertise = min_t(unsigned int, posted, max_posted);
		newval -= IB_SET_POST_CREDITS(advertise);
	}

	/* Commit the new credit word atomically; retry if it changed under us. */
	if (atomic_cmpxchg(&ic->i_credits, oldval, newval) != oldval)
		goto try_again;

	*adv_credits = advertise;
	return got;
}

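/*
 * The peer has advertised more receive buffers: add the corresponding send
 * credits and kick the send worker if it was waiting for them.
 */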
void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits)
{
	struct rds_iw_connection *ic = conn->c_transport_data;

	if (credits == 0)
		return;

	rdsdebug("credits=%u current=%u%s\n",
		 credits,
		 IB_GET_SEND_CREDITS(atomic_read(&ic->i_credits)),
		 test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ? ", ll_send_full" : "");

	atomic_add(IB_SET_SEND_CREDITS(credits), &ic->i_credits);
	if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags))
		queue_delayed_work(rds_wq, &conn->c_send_w, 0);

	WARN_ON(IB_GET_SEND_CREDITS(credits) >= 16384);

	rds_iw_stats_inc(s_iw_rx_credit_updates);
}

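/*
 * Account for newly posted receive buffers so they can be advertised to the
 * peer as additional send credits.
 */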
void rds_iw_advertise_credits(struct rds_connection *conn, unsigned int posted)
{
	struct rds_iw_connection *ic = conn->c_transport_data;

	if (posted == 0)
		return;

	atomic_add(IB_SET_POST_CREDITS(posted), &ic->i_credits);

	/* Once enough unadvertised receive buffers have accumulated, request an
	 * ACK so a credit update reaches the peer promptly even if we have no
	 * data to send. */
	if (IB_GET_POST_CREDITS(atomic_read(&ic->i_credits)) >= 16)
		set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
}

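/*
 * Fill in one send work request: point it at the data fragment (if any) and
 * at the copy of the RDS header that lives in the ring's header array.
 */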
static inline void
rds_iw_xmit_populate_wr(struct rds_iw_connection *ic,
			struct rds_iw_send_work *send, unsigned int pos,
			unsigned long buffer, unsigned int length,
			int send_flags)
{
	struct ib_sge *sge;

	WARN_ON(pos != send - ic->i_sends);

	send->s_wr.send_flags = send_flags;
	send->s_wr.opcode = IB_WR_SEND;
	send->s_wr.num_sge = 2;
	send->s_wr.next = NULL;
	send->s_queued = jiffies;
	send->s_op = NULL;

	if (length != 0) {
		sge = rds_iw_data_sge(ic, send->s_sge);
		sge->addr = buffer;
		sge->length = length;
		sge->lkey = rds_iw_local_dma_lkey(ic);

		sge = rds_iw_header_sge(ic, send->s_sge);
	} else {
		/* Header-only packet: no payload SGE. */
		send->s_wr.num_sge = 1;
		sge = &send->s_sge[0];
	}

	sge->addr = ic->i_send_hdrs_dma + (pos * sizeof(struct rds_header));
	sge->length = sizeof(struct rds_header);
	sge->lkey = rds_iw_local_dma_lkey(ic);
}

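/*
 * Transmit (part of) a message over the connection.  The payload is mapped
 * on first use and then carried in RDS_FRAG_SIZE fragments, one SEND work
 * request per fragment, each with its own copy of the header.  Returns the
 * number of bytes queued for transmission, or a negative errno when the
 * send cannot make progress (no ring entries, no credits, or a pending
 * fast-registration).
 */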
int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
		unsigned int hdr_off, unsigned int sg, unsigned int off)
{
	struct rds_iw_connection *ic = conn->c_transport_data;
	struct ib_device *dev = ic->i_cm_id->device;
	struct rds_iw_send_work *send = NULL;
	struct rds_iw_send_work *first;
	struct rds_iw_send_work *prev;
	struct ib_send_wr *failed_wr;
	struct scatterlist *scat;
	u32 pos;
	u32 i;
	u32 work_alloc;
	u32 credit_alloc;
	u32 posted;
	u32 adv_credits = 0;
	int send_flags = 0;
	int sent;
	int ret;
	int flow_controlled = 0;

	BUG_ON(off % RDS_FRAG_SIZE);
	BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header));

	/* If the message carries an RDMA cookie, the matching fast-reg work
	 * request must have completed before we may send it. */
	if (rds_rdma_cookie_key(rm->m_rdma_cookie) && !ic->i_fastreg_posted) {
		ret = -EAGAIN;
		goto out;
	}

	/* Compute how many send work requests this message needs: one per
	 * RDS_FRAG_SIZE fragment, or a single one for a header-only message. */
	if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0)
		i = 1;
	else
		i = ceil(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE);

	work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, i, &pos);
	if (work_alloc == 0) {
		set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
		rds_iw_stats_inc(s_iw_tx_ring_full);
		ret = -ENOMEM;
		goto out;
	}

	credit_alloc = work_alloc;
	if (ic->i_flowctl) {
		credit_alloc = rds_iw_send_grab_credits(ic, work_alloc, &posted, 0, RDS_MAX_ADV_CREDIT);
		adv_credits += posted;
		if (credit_alloc < work_alloc) {
			rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc);
			work_alloc = credit_alloc;
			flow_controlled++;
		}
		if (work_alloc == 0) {
			set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
			rds_iw_stats_inc(s_iw_tx_throttle);
			ret = -ENOMEM;
			goto out;
		}
	}

	/* First pass for this message: map the payload and finish the header. */
	if (!ic->i_rm) {
		if (rm->data.op_nents) {
			rm->data.op_count = ib_dma_map_sg(dev,
							  rm->data.op_sg,
							  rm->data.op_nents,
							  DMA_TO_DEVICE);
			rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->data.op_count);
			if (rm->data.op_count == 0) {
				rds_iw_stats_inc(s_iw_tx_sg_mapping_failure);
				rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
				ret = -ENOMEM;
				goto out;
			}
		} else {
			rm->data.op_count = 0;
		}

		ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
		ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes;
		rds_message_addref(rm);
		rm->data.op_dmasg = 0;
		rm->data.op_dmaoff = 0;
		ic->i_rm = rm;

		/* Finalize the header. */
		if (test_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags))
			rm->m_inc.i_hdr.h_flags |= RDS_FLAG_ACK_REQUIRED;
		if (test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags))
			rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED;

		/* If the message has an RDMA op, advertise its rkey in an
		 * extension header. */
		if (rm->rdma.op_active) {
			struct rds_ext_header_rdma ext_hdr;

			ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.op_rkey);
			rds_message_add_extension(&rm->m_inc.i_hdr,
					RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
		}
		if (rm->m_rdma_cookie) {
			rds_message_add_rdma_dest_extension(&rm->m_inc.i_hdr,
					rds_rdma_cookie_key(rm->m_rdma_cookie),
					rds_rdma_cookie_offset(rm->m_rdma_cookie));
		}

		/* Piggyback the latest ACK sequence number on this message. */
		rm->m_inc.i_hdr.h_ack = cpu_to_be64(rds_iw_piggyb_ack(ic));
		rds_message_make_checksum(&rm->m_inc.i_hdr);

		/* Advertise any freshly posted receive buffers along with the data. */
		rds_iw_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits);
		adv_credits += posted;
		BUG_ON(adv_credits > 255);
	}

	send = &ic->i_sends[pos];
	first = send;
	prev = NULL;
	scat = &rm->data.op_sg[rm->data.op_dmasg];
	sent = 0;
	i = 0;

	/* If the RDMA op attached to this message is fenced, order the data
	 * sends behind it with an IB fence. */
	if (rm->rdma.op_active && rm->rdma.op_fence)
		send_flags = IB_SEND_FENCE;

	/* A zero-length message still needs one work request to carry the header. */
	if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) {
		rds_iw_xmit_populate_wr(ic, send, pos, 0, 0, send_flags);
		goto add_header;
	}

	/* Build one SEND work request per RDS_FRAG_SIZE fragment. */
	for (; i < work_alloc && scat != &rm->data.op_sg[rm->data.op_count]; i++) {
		unsigned int len;

		send = &ic->i_sends[pos];

		len = min(RDS_FRAG_SIZE,
			  ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff);
		rds_iw_xmit_populate_wr(ic, send, pos,
					ib_sg_dma_address(dev, scat) + rm->data.op_dmaoff, len,
					send_flags);

		/* Only request a completion on some of the work requests, so the
		 * ring is reclaimed regularly without a completion per fragment. */
		if (ic->i_unsignaled_wrs-- == 0) {
			ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
			send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
		}

		ic->i_unsignaled_bytes -= len;
		if (ic->i_unsignaled_bytes <= 0) {
			ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes;
			send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
		}

		/* If we were flow controlled, make sure the last work request we
		 * post generates a completion. */
		if (flow_controlled && i == (work_alloc-1))
			send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;

		rdsdebug("send %p wr %p num_sge %u next %p\n", send,
			 &send->s_wr, send->s_wr.num_sge, send->s_wr.next);

		sent += len;
		rm->data.op_dmaoff += len;
		if (rm->data.op_dmaoff == ib_sg_dma_len(dev, scat)) {
			scat++;
			rm->data.op_dmaoff = 0;
			rm->data.op_dmasg++;
		}

add_header:
		/* Tack on the header: each fragment carries a full copy of it. */
		memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));

		if (0) {
			struct rds_header *hdr = &ic->i_send_hdrs[pos];

			printk(KERN_NOTICE "send WR dport=%u flags=0x%x len=%d\n",
				be16_to_cpu(hdr->h_dport),
				hdr->h_flags,
				be32_to_cpu(hdr->h_len));
		}
		if (adv_credits) {
			struct rds_header *hdr = &ic->i_send_hdrs[pos];

			/* Advertise the credits in this fragment's header and
			 * recompute its checksum. */
			hdr->h_credit = adv_credits;
			rds_message_make_checksum(hdr);
			adv_credits = 0;
			rds_iw_stats_inc(s_iw_tx_credit_updates);
		}

		if (prev)
			prev->s_wr.next = &send->s_wr;
		prev = send;

		pos = (pos + 1) % ic->i_send_ring.w_nr;
	}

	/* Account for the header bytes if this call transmitted them. */
	if (hdr_off == 0)
		sent += sizeof(struct rds_header);

	/* If we finished the message, attach it to the last work request so it
	 * is unmapped and freed once that request completes. */
	if (scat == &rm->data.op_sg[rm->data.op_count]) {
		prev->s_rm = ic->i_rm;
		prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
		ic->i_rm = NULL;
	}

	/* Return any unused ring entries and credits. */
	if (i < work_alloc) {
		rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i);
		work_alloc = i;
	}
	if (ic->i_flowctl && i < credit_alloc)
		rds_iw_send_add_credits(conn, credit_alloc - i);

	/* Post the chained work requests to the QP. */
	failed_wr = &first->s_wr;
	ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
	rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
		 first, &first->s_wr, ret, failed_wr);
	BUG_ON(failed_wr != &first->s_wr);
	if (ret) {
		printk(KERN_WARNING "RDS/IW: ib_post_send to %pI4 "
		       "returned %d\n", &conn->c_faddr, ret);
		rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
		if (prev->s_rm) {
			ic->i_rm = prev->s_rm;
			prev->s_rm = NULL;
		}
		goto out;
	}

	ret = sent;
out:
	BUG_ON(adv_credits);
	return ret;
}

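/*
 * Build a fast-registration work request that maps the pages collected in
 * send->s_page_list; the resulting MR backs the local buffer of the RDMA
 * read posted right after it.
 */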
static void rds_iw_build_send_fastreg(struct rds_iw_device *rds_iwdev, struct rds_iw_connection *ic, struct rds_iw_send_work *send, int nent, int len, u64 sg_addr)
{
	BUG_ON(nent > send->s_page_list->max_page_list_len);

	send->s_wr.opcode = IB_WR_FAST_REG_MR;
	send->s_wr.wr.fast_reg.length = len;
	send->s_wr.wr.fast_reg.rkey = send->s_mr->rkey;
	send->s_wr.wr.fast_reg.page_list = send->s_page_list;
	send->s_wr.wr.fast_reg.page_list_len = nent;
	send->s_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
	send->s_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE;
	send->s_wr.wr.fast_reg.iova_start = sg_addr;

	ib_update_fast_reg_key(send->s_mr, send->s_remap_count++);
}

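/*
 * Transmit an RDMA operation: map its scatterlist, build RDMA WRITE (or
 * RDMA READ with invalidate) work requests covering it, and post them.
 * For reads, a fast-registration work request is chained in front to
 * register the local buffer first.
 */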
int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
{
	struct rds_iw_connection *ic = conn->c_transport_data;
	struct rds_iw_send_work *send = NULL;
	struct rds_iw_send_work *first;
	struct rds_iw_send_work *prev;
	struct ib_send_wr *failed_wr;
	struct rds_iw_device *rds_iwdev;
	struct scatterlist *scat;
	unsigned long len;
	u64 remote_addr = op->op_remote_addr;
	u32 pos, fr_pos;
	u32 work_alloc;
	u32 i;
	u32 j;
	int sent;
	int ret;
	int num_sge;

	rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);

	/* Map the scatterlist the first time we see this operation. */
	if (!op->op_mapped) {
		op->op_count = ib_dma_map_sg(ic->i_cm_id->device,
					     op->op_sg, op->op_nents, (op->op_write) ?
					     DMA_TO_DEVICE : DMA_FROM_DEVICE);
		rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->op_count);
		if (op->op_count == 0) {
			rds_iw_stats_inc(s_iw_tx_sg_mapping_failure);
			ret = -ENOMEM;
			goto out;
		}

		op->op_mapped = 1;
	}

	if (!op->op_write) {
		/* RDMA reads need an extra ring slot for the fast-reg work
		 * request that registers the local sink buffer. */
		work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, 1, &fr_pos);
		if (work_alloc != 1) {
			rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
			rds_iw_stats_inc(s_iw_tx_ring_full);
			ret = -ENOMEM;
			goto out;
		}
	}

	/* Allocate enough ring entries for the whole operation; partial RDMA
	 * transfers are not attempted. */
	i = ceil(op->op_count, rds_iwdev->max_sge);

	work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, i, &pos);
	if (work_alloc != i) {
		rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
		rds_iw_stats_inc(s_iw_tx_ring_full);
		ret = -ENOMEM;
		goto out;
	}

	send = &ic->i_sends[pos];
	if (!op->op_write) {
		first = prev = &ic->i_sends[fr_pos];
	} else {
		first = send;
		prev = NULL;
	}
	scat = &op->op_sg[0];
	sent = 0;
	num_sge = op->op_count;

	for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) {
		send->s_wr.send_flags = 0;
		send->s_queued = jiffies;

		/* Only request a completion periodically so the ring is reclaimed
		 * without a completion per work request. */
		if (ic->i_unsignaled_wrs-- == 0) {
			ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
			send->s_wr.send_flags = IB_SEND_SIGNALED;
		}

		/* Writes use plain RDMA WRITE; reads are issued as RDMA READ with
		 * invalidate. */
		if (op->op_write)
			send->s_wr.opcode = IB_WR_RDMA_WRITE;
		else
			send->s_wr.opcode = IB_WR_RDMA_READ_WITH_INV;

		send->s_wr.wr.rdma.remote_addr = remote_addr;
		send->s_wr.wr.rdma.rkey = op->op_rkey;
		send->s_op = op;

		if (num_sge > rds_iwdev->max_sge) {
			send->s_wr.num_sge = rds_iwdev->max_sge;
			num_sge -= rds_iwdev->max_sge;
		} else
			send->s_wr.num_sge = num_sge;

		send->s_wr.next = NULL;

		if (prev)
			prev->s_wr.next = &send->s_wr;

		for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) {
			len = ib_sg_dma_len(ic->i_cm_id->device, scat);

			if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV)
				send->s_page_list->page_list[j] = ib_sg_dma_address(ic->i_cm_id->device, scat);
			else {
				send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat);
				send->s_sge[j].length = len;
				send->s_sge[j].lkey = rds_iw_local_dma_lkey(ic);
			}

			sent += len;
			rdsdebug("ic %p sent %d remote_addr %llu\n", ic, sent, remote_addr);
			remote_addr += len;

			scat++;
		}

		/* For reads, a single SGE backed by the fast-reg MR describes the
		 * whole user buffer. */
		if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) {
			send->s_wr.num_sge = 1;
			send->s_sge[0].addr = conn->c_xmit_rm->m_rs->rs_user_addr;
			send->s_sge[0].length = conn->c_xmit_rm->m_rs->rs_user_bytes;
			send->s_sge[0].lkey = ic->i_sends[fr_pos].s_mr->lkey;
		}

		rdsdebug("send %p wr %p num_sge %u next %p\n", send,
			 &send->s_wr, send->s_wr.num_sge, send->s_wr.next);

		prev = send;
		if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
			send = ic->i_sends;
	}

	/* If the complete operation fit, ask for a signalled completion on the
	 * first work request. */
	if (scat == &op->op_sg[op->op_count])
		first->s_wr.send_flags = IB_SEND_SIGNALED;

	if (i < work_alloc) {
		rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i);
		work_alloc = i;
	}

	/* For reads, build the fast-reg work request now that the total length
	 * is known and account for the extra ring entry it occupies; it sits at
	 * the head of the chain, ahead of the READ work requests. */
	if (!op->op_write) {
		rds_iw_build_send_fastreg(rds_iwdev, ic, &ic->i_sends[fr_pos],
					  op->op_count, sent, conn->c_xmit_rm->m_rs->rs_user_addr);
		work_alloc++;
	}

	failed_wr = &first->s_wr;
	ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
	rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
		 first, &first->s_wr, ret, failed_wr);
	BUG_ON(failed_wr != &first->s_wr);
	if (ret) {
		printk(KERN_WARNING "RDS/IW: rdma ib_post_send to %pI4 "
		       "returned %d\n", &conn->c_faddr, ret);
		rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
		goto out;
	}

out:
	return ret;
}

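/*
 * Called by the RDS core after a send pass completes; use the opportunity
 * to push out any ACK that is still pending.
 */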
void rds_iw_xmit_complete(struct rds_connection *conn)
{
	struct rds_iw_connection *ic = conn->c_transport_data;

	rds_iw_attempt_ack(ic);
}