/*
 * Copyright (c) 2006 Oracle.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/in.h>
#include <linux/device.h>
#include <linux/dmapool.h>

#include "rds.h"
#include "iw.h"

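/*
 * Map an iWARP work-completion status onto the RDS notification status
 * handed to rds_rdma_send_complete(). Flush errors seen while the
 * connection is being torn down are ignored.
 */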
static void rds_iw_send_rdma_complete(struct rds_message *rm,
				      int wc_status)
{
	int notify_status;

	switch (wc_status) {
	case IB_WC_WR_FLUSH_ERR:
		return;

	case IB_WC_SUCCESS:
		notify_status = RDS_RDMA_SUCCESS;
		break;

	case IB_WC_REM_ACCESS_ERR:
		notify_status = RDS_RDMA_REMOTE_ERROR;
		break;

	default:
		notify_status = RDS_RDMA_OTHER_ERROR;
		break;
	}
	rds_rdma_send_complete(rm, notify_status);
}

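/*
 * Unmap the scatterlist of an RDMA op, using the DMA direction that
 * matches the op, and only if it is still mapped.
 */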
static void rds_iw_send_unmap_rdma(struct rds_iw_connection *ic,
				   struct rm_rdma_op *op)
{
	if (op->op_mapped) {
		ib_dma_unmap_sg(ic->i_cm_id->device,
				op->op_sg, op->op_nents,
				op->op_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
		op->op_mapped = 0;
	}
}

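/*
 * Unmap a message attached to a send ring entry once its SEND has
 * completed (or been flushed): unmap the data scatterlist, finish any
 * RDMA op and its statistics, then drop the ring's reference on the
 * message.
 */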
static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic,
				 struct rds_iw_send_work *send,
				 int wc_status)
{
	struct rds_message *rm = send->s_rm;

	rdsdebug("ic %p send %p rm %p\n", ic, send, rm);

	ib_dma_unmap_sg(ic->i_cm_id->device,
			rm->data.op_sg, rm->data.op_nents,
			DMA_TO_DEVICE);

	if (rm->rdma.op_active) {
		rds_iw_send_unmap_rdma(ic, &rm->rdma);

		/* If the user asked for a completion notification on this
		 * message, we can implement three different semantics:
		 *  1.	Notify when we received the ACK on the RDS message
		 *	that was queued with the RDMA. This provides reliable
		 *	notification of RDMA status at the expense of a one-way
		 *	packet delay.
		 *  2.	Notify when the IB stack gives us the completion event for
		 *	the RDMA operation.
		 *  3.	Notify when the IB stack gives us the completion event for
		 *	the accompanying RDS messages.
		 * Here, we implement approach #3. To implement approach #2,
		 * call rds_rdma_send_complete from the cq_handler. To implement #1,
		 * don't call rds_rdma_send_complete at all, and fall back to the notify
		 * handling in the ACK processing code.
		 *
		 * Note: There's no need to explicitly sync any RDMA buffers using
		 * ib_dma_sync_sg_for_cpu - the completion for the RDMA
		 * operation itself unmapped the RDMA buffers, which takes care
		 * of synching.
		 */
		rds_iw_send_rdma_complete(rm, wc_status);

		if (rm->rdma.op_write)
			rds_stats_add(s_send_rdma_bytes, rm->rdma.op_bytes);
		else
			rds_stats_add(s_recv_rdma_bytes, rm->rdma.op_bytes);
	}

	/* If anyone waited for this message to get flushed out, wake
	 * them up now */
	rds_message_unmapped(rm);

	rds_message_put(rm);
	send->s_rm = NULL;
}

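/*
 * Initialize every entry of the send ring: reset the work request,
 * point the header SGE at this slot in the DMA-mapped header ring, and
 * allocate the fastreg MR and page list used for RDMA READs.
 */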
void rds_iw_send_init_ring(struct rds_iw_connection *ic)
{
	struct rds_iw_send_work *send;
	u32 i;

	for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
		struct ib_sge *sge;

		send->s_rm = NULL;
		send->s_op = NULL;
		send->s_mapping = NULL;

		send->s_wr.next = NULL;
		send->s_wr.wr_id = i;
		send->s_wr.sg_list = send->s_sge;
		send->s_wr.num_sge = 1;
		send->s_wr.opcode = IB_WR_SEND;
		send->s_wr.send_flags = 0;
		send->s_wr.ex.imm_data = 0;

		sge = rds_iw_data_sge(ic, send->s_sge);
		sge->lkey = 0;

		sge = rds_iw_header_sge(ic, send->s_sge);
		sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header));
		sge->length = sizeof(struct rds_header);
		sge->lkey = 0;

		send->s_mr = ib_alloc_fast_reg_mr(ic->i_pd, fastreg_message_size);
		if (IS_ERR(send->s_mr)) {
			printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_mr failed\n");
			break;
		}

		send->s_page_list = ib_alloc_fast_reg_page_list(
			ic->i_cm_id->device, fastreg_message_size);
		if (IS_ERR(send->s_page_list)) {
			printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed\n");
			break;
		}
	}
}

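/*
 * Tear the send ring down: release each entry's fastreg MR and page
 * list, and unmap any message or RDMA op still attached to an entry.
 */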
void rds_iw_send_clear_ring(struct rds_iw_connection *ic)
{
	struct rds_iw_send_work *send;
	u32 i;

	for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
		BUG_ON(!send->s_mr);
		ib_dereg_mr(send->s_mr);
		BUG_ON(!send->s_page_list);
		ib_free_fast_reg_page_list(send->s_page_list);
		if (send->s_wr.opcode == 0xdead)
			continue;
		if (send->s_rm)
			rds_iw_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
		if (send->s_op)
			rds_iw_send_unmap_rdma(ic, send->s_op);
	}
}

/*
 * The _oldest/_free ring operations here race cleanly with the alloc/unalloc
 * operations performed in the send path.  As the sender allocs and potentially
 * unallocs the next free entry in the ring it doesn't alter which is
 * the next to be freed, which is what this is concerned with.
 */
void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
{
	struct rds_connection *conn = context;
	struct rds_iw_connection *ic = conn->c_transport_data;
	struct ib_wc wc;
	struct rds_iw_send_work *send;
	u32 completed;
	u32 oldest;
	u32 i;
	int ret;

	rdsdebug("cq %p conn %p\n", cq, conn);
	rds_iw_stats_inc(s_iw_tx_cq_call);
	ret = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
	if (ret)
		rdsdebug("ib_req_notify_cq send failed: %d\n", ret);

	while (ib_poll_cq(cq, 1, &wc) > 0) {
		rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
			 (unsigned long long)wc.wr_id, wc.status, wc.byte_len,
			 be32_to_cpu(wc.ex.imm_data));
		rds_iw_stats_inc(s_iw_tx_cq_event);

		if (wc.status != IB_WC_SUCCESS) {
			printk(KERN_ERR "WC Error: status = %d opcode = %d\n", wc.status, wc.opcode);
			break;
		}

		if (wc.opcode == IB_WC_LOCAL_INV && wc.wr_id == RDS_IW_LOCAL_INV_WR_ID) {
			ic->i_fastreg_posted = 0;
			continue;
		}

		if (wc.opcode == IB_WC_FAST_REG_MR && wc.wr_id == RDS_IW_FAST_REG_WR_ID) {
			ic->i_fastreg_posted = 1;
			continue;
		}

		if (wc.wr_id == RDS_IW_ACK_WR_ID) {
			if (ic->i_ack_queued + HZ/2 < jiffies)
				rds_iw_stats_inc(s_iw_tx_stalled);
			rds_iw_ack_send_complete(ic);
			continue;
		}

		oldest = rds_iw_ring_oldest(&ic->i_send_ring);

		completed = rds_iw_ring_completed(&ic->i_send_ring, wc.wr_id, oldest);

		for (i = 0; i < completed; i++) {
			send = &ic->i_sends[oldest];

			/* In the error case, wc.opcode sometimes contains garbage */
			switch (send->s_wr.opcode) {
			case IB_WR_SEND:
				if (send->s_rm)
					rds_iw_send_unmap_rm(ic, send, wc.status);
				break;
			case IB_WR_FAST_REG_MR:
			case IB_WR_RDMA_WRITE:
			case IB_WR_RDMA_READ:
			case IB_WR_RDMA_READ_WITH_INV:
				/* Nothing to be done - the SG list will be
				 * unmapped when the SEND completes. */
				break;
			default:
				if (printk_ratelimit())
					printk(KERN_NOTICE
					       "RDS/IW: %s: unexpected opcode 0x%x in WR!\n",
					       __func__, send->s_wr.opcode);
				break;
			}

			send->s_wr.opcode = 0xdead;
			send->s_wr.num_sge = 1;
			if (send->s_queued + HZ/2 < jiffies)
				rds_iw_stats_inc(s_iw_tx_stalled);

			/* If a RDMA operation produced an error, signal this right
			 * away. If we don't, the subsequent SEND that goes with this
			 * RDMA will be canceled with ERR_WFLUSH, and the application
			 * never learn that the RDMA failed. */
			if (unlikely(wc.status == IB_WC_REM_ACCESS_ERR && send->s_op)) {
				struct rds_message *rm;

				rm = rds_send_get_message(conn, send->s_op);
				if (rm)
					rds_iw_send_rdma_complete(rm, wc.status);
			}

			oldest = (oldest + 1) % ic->i_send_ring.w_nr;
		}

		rds_iw_ring_free(&ic->i_send_ring, completed);

		if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
		    test_bit(0, &conn->c_map_queued))
			queue_delayed_work(rds_wq, &conn->c_send_w, 0);

		/* We expect errors as the qp is drained during shutdown */
		if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) {
			rds_iw_conn_error(conn,
				"send completion on %pI4 "
				"had status %u, disconnecting and reconnecting\n",
				&conn->c_faddr, wc.status);
		}
	}
}

/*
 * This is the main function for allocating credits when sending
 * messages.
 *
 * Conceptually, we have two counters:
 *  -	send credits: this tells us how many WRs we're allowed
 *	to submit without overrunning the receiver's queue. For
 *	each SEND WR we post, we decrement this by one.
 *
 *  -	posted credits: this tells us how many WRs we recently
 *	posted to the receive queue. This value is transferred
 *	to the peer as a "credit update" in a RDS header field.
 *	Every time we transmit credits to the peer, we subtract
 *	the amount of transferred credits from this counter.
 *
 * It is essential that we avoid situations where both sides have
 * exhausted their send credits, and are unable to send new credits
 * to the peer. We achieve this by requiring that we send at least
 * one credit update to the peer before exhausting our credits.
 * When new credits arrive, we subtract one credit that is withheld
 * until we've posted new buffers and are ready to transmit these
 * credits (see rds_iw_send_add_credits below).
 *
 * The RDS send code is essentially single-threaded; rds_send_xmit
 * holds c_send_lock to ensure exclusive access to the send ring.
 * However, the ACK sending code is independent and can race with
 * message SENDs.
 *
 * In the send path, we need to update the counter of send credits
 * and the counter of posted buffers atomically - when we use the
 * last available credit, we cannot allow another thread to race us
 * and grab the posted credits counter.  Both counters are therefore
 * packed into a single atomic_t (ic->i_credits): the send credits
 * in the lower 16 bits and the posted credits in the upper 16 bits,
 * updated with an atomic_cmpxchg() retry loop.
 */
int rds_iw_send_grab_credits(struct rds_iw_connection *ic,
			     u32 wanted, u32 *adv_credits, int need_posted, int max_posted)
{
	unsigned int avail, posted, got = 0, advertise;
	long oldval, newval;

	*adv_credits = 0;
	if (!ic->i_flowctl)
		return wanted;

try_again:
	advertise = 0;
	oldval = newval = atomic_read(&ic->i_credits);
	posted = IB_GET_POST_CREDITS(oldval);
	avail = IB_GET_SEND_CREDITS(oldval);

	rdsdebug("rds_iw_send_grab_credits(%u): credits=%u posted=%u\n",
			wanted, avail, posted);

	/* The last credit must be used to send a credit update. */
	if (avail && !posted)
		avail--;

	if (avail < wanted) {
		struct rds_connection *conn = ic->i_cm_id->context;

		/* Oops, there aren't that many credits left! */
		set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
		got = avail;
	} else {
		/* Sometimes you get what you want, lalala. */
		got = wanted;
	}
	newval -= IB_SET_SEND_CREDITS(got);

	/*
	 * If need_posted is non-zero, then the caller wants
	 * the posted credits regardless of whether any send
	 * credits are available.
	 */
	if (posted && (got || need_posted)) {
		advertise = min_t(unsigned int, posted, max_posted);
		newval -= IB_SET_POST_CREDITS(advertise);
	}

	/* Finally bill everything */
	if (atomic_cmpxchg(&ic->i_credits, oldval, newval) != oldval)
		goto try_again;

	*adv_credits = advertise;
	return got;
}

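/*
 * Called when the peer advertises new send credits in an incoming
 * header: add them to i_credits and restart transmission if we had
 * stalled on RDS_LL_SEND_FULL.
 */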
void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits)
{
	struct rds_iw_connection *ic = conn->c_transport_data;

	if (credits == 0)
		return;

	rdsdebug("rds_iw_send_add_credits(%u): current=%u%s\n",
			credits,
			IB_GET_SEND_CREDITS(atomic_read(&ic->i_credits)),
			test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ? ", ll_send_full" : "");

	atomic_add(IB_SET_SEND_CREDITS(credits), &ic->i_credits);
	if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags))
		queue_delayed_work(rds_wq, &conn->c_send_w, 0);

	WARN_ON(IB_GET_SEND_CREDITS(credits) >= 16384);

	rds_iw_stats_inc(s_iw_rx_credit_updates);
}

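/*
 * Called after new receive buffers have been posted: account them as
 * posted credits and, once enough have accumulated, request an ACK so
 * the peer learns about them.
 */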
void rds_iw_advertise_credits(struct rds_connection *conn, unsigned int posted)
{
	struct rds_iw_connection *ic = conn->c_transport_data;

	if (posted == 0)
		return;

	atomic_add(IB_SET_POST_CREDITS(posted), &ic->i_credits);

	/* Decide whether to send an update to the peer now.
	 * If we would send a credit update for every single buffer we
	 * post, we would end up with an ACK storm (ACK arrives,
	 * consumes buffer, we refill the ring, send ACK to remote
	 * advertising the newly posted buffer... ad inf)
	 *
	 * Performance pretty much depends on how often we send
	 * credit updates - too many updates cause overhead, too
	 * few updates delay the remote side (which has to refill the
	 * ring to avoid running out of credits).
	 *
	 * We use the number of posted buffers as a threshold: as long
	 * as fewer than 16 are outstanding, we don't request an ACK.
	 */
	if (IB_GET_POST_CREDITS(atomic_read(&ic->i_credits)) >= 16)
		set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
}

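/*
 * Fill in one ring entry for a SEND work request: the data SGE (if the
 * fragment has a payload) plus the header SGE pointing at this slot's
 * entry in the mapped header ring.
 */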
static inline void
rds_iw_xmit_populate_wr(struct rds_iw_connection *ic,
		struct rds_iw_send_work *send, unsigned int pos,
		unsigned long buffer, unsigned int length,
		int send_flags)
{
	struct ib_sge *sge;

	WARN_ON(pos != send - ic->i_sends);

	send->s_wr.send_flags = send_flags;
	send->s_wr.opcode = IB_WR_SEND;
	send->s_wr.num_sge = 2;
	send->s_wr.next = NULL;
	send->s_queued = jiffies;
	send->s_op = NULL;

	if (length != 0) {
		sge = rds_iw_data_sge(ic, send->s_sge);
		sge->addr = buffer;
		sge->length = length;
		sge->lkey = rds_iw_local_dma_lkey(ic);

		sge = rds_iw_header_sge(ic, send->s_sge);
	} else {
		/* We're sending a packet with no payload. There is only
		 * one SGE */
		send->s_wr.num_sge = 1;
		sge = &send->s_sge[0];
	}

	sge->addr = ic->i_send_hdrs_dma + (pos * sizeof(struct rds_header));
	sge->length = sizeof(struct rds_header);
	sge->lkey = rds_iw_local_dma_lkey(ic);
}

/*
 * This can be called multiple times for a given message.  The first time
 * we see a message we map its scatterlist into the IB device so that
 * we can provide that mapped address to the IB scatter gather entries
 * in the IB work requests.  We translate the scatterlist into a series
 * of work requests that fragment the message.  These work requests complete
 * in order so we pass ownership of the message to the completion handler
 * once we send the final fragment.
 *
 * Returns the number of bytes transmitted (including the RDS header the
 * first time around) or a negative errno; -EAGAIN is returned while the
 * connection is still waiting for its fastreg WR to complete.
 */
int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
		unsigned int hdr_off, unsigned int sg, unsigned int off)
{
	struct rds_iw_connection *ic = conn->c_transport_data;
	struct ib_device *dev = ic->i_cm_id->device;
	struct rds_iw_send_work *send = NULL;
	struct rds_iw_send_work *first;
	struct rds_iw_send_work *prev;
	struct ib_send_wr *failed_wr;
	struct scatterlist *scat;
	u32 pos;
	u32 i;
	u32 work_alloc;
	u32 credit_alloc;
	u32 posted;
	u32 adv_credits = 0;
	int send_flags = 0;
	int sent;
	int ret;
	int flow_controlled = 0;

	BUG_ON(off % RDS_FRAG_SIZE);
	BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header));

	/* Fastreg support */
	if (rds_rdma_cookie_key(rm->m_rdma_cookie) && !ic->i_fastreg_posted) {
		ret = -EAGAIN;
		goto out;
	}

	/* FIXME we may overallocate here */
	if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0)
		i = 1;
	else
		i = ceil(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE);

	work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, i, &pos);
	if (work_alloc == 0) {
		set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
		rds_iw_stats_inc(s_iw_tx_ring_full);
		ret = -ENOMEM;
		goto out;
	}

	credit_alloc = work_alloc;
	if (ic->i_flowctl) {
		credit_alloc = rds_iw_send_grab_credits(ic, work_alloc, &posted, 0, RDS_MAX_ADV_CREDIT);
		adv_credits += posted;
		if (credit_alloc < work_alloc) {
			rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc);
			work_alloc = credit_alloc;
			flow_controlled++;
		}
		if (work_alloc == 0) {
			set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
			rds_iw_stats_inc(s_iw_tx_throttle);
			ret = -ENOMEM;
			goto out;
		}
	}

	/* map the message the first time we see it */
	if (!ic->i_rm) {
		/*
		printk(KERN_NOTICE "rds_iw_xmit prep msg dport=%u flags=0x%x len=%d\n",
				be16_to_cpu(rm->m_inc.i_hdr.h_dport),
				rm->m_inc.i_hdr.h_flags,
				be32_to_cpu(rm->m_inc.i_hdr.h_len));
		*/
		if (rm->data.op_nents) {
			rm->data.op_count = ib_dma_map_sg(dev,
							  rm->data.op_sg,
							  rm->data.op_nents,
							  DMA_TO_DEVICE);
			rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->data.op_count);
			if (rm->data.op_count == 0) {
				rds_iw_stats_inc(s_iw_tx_sg_mapping_failure);
				rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
				ret = -ENOMEM;
				goto out;
			}
		} else {
			rm->data.op_count = 0;
		}

		ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
		ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes;
		rds_message_addref(rm);
		ic->i_rm = rm;

		/* Finalize the header */
		if (test_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags))
			rm->m_inc.i_hdr.h_flags |= RDS_FLAG_ACK_REQUIRED;
		if (test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags))
			rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED;

		/* If it has a RDMA op, tell the peer we did it. This is
		 * used by the peer to release use-once RDMA MRs. */
		if (rm->rdma.op_active) {
			struct rds_ext_header_rdma ext_hdr;

			ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.op_rkey);
			rds_message_add_extension(&rm->m_inc.i_hdr,
					RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
		}
		if (rm->m_rdma_cookie) {
			rds_message_add_rdma_dest_extension(&rm->m_inc.i_hdr,
					rds_rdma_cookie_key(rm->m_rdma_cookie),
					rds_rdma_cookie_offset(rm->m_rdma_cookie));
		}

		/* Note - rds_iw_piggyb_ack clears the ACK_REQUIRED bit, so
		 * we should not do this unless we have a chance of at least
		 * sticking the header into the send ring. Which is why we
		 * should call rds_iw_ring_alloc first. */
		rm->m_inc.i_hdr.h_ack = cpu_to_be64(rds_iw_piggyb_ack(ic));
		rds_message_make_checksum(&rm->m_inc.i_hdr);

		/*
		 * Update adv_credits since we reset the ACK_REQUIRED bit.
		 */
		rds_iw_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits);
		adv_credits += posted;
		BUG_ON(adv_credits > 255);
	}

	send = &ic->i_sends[pos];
	first = send;
	prev = NULL;
	scat = &rm->data.op_sg[sg];
	sent = 0;
	i = 0;

	/* Sometimes you want to put a fence between an RDMA
	 * READ and the following SEND.
	 * We could either do this all the time
	 * or when requested by the user. Right now, we let
	 * the application choose.
	 */
	if (rm->rdma.op_active && rm->rdma.op_fence)
		send_flags = IB_SEND_FENCE;

	/*
	 * We could be copying the header into the unused tail of the page.
	 * That would need to be changed in the future when those pages might
	 * be mapped userspace pages or page cache pages.  So instead we always
	 * use a second sge and our long-lived ring of mapped headers.  We send
	 * the header after the data so that the data payload can be aligned on
	 * the receiver.
	 */

	/* handle a 0-len message */
	if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) {
		rds_iw_xmit_populate_wr(ic, send, pos, 0, 0, send_flags);
		goto add_header;
	}

	/* if there's data reference it with a chain of work reqs */
	for (; i < work_alloc && scat != &rm->data.op_sg[rm->data.op_count]; i++) {
		unsigned int len;

		send = &ic->i_sends[pos];

		len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off);
		rds_iw_xmit_populate_wr(ic, send, pos,
				ib_sg_dma_address(dev, scat) + off, len,
				send_flags);

		/*
		 * We want to delay signaling completions just enough to get
		 * the batching benefits but not so much that we create dead time
		 * on the wire.
		 */
		if (ic->i_unsignaled_wrs-- == 0) {
			ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
			send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
		}

		ic->i_unsignaled_bytes -= len;
		if (ic->i_unsignaled_bytes <= 0) {
			ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes;
			send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
		}

		/*
		 * Always signal the last one if we're stopping due to flow control.
		 */
		if (flow_controlled && i == (work_alloc-1))
			send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;

		rdsdebug("send %p wr %p num_sge %u next %p\n", send,
			 &send->s_wr, send->s_wr.num_sge, send->s_wr.next);

		sent += len;
		off += len;
		if (off == ib_sg_dma_len(dev, scat)) {
			scat++;
			off = 0;
		}

add_header:
		/* Tack on the header after the data, if we're sending it. The
		 * header SGE was already set up by rds_iw_xmit_populate_wr. */
		memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));

		if (0) {
			struct rds_header *hdr = &ic->i_send_hdrs[pos];

			printk(KERN_NOTICE "send WR dport=%u flags=0x%x len=%d\n",
				be16_to_cpu(hdr->h_dport),
				hdr->h_flags,
				be32_to_cpu(hdr->h_len));
		}
		if (adv_credits) {
			struct rds_header *hdr = &ic->i_send_hdrs[pos];

			/* add credit and redo the header checksum */
			hdr->h_credit = adv_credits;
			rds_message_make_checksum(hdr);
			adv_credits = 0;
			rds_iw_stats_inc(s_iw_tx_credit_updates);
		}

		if (prev)
			prev->s_wr.next = &send->s_wr;
		prev = send;

		pos = (pos + 1) % ic->i_send_ring.w_nr;
	}

	/* Account the RDS header in the number of bytes we sent, but just once.
	 * The caller has no concept of fragmentation. */
	if (hdr_off == 0)
		sent += sizeof(struct rds_header);

	/* if we finished the message then send completion owns it */
	if (scat == &rm->data.op_sg[rm->data.op_count]) {
		prev->s_rm = ic->i_rm;
		prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
		ic->i_rm = NULL;
	}

	if (i < work_alloc) {
		rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i);
		work_alloc = i;
	}
	if (ic->i_flowctl && i < credit_alloc)
		rds_iw_send_add_credits(conn, credit_alloc - i);

	/* XXX need to worry about failed_wr and partial sends. */
	failed_wr = &first->s_wr;
	ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
	rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
		 first, &first->s_wr, ret, failed_wr);
	BUG_ON(failed_wr != &first->s_wr);
	if (ret) {
		printk(KERN_WARNING "RDS/IW: ib_post_send to %pI4 "
		       "returned %d\n", &conn->c_faddr, ret);
		rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
		if (prev->s_rm) {
			ic->i_rm = prev->s_rm;
			prev->s_rm = NULL;
		}
		goto out;
	}

	ret = sent;
out:
	BUG_ON(adv_credits);
	return ret;
}

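/*
 * Build the IB_WR_FAST_REG_MR work request for this ring entry,
 * registering the pages previously collected in s_page_list and rolling
 * the MR key so every registration uses a fresh rkey.
 */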
static void rds_iw_build_send_fastreg(struct rds_iw_device *rds_iwdev, struct rds_iw_connection *ic, struct rds_iw_send_work *send, int nent, int len, u64 sg_addr)
{
	BUG_ON(nent > send->s_page_list->max_page_list_len);
	/*
	 * Perform a WR for the fast_reg_mr. Each individual page
	 * in the sg list is added to the fast reg page list and placed
	 * inside the fast_reg_mr WR.
	 */
	send->s_wr.opcode = IB_WR_FAST_REG_MR;
	send->s_wr.wr.fast_reg.length = len;
	send->s_wr.wr.fast_reg.rkey = send->s_mr->rkey;
	send->s_wr.wr.fast_reg.page_list = send->s_page_list;
	send->s_wr.wr.fast_reg.page_list_len = nent;
	send->s_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
	send->s_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE;
	send->s_wr.wr.fast_reg.iova_start = sg_addr;

	ib_update_fast_reg_key(send->s_mr, send->s_remap_count++);
}

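/*
 * Post the work requests for a user RDMA op. Writes become a chain of
 * IB_WR_RDMA_WRITE requests; reads are posted as IB_WR_RDMA_READ_WITH_INV
 * preceded by a fast registration WR that maps the local destination
 * buffer.
 */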
int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
{
	struct rds_iw_connection *ic = conn->c_transport_data;
	struct rds_iw_send_work *send = NULL;
	struct rds_iw_send_work *first;
	struct rds_iw_send_work *prev;
	struct ib_send_wr *failed_wr;
	struct rds_iw_device *rds_iwdev;
	struct scatterlist *scat;
	unsigned long len;
	u64 remote_addr = op->op_remote_addr;
	u32 pos, fr_pos;
	u32 work_alloc;
	u32 i;
	u32 j;
	int sent;
	int ret;
	int num_sge;

	rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);

	/* map the message the first time we see it */
	if (!op->op_mapped) {
		op->op_count = ib_dma_map_sg(ic->i_cm_id->device,
					     op->op_sg, op->op_nents, (op->op_write) ?
					     DMA_TO_DEVICE : DMA_FROM_DEVICE);
		rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->op_count);
		if (op->op_count == 0) {
			rds_iw_stats_inc(s_iw_tx_sg_mapping_failure);
			ret = -ENOMEM;
			goto out;
		}

		op->op_mapped = 1;
	}

	if (!op->op_write) {
		/* Alloc space on the send queue for the fastreg */
		work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, 1, &fr_pos);
		if (work_alloc != 1) {
			rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
			rds_iw_stats_inc(s_iw_tx_ring_full);
			ret = -ENOMEM;
			goto out;
		}
	}

	/*
	 * Instead of knowing how to return a partial rdma read/write we insist that there
	 * be enough work requests to send the entire message.
	 */
	i = ceil(op->op_count, rds_iwdev->max_sge);

	work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, i, &pos);
	if (work_alloc != i) {
		rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
		rds_iw_stats_inc(s_iw_tx_ring_full);
		ret = -ENOMEM;
		goto out;
	}

	send = &ic->i_sends[pos];
	if (!op->op_write) {
		first = prev = &ic->i_sends[fr_pos];
	} else {
		first = send;
		prev = NULL;
	}
	scat = &op->op_sg[0];
	sent = 0;
	num_sge = op->op_count;

	for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) {
		send->s_wr.send_flags = 0;
		send->s_queued = jiffies;

		/*
		 * We want to delay signaling completions just enough to get
		 * the batching benefits but not so much that we create dead time on the wire.
		 */
		if (ic->i_unsignaled_wrs-- == 0) {
			ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
			send->s_wr.send_flags = IB_SEND_SIGNALED;
		}

		/* To avoid the need to have the plumbing to invalidate the fastreg_mr used
		 * for local access after RDS is finished with it, using
		 * IB_WR_RDMA_READ_WITH_INV will invalidate it after the read has completed.
		 */
		if (op->op_write)
			send->s_wr.opcode = IB_WR_RDMA_WRITE;
		else
			send->s_wr.opcode = IB_WR_RDMA_READ_WITH_INV;

		send->s_wr.wr.rdma.remote_addr = remote_addr;
		send->s_wr.wr.rdma.rkey = op->op_rkey;
		send->s_op = op;

		if (num_sge > rds_iwdev->max_sge) {
			send->s_wr.num_sge = rds_iwdev->max_sge;
			num_sge -= rds_iwdev->max_sge;
		} else
			send->s_wr.num_sge = num_sge;

		send->s_wr.next = NULL;

		if (prev)
			prev->s_wr.next = &send->s_wr;

		for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) {
			len = ib_sg_dma_len(ic->i_cm_id->device, scat);

			if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV)
				send->s_page_list->page_list[j] = ib_sg_dma_address(ic->i_cm_id->device, scat);
			else {
				send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat);
				send->s_sge[j].length = len;
				send->s_sge[j].lkey = rds_iw_local_dma_lkey(ic);
			}

			sent += len;
			rdsdebug("ic %p sent %d remote_addr %llu\n", ic, sent, remote_addr);
			remote_addr += len;

			scat++;
		}

		if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) {
			send->s_wr.num_sge = 1;
			send->s_sge[0].addr = conn->c_xmit_rm->m_rs->rs_user_addr;
			send->s_sge[0].length = conn->c_xmit_rm->m_rs->rs_user_bytes;
			send->s_sge[0].lkey = ic->i_sends[fr_pos].s_mr->lkey;
		}

		rdsdebug("send %p wr %p num_sge %u next %p\n", send,
			&send->s_wr, send->s_wr.num_sge, send->s_wr.next);

		prev = send;
		if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
			send = ic->i_sends;
	}

	/* if we finished the message then send completion owns it */
	if (scat == &op->op_sg[op->op_count])
		first->s_wr.send_flags = IB_SEND_SIGNALED;

	if (i < work_alloc) {
		rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i);
		work_alloc = i;
	}

	/*
	 * An RDMA READ is headed by the fast registration WR built at
	 * fr_pos: it registers the local pages collected in s_page_list so
	 * the HCA can place the read data, and the READ WRs that follow
	 * reference that registration. Account for the extra WR in
	 * work_alloc so the error path releases its ring slot as well.
	 */
	if (!op->op_write) {
		rds_iw_build_send_fastreg(rds_iwdev, ic, &ic->i_sends[fr_pos],
					  op->op_count, sent, conn->c_xmit_rm->m_rs->rs_user_addr);
		work_alloc++;
	}

	failed_wr = &first->s_wr;
	ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
	rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
		 first, &first->s_wr, ret, failed_wr);
	BUG_ON(failed_wr != &first->s_wr);
	if (ret) {
		printk(KERN_WARNING "RDS/IW: rdma ib_post_send to %pI4 "
		       "returned %d\n", &conn->c_faddr, ret);
		rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
		goto out;
	}

out:
	return ret;
}

void rds_iw_xmit_complete(struct rds_connection *conn)
{
	struct rds_iw_connection *ic = conn->c_transport_data;

	/* We may have a pending ACK or window update we were unable
	 * to send previously (due to flow control). Try again. */
	rds_iw_attempt_ack(ic);
}