#include <linux/kernel.h>
#include <linux/in.h>
#include <linux/device.h>
#include <linux/dmapool.h>
#include <linux/ratelimit.h>

#include "rds_single_path.h"
#include "rds.h"
#include "ib.h"
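
/*
 * Translate an IB work-completion status into an RDS notification status
 * and hand it to the caller-supplied completion callback. Flush errors
 * (the QP was drained) produce no notification at all.
 */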
static void rds_ib_send_complete(struct rds_message *rm,
				 int wc_status,
				 void (*complete)(struct rds_message *rm, int status))
{
	int notify_status;

	switch (wc_status) {
	case IB_WC_WR_FLUSH_ERR:
		return;

	case IB_WC_SUCCESS:
		notify_status = RDS_RDMA_SUCCESS;
		break;

	case IB_WC_REM_ACCESS_ERR:
		notify_status = RDS_RDMA_REMOTE_ERROR;
		break;

	default:
		notify_status = RDS_RDMA_OTHER_ERROR;
		break;
	}
	complete(rm, notify_status);
}

static void rds_ib_send_unmap_data(struct rds_ib_connection *ic,
				   struct rm_data_op *op,
				   int wc_status)
{
	if (op->op_nents)
		ib_dma_unmap_sg(ic->i_cm_id->device,
				op->op_sg, op->op_nents,
				DMA_TO_DEVICE);
}

static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic,
				   struct rm_rdma_op *op,
				   int wc_status)
{
	if (op->op_mapped) {
		ib_dma_unmap_sg(ic->i_cm_id->device,
				op->op_sg, op->op_nents,
				op->op_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
		op->op_mapped = 0;
	}
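
	/*
	 * The buffers were unmapped above; notify the sender of the RDMA's
	 * fate now that the work request carrying it has completed.
	 */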
	rds_ib_send_complete(container_of(op, struct rds_message, rdma),
			     wc_status, rds_rdma_send_complete);

	if (op->op_write)
		rds_stats_add(s_send_rdma_bytes, op->op_bytes);
	else
		rds_stats_add(s_recv_rdma_bytes, op->op_bytes);
}

static void rds_ib_send_unmap_atomic(struct rds_ib_connection *ic,
				     struct rm_atomic_op *op,
				     int wc_status)
{
	/* unmap the buffer that received the atomic result */
	if (op->op_mapped) {
		ib_dma_unmap_sg(ic->i_cm_id->device, op->op_sg, 1,
				DMA_FROM_DEVICE);
		op->op_mapped = 0;
	}

	rds_ib_send_complete(container_of(op, struct rds_message, atomic),
			     wc_status, rds_atomic_send_complete);

	if (op->op_type == RDS_ATOMIC_TYPE_CSWP)
		rds_ib_stats_inc(s_ib_atomic_cswp);
	else
		rds_ib_stats_inc(s_ib_atomic_fadd);
}
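
/*
 * Unmap the resources attached to a send work entry and return the
 * rds_message that owns it (if any) so the completion handler can
 * drop its reference.
 */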
static struct rds_message *rds_ib_send_unmap_op(struct rds_ib_connection *ic,
						struct rds_ib_send_work *send,
						int wc_status)
{
	struct rds_message *rm = NULL;

	/* In the error case, wc.opcode sometimes contains garbage */
	switch (send->s_wr.opcode) {
	case IB_WR_SEND:
		if (send->s_op) {
			rm = container_of(send->s_op, struct rds_message, data);
			rds_ib_send_unmap_data(ic, send->s_op, wc_status);
		}
		break;
	case IB_WR_RDMA_WRITE:
	case IB_WR_RDMA_READ:
		if (send->s_op) {
			rm = container_of(send->s_op, struct rds_message, rdma);
			rds_ib_send_unmap_rdma(ic, send->s_op, wc_status);
		}
		break;
	case IB_WR_ATOMIC_FETCH_AND_ADD:
	case IB_WR_ATOMIC_CMP_AND_SWP:
		if (send->s_op) {
			rm = container_of(send->s_op, struct rds_message, atomic);
			rds_ib_send_unmap_atomic(ic, send->s_op, wc_status);
		}
		break;
	default:
		printk_ratelimited(KERN_NOTICE
				   "RDS/IB: %s: unexpected opcode 0x%x in WR!\n",
				   __func__, send->s_wr.opcode);
		break;
	}

	send->s_wr.opcode = 0xdead;

	return rm;
}

void rds_ib_send_init_ring(struct rds_ib_connection *ic)
{
	struct rds_ib_send_work *send;
	u32 i;

	for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
		struct ib_sge *sge;

		send->s_op = NULL;

		send->s_wr.wr_id = i;
		send->s_wr.sg_list = send->s_sge;
		send->s_wr.ex.imm_data = 0;

		sge = &send->s_sge[0];
		sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header));
		sge->length = sizeof(struct rds_header);
		sge->lkey = ic->i_pd->local_dma_lkey;

		send->s_sge[1].lkey = ic->i_pd->local_dma_lkey;
	}
}

void rds_ib_send_clear_ring(struct rds_ib_connection *ic)
{
	struct rds_ib_send_work *send;
	u32 i;

	for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
		if (send->s_op && send->s_wr.opcode != 0xdead)
			rds_ib_send_unmap_op(ic, send, IB_WC_WR_FLUSH_ERR);
	}
}
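
/*
 * Drop nr from the count of signaled sends still outstanding and wake
 * anyone waiting for the send ring to drain once it reaches zero.
 */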
static void rds_ib_sub_signaled(struct rds_ib_connection *ic, int nr)
{
	if ((atomic_sub_return(nr, &ic->i_signaled_sends) == 0) &&
	    waitqueue_active(&rds_ib_ring_empty_wait))
		wake_up(&rds_ib_ring_empty_wait);
	BUG_ON(atomic_read(&ic->i_signaled_sends) < 0);
}
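
/*
 * Process a send completion: walk the ring entries the hardware has
 * finished with, unmap them, release references on their messages and
 * free the ring slots.
 */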
void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
{
	struct rds_message *rm = NULL;
	struct rds_connection *conn = ic->conn;
	struct rds_ib_send_work *send;
	u32 completed;
	u32 oldest;
	u32 i = 0;
	int nr_sig = 0;

	rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
		 (unsigned long long)wc->wr_id, wc->status,
		 ib_wc_status_msg(wc->status), wc->byte_len,
		 be32_to_cpu(wc->ex.imm_data));
	rds_ib_stats_inc(s_ib_tx_cq_event);

	if (wc->wr_id == RDS_IB_ACK_WR_ID) {
		if (time_after(jiffies, ic->i_ack_queued + HZ / 2))
			rds_ib_stats_inc(s_ib_tx_stalled);
		rds_ib_ack_send_complete(ic);
		return;
	}

	oldest = rds_ib_ring_oldest(&ic->i_send_ring);

	completed = rds_ib_ring_completed(&ic->i_send_ring, wc->wr_id, oldest);

	for (i = 0; i < completed; i++) {
		send = &ic->i_sends[oldest];
		if (send->s_wr.send_flags & IB_SEND_SIGNALED)
			nr_sig++;

		rm = rds_ib_send_unmap_op(ic, send, wc->status);

		if (time_after(jiffies, send->s_queued + HZ / 2))
			rds_ib_stats_inc(s_ib_tx_stalled);

		if (send->s_op) {
			if (send->s_op == rm->m_final_op) {
				/* This was the last work request for the
				 * message; let anyone waiting on its memory
				 * know that it is no longer mapped.
				 */
				rds_message_unmapped(rm);
			}
			rds_message_put(rm);
			send->s_op = NULL;
		}

		oldest = (oldest + 1) % ic->i_send_ring.w_nr;
	}

	rds_ib_ring_free(&ic->i_send_ring, completed);
	rds_ib_sub_signaled(ic, nr_sig);
	nr_sig = 0;

	if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
	    test_bit(0, &conn->c_map_queued))
		queue_delayed_work(rds_wq, &conn->c_send_w, 0);

	/* Errors are expected while the QP is drained during shutdown;
	 * only treat a failure as fatal if the connection is still up.
	 */
	if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) {
		rds_ib_conn_error(conn, "send completion on %pI4 had status %u (%s), disconnecting and reconnecting\n",
				  &conn->c_faddr, wc->status,
				  ib_wc_status_msg(wc->status));
	}
}
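
/*
 * Credit-based flow control.
 *
 * Two counters live packed in ic->i_credits:
 *  - send credits: how many sends we may post before we would overrun
 *    the peer's receive queue; each SEND consumes one.
 *  - posted credits: receive buffers we have posted locally but not yet
 *    advertised to the peer; they are transferred in the header's
 *    h_credit field.
 *
 * To avoid deadlock, the last send credit is only spent when we also
 * have posted credits to advertise, so each side can always tell the
 * other that more receive buffers are available.
 */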
int rds_ib_send_grab_credits(struct rds_ib_connection *ic,
			     u32 wanted, u32 *adv_credits, int need_posted, int max_posted)
{
	unsigned int avail, posted, got = 0, advertise;
	long oldval, newval;

	*adv_credits = 0;
	if (!ic->i_flowctl)
		return wanted;

try_again:
	advertise = 0;
	oldval = newval = atomic_read(&ic->i_credits);
	posted = IB_GET_POST_CREDITS(oldval);
	avail = IB_GET_SEND_CREDITS(oldval);

	rdsdebug("rds_ib_send_grab_credits(%u): credits=%u posted=%u\n",
		 wanted, avail, posted);

	/* The last credit must be kept for sending a credit update. */
	if (avail && !posted)
		avail--;

	if (avail < wanted) {
		struct rds_connection *conn = ic->i_cm_id->context;

		/* There aren't that many credits left. */
		set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
		got = avail;
	} else {
		/* We have enough credits for the full request. */
		got = wanted;
	}
	newval -= IB_SET_SEND_CREDITS(got);

	/*
	 * If need_posted is non-zero, the caller wants a credit update
	 * advertised even if no send credits were granted; grab as many
	 * posted credits as max_posted allows.
	 */
	if (posted && (got || need_posted)) {
		advertise = min_t(unsigned int, posted, max_posted);
		newval -= IB_SET_POST_CREDITS(advertise);
	}

	/* Commit the new credit count atomically, retrying on races. */
	if (atomic_cmpxchg(&ic->i_credits, oldval, newval) != oldval)
		goto try_again;

	*adv_credits = advertise;
	return got;
}

void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits)
{
	struct rds_ib_connection *ic = conn->c_transport_data;

	if (credits == 0)
		return;

	rdsdebug("rds_ib_send_add_credits(%u): current=%u%s\n",
		 credits,
		 IB_GET_SEND_CREDITS(atomic_read(&ic->i_credits)),
		 test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ? ", ll_send_full" : "");

	atomic_add(IB_SET_SEND_CREDITS(credits), &ic->i_credits);
	if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags))
		queue_delayed_work(rds_wq, &conn->c_send_w, 0);

	WARN_ON(IB_GET_SEND_CREDITS(credits) >= 16384);

	rds_ib_stats_inc(s_ib_rx_credit_updates);
}

void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted)
{
	struct rds_ib_connection *ic = conn->c_transport_data;

	if (posted == 0)
		return;

	atomic_add(IB_SET_POST_CREDITS(posted), &ic->i_credits);
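
	/* Request an ACK (which carries the credit update) only once
	 * enough newly posted buffers have accumulated; advertising
	 * every single buffer would generate a storm of updates.
	 */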
	if (IB_GET_POST_CREDITS(atomic_read(&ic->i_credits)) >= 16)
		set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
}

static inline int rds_ib_set_wr_signal_state(struct rds_ib_connection *ic,
					     struct rds_ib_send_work *send,
					     bool notify)
{
	/*
	 * Delay signaled completions just enough to get the benefit of
	 * batching them, but not so long that we create dead time on
	 * the wire.
	 */
	if (ic->i_unsignaled_wrs-- == 0 || notify) {
		ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
		send->s_wr.send_flags |= IB_SEND_SIGNALED;
		return 1;
	}
	return 0;
}
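
/*
 * Transmit (part of) a message over the connection's queue pair.
 *
 * This can be called multiple times for the same message: the first
 * time we see it we DMA-map its scatterlist and keep the mapping in
 * ic->i_data_op, then emit one work request per RDS_FRAG_SIZE fragment,
 * each carrying the RDS header plus (optionally) one data SGE. Ownership
 * of the message passes to the completion handler with the final
 * fragment. Returns the number of bytes queued, or a negative errno.
 */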
int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
		unsigned int hdr_off, unsigned int sg, unsigned int off)
{
	struct rds_ib_connection *ic = conn->c_transport_data;
	struct ib_device *dev = ic->i_cm_id->device;
	struct rds_ib_send_work *send = NULL;
	struct rds_ib_send_work *first;
	struct rds_ib_send_work *prev;
	struct ib_send_wr *failed_wr;
	struct scatterlist *scat;
	u32 pos;
	u32 i;
	u32 work_alloc;
	u32 credit_alloc = 0;
	u32 posted;
	u32 adv_credits = 0;
	int send_flags = 0;
	int bytes_sent = 0;
	int ret;
	int flow_controlled = 0;
	int nr_sig = 0;

	BUG_ON(off % RDS_FRAG_SIZE);
	BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header));

	/* Do not send congestion updates over IB loopback; apply them directly. */
	if (conn->c_loopback
	    && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
		rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
		scat = &rm->data.op_sg[sg];
		ret = sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
		ret = min_t(int, ret, scat->length - conn->c_xmit_data_off);
		return ret;
	}

	/* Compute the number of work requests needed; this may overallocate. */
	if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0)
		i = 1;
	else
		i = ceil(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE);

	work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos);
	if (work_alloc == 0) {
		set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
		rds_ib_stats_inc(s_ib_tx_ring_full);
		ret = -ENOMEM;
		goto out;
	}

	if (ic->i_flowctl) {
		credit_alloc = rds_ib_send_grab_credits(ic, work_alloc, &posted, 0, RDS_MAX_ADV_CREDIT);
		adv_credits += posted;
		if (credit_alloc < work_alloc) {
			rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc);
			work_alloc = credit_alloc;
			flow_controlled = 1;
		}
		if (work_alloc == 0) {
			set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
			rds_ib_stats_inc(s_ib_tx_throttle);
			ret = -ENOMEM;
			goto out;
		}
	}

	/* Map the message the first time we see it. */
	if (!ic->i_data_op) {
		if (rm->data.op_nents) {
			rm->data.op_count = ib_dma_map_sg(dev,
							  rm->data.op_sg,
							  rm->data.op_nents,
							  DMA_TO_DEVICE);
			rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->data.op_count);
			if (rm->data.op_count == 0) {
				rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
				rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
				ret = -ENOMEM;
				goto out;
			}
		} else {
			rm->data.op_count = 0;
		}

		rds_message_addref(rm);
		rm->data.op_dmasg = 0;
		rm->data.op_dmaoff = 0;
		ic->i_data_op = &rm->data;

		/* Finalize the header. */
		if (test_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags))
			rm->m_inc.i_hdr.h_flags |= RDS_FLAG_ACK_REQUIRED;
		if (test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags))
			rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED;

		/* If the message carries an RDMA op, tell the peer by
		 * passing the rkey in an extension header.
		 */
		if (rm->rdma.op_active) {
			struct rds_ext_header_rdma ext_hdr;

			ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.op_rkey);
			rds_message_add_extension(&rm->m_inc.i_hdr,
						  RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
		}
		if (rm->m_rdma_cookie) {
			rds_message_add_rdma_dest_extension(&rm->m_inc.i_hdr,
							    rds_rdma_cookie_key(rm->m_rdma_cookie),
							    rds_rdma_cookie_offset(rm->m_rdma_cookie));
		}

		/* Piggyback the most recent ACK onto this header, then checksum it. */
		rm->m_inc.i_hdr.h_ack = cpu_to_be64(rds_ib_piggyb_ack(ic));
		rds_message_make_checksum(&rm->m_inc.i_hdr);

		/* Grab any posted credits we can also advertise in this header. */
		if (ic->i_flowctl) {
			rds_ib_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits);
			adv_credits += posted;
			BUG_ON(adv_credits > 255);
		}
	}

	/* If the message has an RDMA op with the fence flag set, fence
	 * this send behind it.
	 */
	if (rm->rdma.op_active && rm->rdma.op_fence)
		send_flags = IB_SEND_FENCE;

	/* Each fragment gets its own header; messages may be 0 bytes. */
	send = &ic->i_sends[pos];
	first = send;
	prev = NULL;
	scat = &ic->i_data_op->op_sg[rm->data.op_dmasg];
	i = 0;
	do {
		unsigned int len = 0;

		/* Set up the header. */
		send->s_wr.send_flags = send_flags;
		send->s_wr.opcode = IB_WR_SEND;
		send->s_wr.num_sge = 1;
		send->s_wr.next = NULL;
		send->s_queued = jiffies;
		send->s_op = NULL;

		send->s_sge[0].addr = ic->i_send_hdrs_dma
			+ (pos * sizeof(struct rds_header));
		send->s_sge[0].length = sizeof(struct rds_header);

		memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));

		/* Set up the data, if present. */
		if (i < work_alloc
		    && scat != &rm->data.op_sg[rm->data.op_count]) {
			len = min(RDS_FRAG_SIZE,
				  ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff);
			send->s_wr.num_sge = 2;

			send->s_sge[1].addr = ib_sg_dma_address(dev, scat);
			send->s_sge[1].addr += rm->data.op_dmaoff;
			send->s_sge[1].length = len;

			bytes_sent += len;
			rm->data.op_dmaoff += len;
			if (rm->data.op_dmaoff == ib_sg_dma_len(dev, scat)) {
				scat++;
				rm->data.op_dmasg++;
				rm->data.op_dmaoff = 0;
			}
		}

		rds_ib_set_wr_signal_state(ic, send, 0);

		/* Always signal the last WR if we are stopping due to flow control. */
		if (ic->i_flowctl && flow_controlled && i == (work_alloc - 1))
			send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;

		if (send->s_wr.send_flags & IB_SEND_SIGNALED)
			nr_sig++;

		rdsdebug("send %p wr %p num_sge %u next %p\n", send,
			 &send->s_wr, send->s_wr.num_sge, send->s_wr.next);

		if (ic->i_flowctl && adv_credits) {
			struct rds_header *hdr = &ic->i_send_hdrs[pos];

			/* Add the credit update and redo the header checksum. */
			hdr->h_credit = adv_credits;
			rds_message_make_checksum(hdr);
			adv_credits = 0;
			rds_ib_stats_inc(s_ib_tx_credit_updates);
		}

		if (prev)
			prev->s_wr.next = &send->s_wr;
		prev = send;

		pos = (pos + 1) % ic->i_send_ring.w_nr;
		send = &ic->i_sends[pos];
		i++;

	} while (i < work_alloc
		 && scat != &rm->data.op_sg[rm->data.op_count]);

	/* Account the RDS header in bytes_sent, but only once; the caller
	 * has no notion of fragmentation.
	 */
	if (hdr_off == 0)
		bytes_sent += sizeof(struct rds_header);

	/* If we finished the message, the send completion handler now owns it. */
	if (scat == &rm->data.op_sg[rm->data.op_count]) {
		prev->s_op = ic->i_data_op;
		prev->s_wr.send_flags |= IB_SEND_SOLICITED;
		if (!(prev->s_wr.send_flags & IB_SEND_SIGNALED)) {
			ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
			prev->s_wr.send_flags |= IB_SEND_SIGNALED;
			nr_sig++;
		}
		ic->i_data_op = NULL;
	}

	/* Put back any ring entries and credits we did not use. */
	if (i < work_alloc) {
		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i);
		work_alloc = i;
	}
	if (ic->i_flowctl && i < credit_alloc)
		rds_ib_send_add_credits(conn, credit_alloc - i);

	if (nr_sig)
		atomic_add(nr_sig, &ic->i_signaled_sends);

	/* Post the chain of work requests to the queue pair. */
	failed_wr = &first->s_wr;
	ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
	rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
		 first, &first->s_wr, ret, failed_wr);
	BUG_ON(failed_wr != &first->s_wr);
	if (ret) {
		printk(KERN_WARNING "RDS/IB: ib_post_send to %pI4 "
		       "returned %d\n", &conn->c_faddr, ret);
		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
		rds_ib_sub_signaled(ic, nr_sig);
		if (prev->s_op) {
			ic->i_data_op = prev->s_op;
			prev->s_op = NULL;
		}

		rds_ib_conn_error(ic->conn, "ib_post_send failed\n");
		goto out;
	}

	ret = bytes_sent;
out:
	BUG_ON(adv_credits);
	return ret;
}
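
/*
 * Issue an atomic operation. This is a simplified version of the RDMA
 * path: exactly one scatterlist entry is mapped, just large enough to
 * receive the value returned by the atomic operation.
 */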
int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
{
	struct rds_ib_connection *ic = conn->c_transport_data;
	struct rds_ib_send_work *send = NULL;
	struct ib_send_wr *failed_wr;
	struct rds_ib_device *rds_ibdev;
	u32 pos;
	u32 work_alloc;
	int ret;
	int nr_sig = 0;

	rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);

	work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, 1, &pos);
	if (work_alloc != 1) {
		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
		rds_ib_stats_inc(s_ib_tx_ring_full);
		ret = -ENOMEM;
		goto out;
	}

	/* address of the send request in the ring */
	send = &ic->i_sends[pos];
	send->s_queued = jiffies;

	if (op->op_type == RDS_ATOMIC_TYPE_CSWP) {
		send->s_atomic_wr.wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP;
		send->s_atomic_wr.compare_add = op->op_m_cswp.compare;
		send->s_atomic_wr.swap = op->op_m_cswp.swap;
		send->s_atomic_wr.compare_add_mask = op->op_m_cswp.compare_mask;
		send->s_atomic_wr.swap_mask = op->op_m_cswp.swap_mask;
	} else {
		send->s_atomic_wr.wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD;
		send->s_atomic_wr.compare_add = op->op_m_fadd.add;
		send->s_atomic_wr.swap = 0;
		send->s_atomic_wr.compare_add_mask = op->op_m_fadd.nocarry_mask;
		send->s_atomic_wr.swap_mask = 0;
	}
	nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify);
	send->s_atomic_wr.wr.num_sge = 1;
	send->s_atomic_wr.wr.next = NULL;
	send->s_atomic_wr.remote_addr = op->op_remote_addr;
	send->s_atomic_wr.rkey = op->op_rkey;
	send->s_op = op;
	rds_message_addref(container_of(send->s_op, struct rds_message, atomic));

	/* map the return-value buffer to the device */
	ret = ib_dma_map_sg(ic->i_cm_id->device, op->op_sg, 1, DMA_FROM_DEVICE);
	rdsdebug("ic %p mapping atomic op %p. mapped %d pg\n", ic, op, ret);
	if (ret != 1) {
		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
		rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
		ret = -ENOMEM;
		goto out;
	}

	/* convert our struct scatterlist to a struct ib_sge */
	send->s_sge[0].addr = ib_sg_dma_address(ic->i_cm_id->device, op->op_sg);
	send->s_sge[0].length = ib_sg_dma_len(ic->i_cm_id->device, op->op_sg);
	send->s_sge[0].lkey = ic->i_pd->local_dma_lkey;

	rdsdebug("rva %Lx rpa %Lx len %u\n", op->op_remote_addr,
		 send->s_sge[0].addr, send->s_sge[0].length);

	if (nr_sig)
		atomic_add(nr_sig, &ic->i_signaled_sends);

	failed_wr = &send->s_atomic_wr.wr;
	ret = ib_post_send(ic->i_cm_id->qp, &send->s_atomic_wr.wr, &failed_wr);
	rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic,
		 send, &send->s_atomic_wr, ret, failed_wr);
	BUG_ON(failed_wr != &send->s_atomic_wr.wr);
	if (ret) {
		printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 "
		       "returned %d\n", &conn->c_faddr, ret);
		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
		rds_ib_sub_signaled(ic, nr_sig);
		goto out;
	}

	if (unlikely(failed_wr != &send->s_atomic_wr.wr)) {
		printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
		BUG_ON(failed_wr != &send->s_atomic_wr.wr);
	}

out:
	return ret;
}
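
/*
 * Post the work requests for an RDMA READ or WRITE. The op's scatterlist
 * is split across as many work requests as needed, each carrying up to
 * max_sge SGEs, and the last one takes a reference on the owning message.
 */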
int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
{
	struct rds_ib_connection *ic = conn->c_transport_data;
	struct rds_ib_send_work *send = NULL;
	struct rds_ib_send_work *first;
	struct rds_ib_send_work *prev;
	struct ib_send_wr *failed_wr;
	struct scatterlist *scat;
	unsigned long len;
	u64 remote_addr = op->op_remote_addr;
	u32 max_sge = ic->rds_ibdev->max_sge;
	u32 pos;
	u32 work_alloc;
	u32 i;
	u32 j;
	int sent;
	int ret;
	int num_sge;
	int nr_sig = 0;

	/* map the op the first time we see it */
	if (!op->op_mapped) {
		op->op_count = ib_dma_map_sg(ic->i_cm_id->device,
					     op->op_sg, op->op_nents, (op->op_write) ?
					     DMA_TO_DEVICE : DMA_FROM_DEVICE);
		rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->op_count);
		if (op->op_count == 0) {
			rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
			ret = -ENOMEM;
			goto out;
		}

		op->op_mapped = 1;
	}

	/*
	 * Rather than returning a partial RDMA read/write, insist that
	 * there are enough work requests to send the entire message.
	 */
	i = ceil(op->op_count, max_sge);

	work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos);
	if (work_alloc != i) {
		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
		rds_ib_stats_inc(s_ib_tx_ring_full);
		ret = -ENOMEM;
		goto out;
	}

	send = &ic->i_sends[pos];
	first = send;
	prev = NULL;
	scat = &op->op_sg[0];
	sent = 0;
	num_sge = op->op_count;

	for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) {
		send->s_wr.send_flags = 0;
		send->s_queued = jiffies;
		send->s_op = NULL;

		nr_sig += rds_ib_set_wr_signal_state(ic, send, op->op_notify);

		send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
		send->s_rdma_wr.remote_addr = remote_addr;
		send->s_rdma_wr.rkey = op->op_rkey;

		if (num_sge > max_sge) {
			send->s_rdma_wr.wr.num_sge = max_sge;
			num_sge -= max_sge;
		} else {
			send->s_rdma_wr.wr.num_sge = num_sge;
		}

		send->s_rdma_wr.wr.next = NULL;

		if (prev)
			prev->s_rdma_wr.wr.next = &send->s_rdma_wr.wr;

		for (j = 0; j < send->s_rdma_wr.wr.num_sge &&
		     scat != &op->op_sg[op->op_count]; j++) {
			len = ib_sg_dma_len(ic->i_cm_id->device, scat);
			send->s_sge[j].addr =
				ib_sg_dma_address(ic->i_cm_id->device, scat);
			send->s_sge[j].length = len;
			send->s_sge[j].lkey = ic->i_pd->local_dma_lkey;

			sent += len;
			rdsdebug("ic %p sent %d remote_addr %llu\n", ic, sent, remote_addr);

			remote_addr += len;
			scat++;
		}

		rdsdebug("send %p wr %p num_sge %u next %p\n", send,
			 &send->s_rdma_wr.wr,
			 send->s_rdma_wr.wr.num_sge,
			 send->s_rdma_wr.wr.next);

		prev = send;
		if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
			send = ic->i_sends;
	}

	/* give a reference on the message to the last work request */
	if (scat == &op->op_sg[op->op_count]) {
		prev->s_op = op;
		rds_message_addref(container_of(op, struct rds_message, rdma));
	}

	if (i < work_alloc) {
		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i);
		work_alloc = i;
	}

	if (nr_sig)
		atomic_add(nr_sig, &ic->i_signaled_sends);

	failed_wr = &first->s_rdma_wr.wr;
	ret = ib_post_send(ic->i_cm_id->qp, &first->s_rdma_wr.wr, &failed_wr);
	rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
		 first, &first->s_rdma_wr.wr, ret, failed_wr);
	BUG_ON(failed_wr != &first->s_rdma_wr.wr);
	if (ret) {
		printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 "
		       "returned %d\n", &conn->c_faddr, ret);
		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
		rds_ib_sub_signaled(ic, nr_sig);
		goto out;
	}

	if (unlikely(failed_wr != &first->s_rdma_wr.wr)) {
		printk(KERN_WARNING "RDS/IB: ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
		BUG_ON(failed_wr != &first->s_rdma_wr.wr);
	}

out:
	return ret;
}

void rds_ib_xmit_path_complete(struct rds_conn_path *cp)
{
	struct rds_connection *conn = cp->cp_conn;
	struct rds_ib_connection *ic = conn->c_transport_data;

	/* We may have a pending ACK we could not send earlier (for
	 * example due to flow control); try again now.
	 */
	rds_ib_attempt_ack(ic);
}