#include <linux/kernel.h>
#include <linux/in.h>
#include <linux/device.h>
#include <linux/dmapool.h>
#include <linux/ratelimit.h>

#include "rds.h"
#include "ib.h"

static char *rds_ib_wc_status_strings[] = {
#define RDS_IB_WC_STATUS_STR(foo) \
		[IB_WC_##foo] = __stringify(IB_WC_##foo)
	RDS_IB_WC_STATUS_STR(SUCCESS),
	RDS_IB_WC_STATUS_STR(LOC_LEN_ERR),
	RDS_IB_WC_STATUS_STR(LOC_QP_OP_ERR),
	RDS_IB_WC_STATUS_STR(LOC_EEC_OP_ERR),
	RDS_IB_WC_STATUS_STR(LOC_PROT_ERR),
	RDS_IB_WC_STATUS_STR(WR_FLUSH_ERR),
	RDS_IB_WC_STATUS_STR(MW_BIND_ERR),
	RDS_IB_WC_STATUS_STR(BAD_RESP_ERR),
	RDS_IB_WC_STATUS_STR(LOC_ACCESS_ERR),
	RDS_IB_WC_STATUS_STR(REM_INV_REQ_ERR),
	RDS_IB_WC_STATUS_STR(REM_ACCESS_ERR),
	RDS_IB_WC_STATUS_STR(REM_OP_ERR),
	RDS_IB_WC_STATUS_STR(RETRY_EXC_ERR),
	RDS_IB_WC_STATUS_STR(RNR_RETRY_EXC_ERR),
	RDS_IB_WC_STATUS_STR(LOC_RDD_VIOL_ERR),
	RDS_IB_WC_STATUS_STR(REM_INV_RD_REQ_ERR),
	RDS_IB_WC_STATUS_STR(REM_ABORT_ERR),
	RDS_IB_WC_STATUS_STR(INV_EECN_ERR),
	RDS_IB_WC_STATUS_STR(INV_EEC_STATE_ERR),
	RDS_IB_WC_STATUS_STR(FATAL_ERR),
	RDS_IB_WC_STATUS_STR(RESP_TIMEOUT_ERR),
	RDS_IB_WC_STATUS_STR(GENERAL_ERR),
#undef RDS_IB_WC_STATUS_STR
};

char *rds_ib_wc_status_str(enum ib_wc_status status)
{
	return rds_str_array(rds_ib_wc_status_strings,
			     ARRAY_SIZE(rds_ib_wc_status_strings), status);
}
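
/*
 * Translate an IB work completion status into an RDS notification status and
 * hand it to the core completion callback. Flush errors are ignored: they are
 * expected while the QP is being drained.
 */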
static void rds_ib_send_complete(struct rds_message *rm,
				 int wc_status,
				 void (*complete)(struct rds_message *rm, int status))
{
	int notify_status;

	switch (wc_status) {
	case IB_WC_WR_FLUSH_ERR:
		return;

	case IB_WC_SUCCESS:
		notify_status = RDS_RDMA_SUCCESS;
		break;

	case IB_WC_REM_ACCESS_ERR:
		notify_status = RDS_RDMA_REMOTE_ERROR;
		break;

	default:
		notify_status = RDS_RDMA_OTHER_ERROR;
		break;
	}
	complete(rm, notify_status);
}

static void rds_ib_send_unmap_data(struct rds_ib_connection *ic,
				   struct rm_data_op *op,
				   int wc_status)
{
	if (op->op_nents)
		ib_dma_unmap_sg(ic->i_cm_id->device,
				op->op_sg, op->op_nents,
				DMA_TO_DEVICE);
}

static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic,
				   struct rm_rdma_op *op,
				   int wc_status)
{
	if (op->op_mapped) {
		ib_dma_unmap_sg(ic->i_cm_id->device,
				op->op_sg, op->op_nents,
				op->op_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
		op->op_mapped = 0;
	}
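
	/*
	 * Notification happens here, from the send completion path: the WR
	 * that carried this RDMA op has been reaped from the CQ, so translate
	 * its status and call rds_rdma_send_complete(). The buffers were just
	 * unmapped above, so no extra ib_dma_sync_sg_for_cpu() is needed.
	 */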
	rds_ib_send_complete(container_of(op, struct rds_message, rdma),
			     wc_status, rds_rdma_send_complete);

	if (op->op_write)
		rds_stats_add(s_send_rdma_bytes, op->op_bytes);
	else
		rds_stats_add(s_recv_rdma_bytes, op->op_bytes);
}

static void rds_ib_send_unmap_atomic(struct rds_ib_connection *ic,
				     struct rm_atomic_op *op,
				     int wc_status)
{
	/* unmap the single scatterlist entry that receives the atomic result */
	if (op->op_mapped) {
		ib_dma_unmap_sg(ic->i_cm_id->device, op->op_sg, 1,
				DMA_FROM_DEVICE);
		op->op_mapped = 0;
	}

	rds_ib_send_complete(container_of(op, struct rds_message, atomic),
			     wc_status, rds_atomic_send_complete);

	if (op->op_type == RDS_ATOMIC_TYPE_CSWP)
		rds_ib_stats_inc(s_ib_atomic_cswp);
	else
		rds_ib_stats_inc(s_ib_atomic_fadd);
}
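
/*
 * Unmap the resources tied to a send ring entry and, for data/RDMA/atomic
 * ops, run the user notification. Returns the rds_message that owns the op
 * (or NULL); the caller is responsible for dropping that reference with
 * rds_message_put().
 */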
static struct rds_message *rds_ib_send_unmap_op(struct rds_ib_connection *ic,
						struct rds_ib_send_work *send,
						int wc_status)
{
	struct rds_message *rm = NULL;

	/* dispatch on the opcode recorded in the work request */
	switch (send->s_wr.opcode) {
	case IB_WR_SEND:
		if (send->s_op) {
			rm = container_of(send->s_op, struct rds_message, data);
			rds_ib_send_unmap_data(ic, send->s_op, wc_status);
		}
		break;
	case IB_WR_RDMA_WRITE:
	case IB_WR_RDMA_READ:
		if (send->s_op) {
			rm = container_of(send->s_op, struct rds_message, rdma);
			rds_ib_send_unmap_rdma(ic, send->s_op, wc_status);
		}
		break;
	case IB_WR_ATOMIC_FETCH_AND_ADD:
	case IB_WR_ATOMIC_CMP_AND_SWP:
		if (send->s_op) {
			rm = container_of(send->s_op, struct rds_message, atomic);
			rds_ib_send_unmap_atomic(ic, send->s_op, wc_status);
		}
		break;
	default:
		printk_ratelimited(KERN_NOTICE
				   "RDS/IB: %s: unexpected opcode 0x%x in WR!\n",
				   __func__, send->s_wr.opcode);
		break;
	}

	/* poison the opcode so rds_ib_send_clear_ring() knows this entry was
	 * already unmapped */
	send->s_wr.opcode = 0xdead;

	return rm;
}

void rds_ib_send_init_ring(struct rds_ib_connection *ic)
{
	struct rds_ib_send_work *send;
	u32 i;

	for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
		struct ib_sge *sge;

		send->s_op = NULL;

		send->s_wr.wr_id = i;
		send->s_wr.sg_list = send->s_sge;
		send->s_wr.ex.imm_data = 0;

		sge = &send->s_sge[0];
		sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header));
		sge->length = sizeof(struct rds_header);
		sge->lkey = ic->i_mr->lkey;

		send->s_sge[1].lkey = ic->i_mr->lkey;
	}
}

void rds_ib_send_clear_ring(struct rds_ib_connection *ic)
{
	struct rds_ib_send_work *send;
	u32 i;

	for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
		if (send->s_op && send->s_wr.opcode != 0xdead)
			rds_ib_send_unmap_op(ic, send, IB_WC_WR_FLUSH_ERR);
	}
}
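
/*
 * Drop nr signaled sends from the outstanding count and wake anyone waiting
 * for the send ring to drain once it reaches zero.
 */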
static void rds_ib_sub_signaled(struct rds_ib_connection *ic, int nr)
{
	if ((atomic_sub_return(nr, &ic->i_signaled_sends) == 0) &&
	    waitqueue_active(&rds_ib_ring_empty_wait))
		wake_up(&rds_ib_ring_empty_wait);
	BUG_ON(atomic_read(&ic->i_signaled_sends) < 0);
}
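
/*
 * Send completion handler: re-arm the CQ, then reap completions. ACK-only
 * WRs (RDS_IB_ACK_WR_ID) are handled separately; for everything else each
 * ring entry from the oldest outstanding slot up to the completed wr_id is
 * unmapped, its message reference dropped, and the ring space freed.
 */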
void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
{
	struct rds_connection *conn = context;
	struct rds_ib_connection *ic = conn->c_transport_data;
	struct rds_message *rm = NULL;
	struct ib_wc wc;
	struct rds_ib_send_work *send;
	u32 completed;
	u32 oldest;
	u32 i = 0;
	int ret;
	int nr_sig = 0;

	rdsdebug("cq %p conn %p\n", cq, conn);
	rds_ib_stats_inc(s_ib_tx_cq_call);
	ret = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
	if (ret)
		rdsdebug("ib_req_notify_cq send failed: %d\n", ret);

	while (ib_poll_cq(cq, 1, &wc) > 0) {
		rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
			 (unsigned long long)wc.wr_id, wc.status,
			 rds_ib_wc_status_str(wc.status), wc.byte_len,
			 be32_to_cpu(wc.ex.imm_data));
		rds_ib_stats_inc(s_ib_tx_cq_event);

		if (wc.wr_id == RDS_IB_ACK_WR_ID) {
			if (ic->i_ack_queued + HZ/2 < jiffies)
				rds_ib_stats_inc(s_ib_tx_stalled);
			rds_ib_ack_send_complete(ic);
			continue;
		}

		oldest = rds_ib_ring_oldest(&ic->i_send_ring);

		completed = rds_ib_ring_completed(&ic->i_send_ring, wc.wr_id, oldest);

		for (i = 0; i < completed; i++) {
			send = &ic->i_sends[oldest];
			if (send->s_wr.send_flags & IB_SEND_SIGNALED)
				nr_sig++;

			rm = rds_ib_send_unmap_op(ic, send, wc.status);

			if (send->s_queued + HZ/2 < jiffies)
				rds_ib_stats_inc(s_ib_tx_stalled);

			if (send->s_op) {
				if (send->s_op == rm->m_final_op) {
					/* wake anyone waiting for this message
					 * to be flushed out */
					rds_message_unmapped(rm);
				}
				rds_message_put(rm);
				send->s_op = NULL;
			}

			oldest = (oldest + 1) % ic->i_send_ring.w_nr;
		}

		rds_ib_ring_free(&ic->i_send_ring, completed);
		rds_ib_sub_signaled(ic, nr_sig);
		nr_sig = 0;

		if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
		    test_bit(0, &conn->c_map_queued))
			queue_delayed_work(rds_wq, &conn->c_send_w, 0);

		/* flush errors are expected while the connection shuts down */
		if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) {
			rds_ib_conn_error(conn, "send completion on %pI4 had status "
					  "%u (%s), disconnecting and reconnecting\n",
					  &conn->c_faddr, wc.status,
					  rds_ib_wc_status_str(wc.status));
		}
	}
}
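
/*
 * Credit-based flow control. ic->i_credits packs two counters:
 *
 *  - send credits (IB_GET_SEND_CREDITS): how many WRs we may post without
 *    overrunning the peer's receive queue; each posted send consumes one.
 *  - posted credits (IB_GET_POST_CREDITS): receive buffers we have posted
 *    locally but not yet advertised to the peer; they are carried back in
 *    the h_credit field of outgoing headers.
 *
 * The last available send credit is held back unless we also have posted
 * credits to advertise, so that a credit update can always reach the peer
 * and neither side deadlocks with zero credits. RDS_LL_SEND_FULL is set
 * whenever the caller has to wait for more credits.
 */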
int rds_ib_send_grab_credits(struct rds_ib_connection *ic,
			     u32 wanted, u32 *adv_credits, int need_posted, int max_posted)
{
	unsigned int avail, posted, got = 0, advertise;
	long oldval, newval;

	*adv_credits = 0;
	if (!ic->i_flowctl)
		return wanted;

try_again:
	advertise = 0;
	oldval = newval = atomic_read(&ic->i_credits);
	posted = IB_GET_POST_CREDITS(oldval);
	avail = IB_GET_SEND_CREDITS(oldval);

	rdsdebug("rds_ib_send_grab_credits(%u): credits=%u posted=%u\n",
		 wanted, avail, posted);

	/* reserve the final send credit for a credit update when we have
	 * nothing posted to advertise */
	if (avail && !posted)
		avail--;

	if (avail < wanted) {
		struct rds_connection *conn = ic->i_cm_id->context;

		/* not enough credits: take what is there and mark the
		 * connection full */
		set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
		got = avail;
	} else {
		got = wanted;
	}
	newval -= IB_SET_SEND_CREDITS(got);

	/*
	 * If need_posted is set, the caller wants a credit update advertised
	 * even when no send credits were grabbed, so scoop up as many posted
	 * credits as max_posted allows.
	 */
	if (posted && (got || need_posted)) {
		advertise = min_t(unsigned int, posted, max_posted);
		newval -= IB_SET_POST_CREDITS(advertise);
	}

	/* commit the new credit word; retry if another CPU raced with us */
	if (atomic_cmpxchg(&ic->i_credits, oldval, newval) != oldval)
		goto try_again;

	*adv_credits = advertise;
	return got;
}

void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits)
{
	struct rds_ib_connection *ic = conn->c_transport_data;

	if (credits == 0)
		return;

	rdsdebug("rds_ib_send_add_credits(%u): current=%u%s\n",
		 credits,
		 IB_GET_SEND_CREDITS(atomic_read(&ic->i_credits)),
		 test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ? ", ll_send_full" : "");

	atomic_add(IB_SET_SEND_CREDITS(credits), &ic->i_credits);
	if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags))
		queue_delayed_work(rds_wq, &conn->c_send_w, 0);

	WARN_ON(IB_GET_SEND_CREDITS(credits) >= 16384);

	rds_ib_stats_inc(s_ib_rx_credit_updates);
}

void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted)
{
	struct rds_ib_connection *ic = conn->c_transport_data;

	if (posted == 0)
		return;

	atomic_add(IB_SET_POST_CREDITS(posted), &ic->i_credits);
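
	/*
	 * Only ask for an ACK (which carries the credit update) once a
	 * reasonable batch of newly posted buffers has accumulated;
	 * advertising every single buffer would generate an update storm.
	 * The threshold used here is 16 posted credits.
	 */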
	if (IB_GET_POST_CREDITS(atomic_read(&ic->i_credits)) >= 16)
		set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
}

static inline int rds_ib_set_wr_signal_state(struct rds_ib_connection *ic,
					     struct rds_ib_send_work *send,
					     bool notify)
{
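	/*
	 * Request a signaled completion only every rds_ib_sysctl_max_unsig_wrs
	 * sends (or when the caller explicitly asks for notification), so most
	 * WRs complete silently and completion processing stays batched.
	 */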
	if (ic->i_unsignaled_wrs-- == 0 || notify) {
		ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
		send->s_wr.send_flags |= IB_SEND_SIGNALED;
		return 1;
	}
	return 0;
}
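
/*
 * Transmit (part of) a message over the connection's send ring.
 *
 * This can be called several times for one rds_message: the first call maps
 * the message's scatterlist and takes a reference (ic->i_data_op); the data
 * is then fragmented into RDS_FRAG_SIZE chunks, one header plus one fragment
 * per work request. Ownership of the message passes to the completion
 * handler with the work request that carries the final fragment.
 *
 * Returns the number of payload (and, for hdr_off == 0, header) bytes queued,
 * or a negative errno if no ring space or credits were available.
 */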
int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
		unsigned int hdr_off, unsigned int sg, unsigned int off)
{
	struct rds_ib_connection *ic = conn->c_transport_data;
	struct ib_device *dev = ic->i_cm_id->device;
	struct rds_ib_send_work *send = NULL;
	struct rds_ib_send_work *first;
	struct rds_ib_send_work *prev;
	struct ib_send_wr *failed_wr;
	struct scatterlist *scat;
	u32 pos;
	u32 i;
	u32 work_alloc;
	u32 credit_alloc = 0;
	u32 posted;
	u32 adv_credits = 0;
	int send_flags = 0;
	int bytes_sent = 0;
	int ret;
	int flow_controlled = 0;
	int nr_sig = 0;

	BUG_ON(off % RDS_FRAG_SIZE);
	BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header));

	/* loopback congestion updates: apply the map locally and report it sent */
	if (conn->c_loopback
	    && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
		rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
		scat = &rm->data.op_sg[sg];
		ret = sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
		ret = min_t(int, ret, scat->length - conn->c_xmit_data_off);
		return ret;
	}

	/* compute how many work requests (one per fragment) this message needs */
	if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0)
		i = 1;
	else
		i = ceil(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE);

	work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos);
	if (work_alloc == 0) {
		set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
		rds_ib_stats_inc(s_ib_tx_ring_full);
		ret = -ENOMEM;
		goto out;
	}

	if (ic->i_flowctl) {
		credit_alloc = rds_ib_send_grab_credits(ic, work_alloc, &posted, 0, RDS_MAX_ADV_CREDIT);
		adv_credits += posted;
		if (credit_alloc < work_alloc) {
			rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc);
			work_alloc = credit_alloc;
			flow_controlled = 1;
		}
		if (work_alloc == 0) {
			set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
			rds_ib_stats_inc(s_ib_tx_throttle);
			ret = -ENOMEM;
			goto out;
		}
	}

	/* map the message's scatterlist the first time we see it */
	if (!ic->i_data_op) {
		if (rm->data.op_nents) {
			rm->data.op_count = ib_dma_map_sg(dev,
							  rm->data.op_sg,
							  rm->data.op_nents,
							  DMA_TO_DEVICE);
			rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->data.op_count);
			if (rm->data.op_count == 0) {
				rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
				rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
				ret = -ENOMEM;
				goto out;
			}
		} else {
			rm->data.op_count = 0;
		}

		rds_message_addref(rm);
		ic->i_data_op = &rm->data;

		/* finalize the header flags */
		if (test_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags))
			rm->m_inc.i_hdr.h_flags |= RDS_FLAG_ACK_REQUIRED;
		if (test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags))
			rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED;

		/* if the message carries an RDMA op, tell the peer about it
		 * in an extension header */
		if (rm->rdma.op_active) {
			struct rds_ext_header_rdma ext_hdr;

			ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.op_rkey);
			rds_message_add_extension(&rm->m_inc.i_hdr,
						  RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
		}
		if (rm->m_rdma_cookie) {
			rds_message_add_rdma_dest_extension(&rm->m_inc.i_hdr,
							    rds_rdma_cookie_key(rm->m_rdma_cookie),
							    rds_rdma_cookie_offset(rm->m_rdma_cookie));
		}

		/* piggyback the most recent ACK sequence on this message */
		rm->m_inc.i_hdr.h_ack = cpu_to_be64(rds_ib_piggyb_ack(ic));
		rds_message_make_checksum(&rm->m_inc.i_hdr);

		/* advertise any freshly posted receive credits we can fit in
		 * this header */
		if (ic->i_flowctl) {
			rds_ib_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits);
			adv_credits += posted;
			BUG_ON(adv_credits > 255);
		}
	}
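
	/*
	 * An RDMA op with op_fence set wants its RDMA fenced from the SEND
	 * that follows it, so mark this message's work requests with
	 * IB_SEND_FENCE.
	 */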
	if (rm->rdma.op_active && rm->rdma.op_fence)
		send_flags = IB_SEND_FENCE;

	/* each fragment gets its own work request; messages may be zero bytes */
	send = &ic->i_sends[pos];
	first = send;
	prev = NULL;
	scat = &ic->i_data_op->op_sg[sg];
	i = 0;
	do {
		unsigned int len = 0;

		/* set up the header sge and work request */
		send->s_wr.send_flags = send_flags;
		send->s_wr.opcode = IB_WR_SEND;
		send->s_wr.num_sge = 1;
		send->s_wr.next = NULL;
		send->s_queued = jiffies;
		send->s_op = NULL;

		send->s_sge[0].addr = ic->i_send_hdrs_dma
			+ (pos * sizeof(struct rds_header));
		send->s_sge[0].length = sizeof(struct rds_header);

		memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));

		/* set up the data sge, if there is payload left to send */
		if (i < work_alloc
		    && scat != &rm->data.op_sg[rm->data.op_count]) {
			len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off);
			send->s_wr.num_sge = 2;

			send->s_sge[1].addr = ib_sg_dma_address(dev, scat) + off;
			send->s_sge[1].length = len;

			bytes_sent += len;
			off += len;
			if (off == ib_sg_dma_len(dev, scat)) {
				scat++;
				off = 0;
			}
		}

		rds_ib_set_wr_signal_state(ic, send, 0);

		/* always signal (and solicit) the last work request when we
		 * were throttled by flow control */
		if (ic->i_flowctl && flow_controlled && i == (work_alloc-1))
			send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;

		if (send->s_wr.send_flags & IB_SEND_SIGNALED)
			nr_sig++;

		rdsdebug("send %p wr %p num_sge %u next %p\n", send,
			 &send->s_wr, send->s_wr.num_sge, send->s_wr.next);

		if (ic->i_flowctl && adv_credits) {
			struct rds_header *hdr = &ic->i_send_hdrs[pos];

			/* add the credit update and redo the header checksum */
			hdr->h_credit = adv_credits;
			rds_message_make_checksum(hdr);
			adv_credits = 0;
			rds_ib_stats_inc(s_ib_tx_credit_updates);
		}

		if (prev)
			prev->s_wr.next = &send->s_wr;
		prev = send;

		pos = (pos + 1) % ic->i_send_ring.w_nr;
		send = &ic->i_sends[pos];
		i++;

	} while (i < work_alloc
		 && scat != &rm->data.op_sg[rm->data.op_count]);

	/* account for the RDS header once, on the first fragment */
	if (hdr_off == 0)
		bytes_sent += sizeof(struct rds_header);

	/* if the whole message was queued, the completion handler now owns it */
	if (scat == &rm->data.op_sg[rm->data.op_count]) {
		prev->s_op = ic->i_data_op;
		prev->s_wr.send_flags |= IB_SEND_SOLICITED;
		ic->i_data_op = NULL;
	}

	/* put back work requests and credits we ended up not using */
	if (i < work_alloc) {
		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i);
		work_alloc = i;
	}
	if (ic->i_flowctl && i < credit_alloc)
		rds_ib_send_add_credits(conn, credit_alloc - i);

	if (nr_sig)
		atomic_add(nr_sig, &ic->i_signaled_sends);

	/* post the chain of work requests */
	failed_wr = &first->s_wr;
	ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
	rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
		 first, &first->s_wr, ret, failed_wr);
	BUG_ON(failed_wr != &first->s_wr);
	if (ret) {
		printk(KERN_WARNING "RDS/IB: ib_post_send to %pI4 "
		       "returned %d\n", &conn->c_faddr, ret);
		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
		rds_ib_sub_signaled(ic, nr_sig);
		if (prev->s_op) {
			ic->i_data_op = prev->s_op;
			prev->s_op = NULL;
		}

		rds_ib_conn_error(ic->conn, "ib_post_send failed\n");
		goto out;
	}

	ret = bytes_sent;
out:
	BUG_ON(adv_credits);
	return ret;
}
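
/*
 * Issue a single atomic operation (masked compare-and-swap or masked
 * fetch-and-add). Only one work request with a single scatterlist entry is
 * needed; that entry receives the value returned by the atomic.
 */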
int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
{
	struct rds_ib_connection *ic = conn->c_transport_data;
	struct rds_ib_send_work *send = NULL;
	struct ib_send_wr *failed_wr;
	struct rds_ib_device *rds_ibdev;
	u32 pos;
	u32 work_alloc;
	int ret;
	int nr_sig = 0;

	rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);

	work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, 1, &pos);
	if (work_alloc != 1) {
		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
		rds_ib_stats_inc(s_ib_tx_ring_full);
		ret = -ENOMEM;
		goto out;
	}

	/* build the atomic work request in the ring entry we just allocated */
	send = &ic->i_sends[pos];
	send->s_queued = jiffies;

	if (op->op_type == RDS_ATOMIC_TYPE_CSWP) {
		send->s_wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP;
		send->s_wr.wr.atomic.compare_add = op->op_m_cswp.compare;
		send->s_wr.wr.atomic.swap = op->op_m_cswp.swap;
		send->s_wr.wr.atomic.compare_add_mask = op->op_m_cswp.compare_mask;
		send->s_wr.wr.atomic.swap_mask = op->op_m_cswp.swap_mask;
	} else {
		send->s_wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD;
		send->s_wr.wr.atomic.compare_add = op->op_m_fadd.add;
		send->s_wr.wr.atomic.swap = 0;
		send->s_wr.wr.atomic.compare_add_mask = op->op_m_fadd.nocarry_mask;
		send->s_wr.wr.atomic.swap_mask = 0;
	}
	nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify);
	send->s_wr.num_sge = 1;
	send->s_wr.next = NULL;
	send->s_wr.wr.atomic.remote_addr = op->op_remote_addr;
	send->s_wr.wr.atomic.rkey = op->op_rkey;
	send->s_op = op;
	rds_message_addref(container_of(send->s_op, struct rds_message, atomic));

	/* map the buffer that will receive the value returned by the atomic */
	ret = ib_dma_map_sg(ic->i_cm_id->device, op->op_sg, 1, DMA_FROM_DEVICE);
	rdsdebug("ic %p mapping atomic op %p. mapped %d pg\n", ic, op, ret);
	if (ret != 1) {
		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
		rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
		ret = -ENOMEM;
		goto out;
	}

	/* convert our struct scatterlist to a struct ib_sge */
	send->s_sge[0].addr = ib_sg_dma_address(ic->i_cm_id->device, op->op_sg);
	send->s_sge[0].length = ib_sg_dma_len(ic->i_cm_id->device, op->op_sg);
	send->s_sge[0].lkey = ic->i_mr->lkey;

	rdsdebug("rva %Lx rpa %Lx len %u\n", op->op_remote_addr,
		 send->s_sge[0].addr, send->s_sge[0].length);

	if (nr_sig)
		atomic_add(nr_sig, &ic->i_signaled_sends);

	failed_wr = &send->s_wr;
	ret = ib_post_send(ic->i_cm_id->qp, &send->s_wr, &failed_wr);
	rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic,
		 send, &send->s_wr, ret, failed_wr);
	BUG_ON(failed_wr != &send->s_wr);
	if (ret) {
		printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 "
		       "returned %d\n", &conn->c_faddr, ret);
		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
		rds_ib_sub_signaled(ic, nr_sig);
		goto out;
	}

	if (unlikely(failed_wr != &send->s_wr)) {
		printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
		BUG_ON(failed_wr != &send->s_wr);
	}

out:
	return ret;
}

int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
{
	struct rds_ib_connection *ic = conn->c_transport_data;
	struct rds_ib_send_work *send = NULL;
	struct rds_ib_send_work *first;
	struct rds_ib_send_work *prev;
	struct ib_send_wr *failed_wr;
	struct scatterlist *scat;
	unsigned long len;
	u64 remote_addr = op->op_remote_addr;
	u32 max_sge = ic->rds_ibdev->max_sge;
	u32 pos;
	u32 work_alloc;
	u32 i;
	u32 j;
	int sent;
	int ret;
	int num_sge;
	int nr_sig = 0;

	/* map the op's scatterlist the first time we see it */
	if (!op->op_mapped) {
		op->op_count = ib_dma_map_sg(ic->i_cm_id->device,
					     op->op_sg, op->op_nents, (op->op_write) ?
					     DMA_TO_DEVICE : DMA_FROM_DEVICE);
		rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->op_count);
		if (op->op_count == 0) {
			rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
			ret = -ENOMEM;
			goto out;
		}

		op->op_mapped = 1;
	}
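
	/*
	 * There is no way to resume a partially sent RDMA op, so insist on
	 * enough ring entries up front to carry the whole scatterlist, at
	 * max_sge data segments per work request.
	 */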
	i = ceil(op->op_count, max_sge);

	work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos);
	if (work_alloc != i) {
		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
		rds_ib_stats_inc(s_ib_tx_ring_full);
		ret = -ENOMEM;
		goto out;
	}

	send = &ic->i_sends[pos];
	first = send;
	prev = NULL;
	scat = &op->op_sg[0];
	sent = 0;
	num_sge = op->op_count;

	for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) {
		send->s_wr.send_flags = 0;
		send->s_queued = jiffies;
		send->s_op = NULL;

		nr_sig += rds_ib_set_wr_signal_state(ic, send, op->op_notify);

		send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
		send->s_wr.wr.rdma.remote_addr = remote_addr;
		send->s_wr.wr.rdma.rkey = op->op_rkey;

		if (num_sge > max_sge) {
			send->s_wr.num_sge = max_sge;
			num_sge -= max_sge;
		} else {
			send->s_wr.num_sge = num_sge;
		}

		send->s_wr.next = NULL;

		if (prev)
			prev->s_wr.next = &send->s_wr;

		for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) {
			len = ib_sg_dma_len(ic->i_cm_id->device, scat);
			send->s_sge[j].addr =
				ib_sg_dma_address(ic->i_cm_id->device, scat);
			send->s_sge[j].length = len;
			send->s_sge[j].lkey = ic->i_mr->lkey;

			sent += len;
			rdsdebug("ic %p sent %d remote_addr %llu\n", ic, sent, remote_addr);

			remote_addr += len;
			scat++;
		}

		rdsdebug("send %p wr %p num_sge %u next %p\n", send,
			 &send->s_wr, send->s_wr.num_sge, send->s_wr.next);

		prev = send;
		if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
			send = ic->i_sends;
	}

	/* the last work request owns the op; take a message reference for it */
	if (scat == &op->op_sg[op->op_count]) {
		prev->s_op = op;
		rds_message_addref(container_of(op, struct rds_message, rdma));
	}

	if (i < work_alloc) {
		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i);
		work_alloc = i;
	}

	if (nr_sig)
		atomic_add(nr_sig, &ic->i_signaled_sends);

	failed_wr = &first->s_wr;
	ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
	rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
		 first, &first->s_wr, ret, failed_wr);
	BUG_ON(failed_wr != &first->s_wr);
	if (ret) {
		printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 "
		       "returned %d\n", &conn->c_faddr, ret);
		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
		rds_ib_sub_signaled(ic, nr_sig);
		goto out;
	}

	if (unlikely(failed_wr != &first->s_wr)) {
		printk(KERN_WARNING "RDS/IB: ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
		BUG_ON(failed_wr != &first->s_wr);
	}

out:
	return ret;
}

void rds_ib_xmit_complete(struct rds_connection *conn)
{
	struct rds_ib_connection *ic = conn->c_transport_data;

	/* try again to send any ACK we were not able to send earlier */
	rds_ib_attempt_ack(ic);
}