#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <rdma/rdma_cm.h>

#include "rds.h"
#include "ib.h"

static struct kmem_cache *rds_ib_incoming_slab;
static struct kmem_cache *rds_ib_frag_slab;
static atomic_t rds_ib_allocation = ATOMIC_INIT(0);

/* Free the page a fragment points at and forget about it. */
static void rds_ib_frag_drop_page(struct rds_page_frag *frag)
{
	rdsdebug("frag %p page %p\n", frag, frag->f_page);
	__free_page(frag->f_page);
	frag->f_page = NULL;
}

/* Return a fragment to the slab; its page must already have been dropped. */
static void rds_ib_frag_free(struct rds_page_frag *frag)
{
	rdsdebug("frag %p page %p\n", frag, frag->f_page);
	BUG_ON(frag->f_page != NULL);
	kmem_cache_free(rds_ib_frag_slab, frag);
}

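/*
 * Each fragment is DMA-mapped individually in rds_ib_recv_refill_one(),
 * so each recv can unmap its own fragment here without affecting the
 * other fragments that share the same page.
 */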
static void rds_ib_recv_unmap_page(struct rds_ib_connection *ic,
				   struct rds_ib_recv_work *recv)
{
	struct rds_page_frag *frag = recv->r_frag;

	rdsdebug("recv %p frag %p page %p\n", recv, frag, frag->f_page);
	if (frag->f_mapped)
		ib_dma_unmap_page(ic->i_cm_id->device,
				  frag->f_mapped,
				  RDS_FRAG_SIZE, DMA_FROM_DEVICE);
	frag->f_mapped = 0;
}

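/*
 * Set up the constant parts of every recv work request in the ring:
 * the wr_id, the scatter/gather list, and the header SGE, which always
 * points at this slot's entry in the receive header array
 * (i_recv_hdrs_dma).  The data SGE address is filled in at refill time.
 */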
void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
{
	struct rds_ib_recv_work *recv;
	u32 i;

	for (i = 0, recv = ic->i_recvs; i < ic->i_recv_ring.w_nr; i++, recv++) {
		struct ib_sge *sge;

		recv->r_ibinc = NULL;
		recv->r_frag = NULL;

		recv->r_wr.next = NULL;
		recv->r_wr.wr_id = i;
		recv->r_wr.sg_list = recv->r_sge;
		recv->r_wr.num_sge = RDS_IB_RECV_SGE;

		sge = rds_ib_data_sge(ic, recv->r_sge);
		sge->addr = 0;
		sge->length = RDS_FRAG_SIZE;
		sge->lkey = ic->i_mr->lkey;

		sge = rds_ib_header_sge(ic, recv->r_sge);
		sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header));
		sge->length = sizeof(struct rds_header);
		sge->lkey = ic->i_mr->lkey;
	}
}

static void rds_ib_recv_clear_one(struct rds_ib_connection *ic,
				  struct rds_ib_recv_work *recv)
{
	if (recv->r_ibinc) {
		rds_inc_put(&recv->r_ibinc->ii_inc);
		recv->r_ibinc = NULL;
	}
	if (recv->r_frag) {
		rds_ib_recv_unmap_page(ic, recv);
		if (recv->r_frag->f_page)
			rds_ib_frag_drop_page(recv->r_frag);
		rds_ib_frag_free(recv->r_frag);
		recv->r_frag = NULL;
	}
}

void rds_ib_recv_clear_ring(struct rds_ib_connection *ic)
{
	u32 i;

	for (i = 0; i < ic->i_recv_ring.w_nr; i++)
		rds_ib_recv_clear_one(ic, &ic->i_recvs[i]);

	if (ic->i_frag.f_page)
		rds_ib_frag_drop_page(&ic->i_frag);
}

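/*
 * Fill in one recv work request: allocate the rds_ib_incoming and the
 * fragment if the slot doesn't already hold them, make sure the
 * connection has a page to carve fragments from, and DMA-map the
 * fragment so the recv can be posted.
 */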
static int rds_ib_recv_refill_one(struct rds_connection *conn,
				  struct rds_ib_recv_work *recv,
				  gfp_t kptr_gfp, gfp_t page_gfp)
{
	struct rds_ib_connection *ic = conn->c_transport_data;
	dma_addr_t dma_addr;
	struct ib_sge *sge;
	int ret = -ENOMEM;

	if (recv->r_ibinc == NULL) {
		if (atomic_read(&rds_ib_allocation) >= rds_ib_sysctl_max_recv_allocation) {
			rds_ib_stats_inc(s_ib_rx_alloc_limit);
			goto out;
		}
		recv->r_ibinc = kmem_cache_alloc(rds_ib_incoming_slab,
						 kptr_gfp);
		if (recv->r_ibinc == NULL)
			goto out;
		atomic_inc(&rds_ib_allocation);
		INIT_LIST_HEAD(&recv->r_ibinc->ii_frags);
		rds_inc_init(&recv->r_ibinc->ii_inc, conn, conn->c_faddr);
	}

	if (recv->r_frag == NULL) {
		recv->r_frag = kmem_cache_alloc(rds_ib_frag_slab, kptr_gfp);
		if (recv->r_frag == NULL)
			goto out;
		INIT_LIST_HEAD(&recv->r_frag->f_item);
		recv->r_frag->f_page = NULL;
	}

	if (ic->i_frag.f_page == NULL) {
		ic->i_frag.f_page = alloc_page(page_gfp);
		if (ic->i_frag.f_page == NULL)
			goto out;
		ic->i_frag.f_offset = 0;
	}

	dma_addr = ib_dma_map_page(ic->i_cm_id->device,
				   ic->i_frag.f_page,
				   ic->i_frag.f_offset,
				   RDS_FRAG_SIZE,
				   DMA_FROM_DEVICE);
	if (ib_dma_mapping_error(ic->i_cm_id->device, dma_addr))
		goto out;

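	/*
	 * The fragment takes over the connection's current page window:
	 * the page pointer, the offset, and the DMA mapping we just made.
	 */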
	recv->r_frag->f_page = ic->i_frag.f_page;
	recv->r_frag->f_offset = ic->i_frag.f_offset;
	recv->r_frag->f_mapped = dma_addr;

	sge = rds_ib_data_sge(ic, recv->r_sge);
	sge->addr = dma_addr;
	sge->length = RDS_FRAG_SIZE;

	sge = rds_ib_header_sge(ic, recv->r_sge);
	sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header);
	sge->length = sizeof(struct rds_header);

	/* The frag now holds its own reference on the page. */
	get_page(recv->r_frag->f_page);

	if (ic->i_frag.f_offset < RDS_PAGE_LAST_OFF) {
		ic->i_frag.f_offset += RDS_FRAG_SIZE;
	} else {
		/* The page is fully carved up; drop the connection's ref. */
		put_page(ic->i_frag.f_page);
		ic->i_frag.f_page = NULL;
		ic->i_frag.f_offset = 0;
	}

	ret = 0;
out:
	return ret;
}

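/*
 * Allocate what each free ring slot needs and post its recv work request.
 * Runs until the ring is full, an allocation fails, or (unless prefilling)
 * the connection goes down.  Posted buffers are advertised to the peer as
 * flow-control credits.
 */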
int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
		       gfp_t page_gfp, int prefill)
{
	struct rds_ib_connection *ic = conn->c_transport_data;
	struct rds_ib_recv_work *recv;
	struct ib_recv_wr *failed_wr;
	unsigned int posted = 0;
	int ret = 0;
	u32 pos;

	while ((prefill || rds_conn_up(conn))
	       && rds_ib_ring_alloc(&ic->i_recv_ring, 1, &pos)) {
		if (pos >= ic->i_recv_ring.w_nr) {
			printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n",
			       pos);
			ret = -EINVAL;
			break;
		}

		recv = &ic->i_recvs[pos];
		ret = rds_ib_recv_refill_one(conn, recv, kptr_gfp, page_gfp);
		if (ret) {
			ret = -1;
			break;
		}

		ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
		rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv,
			 recv->r_ibinc, recv->r_frag->f_page,
			 (long) recv->r_frag->f_mapped, ret);
		if (ret) {
			rds_ib_conn_error(conn, "recv post on "
			       "%pI4 returned %d, disconnecting and "
			       "reconnecting\n", &conn->c_faddr,
			       ret);
			ret = -1;
			break;
		}

		posted++;
	}

	/* We're doing flow control - tell the peer about the new buffers. */
	if (ic->i_flowctl && posted)
		rds_ib_advertise_credits(conn, posted);

	/* Release the ring slot we allocated but failed to fill or post. */
	if (ret)
		rds_ib_ring_unalloc(&ic->i_recv_ring, 1);
	return ret;
}

void rds_ib_inc_purge(struct rds_incoming *inc)
{
	struct rds_ib_incoming *ibinc;
	struct rds_page_frag *frag;
	struct rds_page_frag *pos;

	ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);
	rdsdebug("purging ibinc %p inc %p\n", ibinc, inc);

	list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) {
		list_del_init(&frag->f_item);
		rds_ib_frag_drop_page(frag);
		rds_ib_frag_free(frag);
	}
}

void rds_ib_inc_free(struct rds_incoming *inc)
{
	struct rds_ib_incoming *ibinc;

	ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);

	rds_ib_inc_purge(inc);
	rdsdebug("freeing ibinc %p inc %p\n", ibinc, inc);
	BUG_ON(!list_empty(&ibinc->ii_frags));
	kmem_cache_free(rds_ib_incoming_slab, ibinc);
	atomic_dec(&rds_ib_allocation);
	BUG_ON(atomic_read(&rds_ib_allocation) < 0);
}

int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov,
			    size_t size)
{
	struct rds_ib_incoming *ibinc;
	struct rds_page_frag *frag;
	struct iovec *iov = first_iov;
	unsigned long to_copy;
	unsigned long frag_off = 0;
	unsigned long iov_off = 0;
	int copied = 0;
	int ret;
	u32 len;

	ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);
	frag = list_entry(ibinc->ii_frags.next, struct rds_page_frag, f_item);
	len = be32_to_cpu(inc->i_hdr.h_len);

	while (copied < size && copied < len) {
		if (frag_off == RDS_FRAG_SIZE) {
			frag = list_entry(frag->f_item.next,
					  struct rds_page_frag, f_item);
			frag_off = 0;
		}
		while (iov_off == iov->iov_len) {
			iov_off = 0;
			iov++;
		}

		to_copy = min(iov->iov_len - iov_off, RDS_FRAG_SIZE - frag_off);
		to_copy = min_t(size_t, to_copy, size - copied);
		to_copy = min_t(unsigned long, to_copy, len - copied);

		rdsdebug("%lu bytes to user [%p, %zu] + %lu from frag "
			 "[%p, %lu] + %lu\n",
			 to_copy, iov->iov_base, iov->iov_len, iov_off,
			 frag->f_page, frag->f_offset, frag_off);

		ret = rds_page_copy_to_user(frag->f_page,
					    frag->f_offset + frag_off,
					    iov->iov_base + iov_off,
					    to_copy);
		if (ret) {
			copied = ret;
			break;
		}

		iov_off += to_copy;
		frag_off += to_copy;
		copied += to_copy;
	}

	return copied;
}

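/* The ACK WR and SGE are set up once and reused for every ACK we send. */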
void rds_ib_recv_init_ack(struct rds_ib_connection *ic)
{
	struct ib_send_wr *wr = &ic->i_ack_wr;
	struct ib_sge *sge = &ic->i_ack_sge;

	sge->addr = ic->i_ack_dma;
	sge->length = sizeof(struct rds_header);
	sge->lkey = ic->i_mr->lkey;

	wr->sg_list = sge;
	wr->num_sge = 1;
	wr->opcode = IB_WR_SEND;
	wr->wr_id = RDS_IB_ACK_WR_ID;
	wr->send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
}

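/*
 * ACK state.  i_ack_next holds the highest sequence number we should
 * acknowledge; IB_ACK_REQUESTED says the peer wants an ACK and
 * IB_ACK_IN_FLIGHT says the single ACK WR is currently posted.  The send
 * and recv completion paths can touch this state concurrently, and 64-bit
 * loads and stores are not atomic on all platforms, so i_ack_next is
 * protected by a spinlock unless the platform provides atomic64_t.
 */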
#ifndef KERNEL_HAS_ATOMIC64
static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
			   int ack_required)
{
	unsigned long flags;

	spin_lock_irqsave(&ic->i_ack_lock, flags);
	ic->i_ack_next = seq;
	if (ack_required)
		set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
	spin_unlock_irqrestore(&ic->i_ack_lock, flags);
}

static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
{
	unsigned long flags;
	u64 seq;

	clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);

	spin_lock_irqsave(&ic->i_ack_lock, flags);
	seq = ic->i_ack_next;
	spin_unlock_irqrestore(&ic->i_ack_lock, flags);

	return seq;
}
#else
static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
			   int ack_required)
{
	atomic64_set(&ic->i_ack_next, seq);
	if (ack_required) {
		smp_mb__before_clear_bit();
		set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
	}
}

static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
{
	clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
	smp_mb__after_clear_bit();

	return atomic64_read(&ic->i_ack_next);
}
#endif

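/* Post the ACK WR, carrying the current ack sequence and any credits. */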
static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits)
{
	struct rds_header *hdr = ic->i_ack;
	struct ib_send_wr *failed_wr;
	u64 seq;
	int ret;

	seq = rds_ib_get_ack(ic);

	rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq);
	rds_message_populate_header(hdr, 0, 0, 0);
	hdr->h_ack = cpu_to_be64(seq);
	hdr->h_credit = adv_credits;
	rds_message_make_checksum(hdr);
	ic->i_ack_queued = jiffies;

	ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, &failed_wr);
	if (unlikely(ret)) {
		/*
		 * Failed to send.  Release the WR and force another
		 * ACK attempt.
		 */
		clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
		set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);

		rds_ib_stats_inc(s_ib_ack_send_failure);
		BUG();
	} else
		rds_ib_stats_inc(s_ib_ack_sent);
}

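/*
 * Attempt to send an ACK-only frame.  Only one ACK WR may be posted at a
 * time (IB_ACK_IN_FLIGHT); if it is busy, or we cannot get a send credit,
 * the ACK stays requested and is either piggybacked on an outgoing data
 * frame or resent when the in-flight ACK completes.
 */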
void rds_ib_attempt_ack(struct rds_ib_connection *ic)
{
	unsigned int adv_credits;

	if (!test_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
		return;

	if (test_and_set_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags)) {
		rds_ib_stats_inc(s_ib_ack_send_delayed);
		return;
	}

	/* Can we get a send credit? */
	if (!rds_ib_send_grab_credits(ic, 1, &adv_credits, 0, RDS_MAX_ADV_CREDIT)) {
		rds_ib_stats_inc(s_ib_tx_throttle);
		clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
		return;
	}

	clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
	rds_ib_send_ack(ic, adv_credits);
}

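/* The ACK WR completed; clear IN_FLIGHT and send any ACK postponed meanwhile. */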
void rds_ib_ack_send_complete(struct rds_ib_connection *ic)
{
	clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
	rds_ib_attempt_ack(ic);
}

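/*
 * The send path calls this to piggyback the latest ACK sequence on an
 * outgoing data frame, satisfying any pending ACK request.
 */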
u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic)
{
	if (test_and_clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
		rds_ib_stats_inc(s_ib_ack_send_piggybacked);
	return rds_ib_get_ack(ic);
}

static struct rds_header *rds_ib_get_header(struct rds_connection *conn,
					    struct rds_ib_recv_work *recv,
					    u32 data_len)
{
	struct rds_ib_connection *ic = conn->c_transport_data;
	void *hdr_buff = &ic->i_recv_hdrs[recv - ic->i_recvs];
	void *addr;
	u32 misplaced_hdr_bytes;

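	/*
	 * Peers speaking a protocol newer than 3.0 put the header in its
	 * own SGE, so it is already in hdr_buff.  Older peers append the
	 * header to the data, which leaves three cases:
	 * 1) the fragment is full-sized, so the header landed in hdr_buff;
	 * 2) the frame is short enough that the whole header sits in the
	 *    data page after the payload;
	 * 3) the header straddles the two: its start is at the end of the
	 *    data page and the rest landed in hdr_buff, so reassemble it.
	 */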
	if (conn->c_version > RDS_PROTOCOL_3_0 || data_len == RDS_FRAG_SIZE)
		return hdr_buff;

	if (data_len <= (RDS_FRAG_SIZE - sizeof(struct rds_header))) {
		addr = kmap_atomic(recv->r_frag->f_page, KM_SOFTIRQ0);
		memcpy(hdr_buff,
		       addr + recv->r_frag->f_offset + data_len,
		       sizeof(struct rds_header));
		kunmap_atomic(addr, KM_SOFTIRQ0);
		return hdr_buff;
	}

	misplaced_hdr_bytes = (sizeof(struct rds_header) - (RDS_FRAG_SIZE - data_len));

	memmove(hdr_buff + misplaced_hdr_bytes, hdr_buff, misplaced_hdr_bytes);

	addr = kmap_atomic(recv->r_frag->f_page, KM_SOFTIRQ0);
	memcpy(hdr_buff, addr + recv->r_frag->f_offset + data_len,
	       sizeof(struct rds_header) - misplaced_hdr_bytes);
	kunmap_atomic(addr, KM_SOFTIRQ0);
	return hdr_buff;
}

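/*
 * An incoming congestion map update arrives as an ordinary message whose
 * payload is the peer's full RDS_CONG_MAP_BYTES bitmap.  Copy it into our
 * map for that peer, noting which ports became uncongested so waiters can
 * be woken.
 */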
static void rds_ib_cong_recv(struct rds_connection *conn,
			     struct rds_ib_incoming *ibinc)
{
	struct rds_cong_map *map;
	unsigned int map_off;
	unsigned int map_page;
	struct rds_page_frag *frag;
	unsigned long frag_off;
	unsigned long to_copy;
	unsigned long copied;
	uint64_t uncongested = 0;
	void *addr;

	/* Catch completely corrupt packets. */
	if (be32_to_cpu(ibinc->ii_inc.i_hdr.h_len) != RDS_CONG_MAP_BYTES)
		return;

	map = conn->c_fcong;
	map_page = 0;
	map_off = 0;

	frag = list_entry(ibinc->ii_frags.next, struct rds_page_frag, f_item);
	frag_off = 0;

	copied = 0;

	while (copied < RDS_CONG_MAP_BYTES) {
		uint64_t *src, *dst;
		unsigned int k;

		to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
		BUG_ON(to_copy & 7); /* Must be 64-bit aligned. */

		addr = kmap_atomic(frag->f_page, KM_SOFTIRQ0);

		src = addr + frag_off;
		dst = (void *)map->m_page_addrs[map_page] + map_off;
		for (k = 0; k < to_copy; k += 8) {
			/*
			 * Bits that were set in the old map and are clear
			 * in the new one are ports that became uncongested.
			 */
			uncongested |= ~(*src) & *dst;
			*dst++ = *src++;
		}
		kunmap_atomic(addr, KM_SOFTIRQ0);

		copied += to_copy;

		map_off += to_copy;
		if (map_off == PAGE_SIZE) {
			map_off = 0;
			map_page++;
		}

		frag_off += to_copy;
		if (frag_off == RDS_FRAG_SIZE) {
			frag = list_entry(frag->f_item.next,
					  struct rds_page_frag, f_item);
			frag_off = 0;
		}
	}

	/* The congestion map is in little-endian order. */
	uncongested = le64_to_cpu(uncongested);

	rds_cong_map_updated(map, uncongested);
}

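/*
 * State gathered while draining the recv completion queue; the final ACK
 * decisions are applied once after all completions have been processed.
 */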
struct rds_ib_ack_state {
	u64		ack_next;
	u64		ack_recv;
	unsigned int	ack_required:1;
	unsigned int	ack_next_valid:1;
	unsigned int	ack_recv_valid:1;
};

static void rds_ib_process_recv(struct rds_connection *conn,
				struct rds_ib_recv_work *recv, u32 data_len,
				struct rds_ib_ack_state *state)
{
	struct rds_ib_connection *ic = conn->c_transport_data;
	struct rds_ib_incoming *ibinc = ic->i_ibinc;
	struct rds_header *ihdr, *hdr;

	rdsdebug("ic %p ibinc %p recv %p byte len %u\n", ic, ibinc, recv,
		 data_len);

	if (data_len < sizeof(struct rds_header)) {
		rds_ib_conn_error(conn, "incoming message "
		       "from %pI4 didn't include a "
		       "header, disconnecting and "
		       "reconnecting\n",
		       &conn->c_faddr);
		return;
	}
	data_len -= sizeof(struct rds_header);

	ihdr = rds_ib_get_header(conn, recv, data_len);

	/* Validate the checksum. */
	if (!rds_message_verify_checksum(ihdr)) {
		rds_ib_conn_error(conn, "incoming message "
		       "from %pI4 has corrupted header - "
		       "forcing a reconnect\n",
		       &conn->c_faddr);
		rds_stats_inc(s_recv_drop_bad_checksum);
		return;
	}

	/* Process the ACK sequence which comes with every packet. */
	state->ack_recv = be64_to_cpu(ihdr->h_ack);
	state->ack_recv_valid = 1;

	/* Process the credits update if there was one. */
	if (ihdr->h_credit)
		rds_ib_send_add_credits(conn, ihdr->h_credit);

	if (ihdr->h_sport == 0 && ihdr->h_dport == 0 && data_len == 0) {
		/* This is an ACK-only packet. */
		rds_ib_stats_inc(s_ib_ack_received);

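		/*
		 * Usually a frag's page ref is dropped when the inc that
		 * owns it is freed.  An ACK-only frame never joins an inc,
		 * so drop the page ref here.  The frag itself stays
		 * attached to the recv and will be reused.
		 */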
		rds_ib_frag_drop_page(recv->r_frag);
		return;
	}

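	/*
	 * If we don't already have an inc on the connection then this
	 * fragment starts a new message: copy its header into the inc and
	 * save the inc so we can hang upcoming fragments off its list.
	 */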
	if (ibinc == NULL) {
		ibinc = recv->r_ibinc;
		recv->r_ibinc = NULL;
		ic->i_ibinc = ibinc;

		hdr = &ibinc->ii_inc.i_hdr;
		memcpy(hdr, ihdr, sizeof(*hdr));
		ic->i_recv_data_rem = be32_to_cpu(hdr->h_len);

		rdsdebug("ic %p ibinc %p rem %u flag 0x%x\n", ic, ibinc,
			 ic->i_recv_data_rem, hdr->h_flags);
	} else {
		hdr = &ibinc->ii_inc.i_hdr;
		/*
		 * We can't just memcmp the headers; fragments of a single
		 * message may carry different ACKs.
		 */
		if (hdr->h_sequence != ihdr->h_sequence
		    || hdr->h_len != ihdr->h_len
		    || hdr->h_sport != ihdr->h_sport
		    || hdr->h_dport != ihdr->h_dport) {
			rds_ib_conn_error(conn,
				"fragment header mismatch; forcing reconnect\n");
			return;
		}
	}

	list_add_tail(&recv->r_frag->f_item, &ibinc->ii_frags);
	recv->r_frag = NULL;

	if (ic->i_recv_data_rem > RDS_FRAG_SIZE)
		ic->i_recv_data_rem -= RDS_FRAG_SIZE;
	else {
		/* The message is complete; hand it up. */
		ic->i_recv_data_rem = 0;
		ic->i_ibinc = NULL;

		if (ibinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP)
			rds_ib_cong_recv(conn, ibinc);
		else {
			rds_recv_incoming(conn, conn->c_faddr, conn->c_laddr,
					  &ibinc->ii_inc, GFP_ATOMIC,
					  KM_SOFTIRQ0);
			state->ack_next = be64_to_cpu(hdr->h_sequence);
			state->ack_next_valid = 1;
		}

		/*
		 * Evaluate the ACK_REQUIRED flag only once the complete
		 * message has been received.
		 */
		if (hdr->h_flags & RDS_FLAG_ACK_REQUIRED) {
			rds_stats_inc(s_recv_ack_required);
			state->ack_required = 1;
		}

		/* We're done with this message; drop our reference. */
		rds_inc_put(&ibinc->ii_inc);
	}
}

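/*
 * Drain the recv CQ.  This runs from the completion interrupt (note the
 * KM_SOFTIRQ0 kmaps above), processes each completed recv, and applies
 * the gathered ACK state once the queue is empty.
 */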
void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context)
{
	struct rds_connection *conn = context;
	struct rds_ib_connection *ic = conn->c_transport_data;
	struct ib_wc wc;
	struct rds_ib_ack_state state = { 0, };
	struct rds_ib_recv_work *recv;

	rdsdebug("conn %p cq %p\n", conn, cq);

	rds_ib_stats_inc(s_ib_rx_cq_call);

	ib_req_notify_cq(cq, IB_CQ_SOLICITED);

	while (ib_poll_cq(cq, 1, &wc) > 0) {
		rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
			 (unsigned long long)wc.wr_id, wc.status, wc.byte_len,
			 be32_to_cpu(wc.ex.imm_data));
		rds_ib_stats_inc(s_ib_rx_cq_event);

		recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)];

		rds_ib_recv_unmap_page(ic, recv);

		/*
		 * Also process recvs in connecting state because it is
		 * possible to get a recv completion _before_ the rdmacm
		 * ESTABLISHED event is processed.
		 */
		if (rds_conn_up(conn) || rds_conn_connecting(conn)) {
			if (wc.status == IB_WC_SUCCESS) {
				rds_ib_process_recv(conn, recv, wc.byte_len, &state);
			} else {
				rds_ib_conn_error(conn, "recv completion on "
				       "%pI4 had status %u, disconnecting and "
				       "reconnecting\n", &conn->c_faddr,
				       wc.status);
			}
		}

		rds_ib_ring_free(&ic->i_recv_ring, 1);
	}

	if (state.ack_next_valid)
		rds_ib_set_ack(ic, state.ack_next, state.ack_required);
	if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) {
		rds_send_drop_acked(conn, state.ack_recv, NULL);
		ic->i_ack_recv = state.ack_recv;
	}
	if (rds_conn_up(conn))
		rds_ib_attempt_ack(ic);

	/*
	 * An empty receive ring means the sender will see RNR timeouts,
	 * so count how often that happens.
	 */
	if (rds_ib_ring_empty(&ic->i_recv_ring))
		rds_ib_stats_inc(s_ib_rx_ring_empty);

	/* If the ring is running low, schedule the thread to refill it. */
	if (rds_ib_ring_low(&ic->i_recv_ring))
		queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
}

int rds_ib_recv(struct rds_connection *conn)
{
	struct rds_ib_connection *ic = conn->c_transport_data;
	int ret = 0;

	rdsdebug("conn %p\n", conn);

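	/*
	 * Refill the recv ring from thread context, serialized by
	 * i_recv_mutex; report -ENOMEM if allocations fail so the caller
	 * can back off and retry.
	 */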
	mutex_lock(&ic->i_recv_mutex);
	if (rds_ib_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 0))
		ret = -ENOMEM;
	else
		rds_ib_stats_inc(s_ib_rx_refill_from_thread);
	mutex_unlock(&ic->i_recv_mutex);

	if (rds_conn_up(conn))
		rds_ib_attempt_ack(ic);

	return ret;
}

int __init rds_ib_recv_init(void)
{
	struct sysinfo si;
	int ret = -ENOMEM;

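	/* Default to at most roughly 1/3 of memory for receive fragments. */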
	si_meminfo(&si);
	rds_ib_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE;

	rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming",
					sizeof(struct rds_ib_incoming),
					0, 0, NULL);
	if (rds_ib_incoming_slab == NULL)
		goto out;

	rds_ib_frag_slab = kmem_cache_create("rds_ib_frag",
					sizeof(struct rds_page_frag),
					0, 0, NULL);
	if (rds_ib_frag_slab == NULL)
		kmem_cache_destroy(rds_ib_incoming_slab);
	else
		ret = 0;
out:
	return ret;
}

void rds_ib_recv_exit(void)
{
	kmem_cache_destroy(rds_ib_incoming_slab);
	kmem_cache_destroy(rds_ib_frag_slab);
}