1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33#include <linux/kernel.h>
34#include <linux/slab.h>
35#include <linux/pci.h>
36#include <linux/dma-mapping.h>
37#include <rdma/rdma_cm.h>
38
39#include "rds.h"
40#include "ib.h"
41
/* Slab cache from which struct rds_ib_incoming objects are allocated. */
static struct kmem_cache *rds_ib_incoming_slab;
/* Slab cache from which struct rds_page_frag objects are allocated. */
static struct kmem_cache *rds_ib_frag_slab;
/*
 * Number of rds_ib_incoming objects currently charged against
 * rds_ib_sysctl_max_recv_allocation (see rds_ib_refill_one_inc()).
 */
static atomic_t rds_ib_allocation = ATOMIC_INIT(0);
45
46void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
47{
48 struct rds_ib_recv_work *recv;
49 u32 i;
50
51 for (i = 0, recv = ic->i_recvs; i < ic->i_recv_ring.w_nr; i++, recv++) {
52 struct ib_sge *sge;
53
54 recv->r_ibinc = NULL;
55 recv->r_frag = NULL;
56
57 recv->r_wr.next = NULL;
58 recv->r_wr.wr_id = i;
59 recv->r_wr.sg_list = recv->r_sge;
60 recv->r_wr.num_sge = RDS_IB_RECV_SGE;
61
62 sge = &recv->r_sge[0];
63 sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header));
64 sge->length = sizeof(struct rds_header);
65 sge->lkey = ic->i_mr->lkey;
66
67 sge = &recv->r_sge[1];
68 sge->addr = 0;
69 sge->length = RDS_FRAG_SIZE;
70 sge->lkey = ic->i_mr->lkey;
71 }
72}
73
74
75
76
77
78static void list_splice_entire_tail(struct list_head *from,
79 struct list_head *to)
80{
81 struct list_head *from_last = from->prev;
82
83 list_splice_tail(from_last, to);
84 list_add_tail(from_last, to);
85}
86
87static void rds_ib_cache_xfer_to_ready(struct rds_ib_refill_cache *cache)
88{
89 struct list_head *tmp;
90
91 tmp = xchg(&cache->xfer, NULL);
92 if (tmp) {
93 if (cache->ready)
94 list_splice_entire_tail(tmp, cache->ready);
95 else
96 cache->ready = tmp;
97 }
98}
99
100static int rds_ib_recv_alloc_cache(struct rds_ib_refill_cache *cache)
101{
102 struct rds_ib_cache_head *head;
103 int cpu;
104
105 cache->percpu = alloc_percpu(struct rds_ib_cache_head);
106 if (!cache->percpu)
107 return -ENOMEM;
108
109 for_each_possible_cpu(cpu) {
110 head = per_cpu_ptr(cache->percpu, cpu);
111 head->first = NULL;
112 head->count = 0;
113 }
114 cache->xfer = NULL;
115 cache->ready = NULL;
116
117 return 0;
118}
119
120int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic)
121{
122 int ret;
123
124 ret = rds_ib_recv_alloc_cache(&ic->i_cache_incs);
125 if (!ret) {
126 ret = rds_ib_recv_alloc_cache(&ic->i_cache_frags);
127 if (ret)
128 free_percpu(ic->i_cache_incs.percpu);
129 }
130
131 return ret;
132}
133
134static void rds_ib_cache_splice_all_lists(struct rds_ib_refill_cache *cache,
135 struct list_head *caller_list)
136{
137 struct rds_ib_cache_head *head;
138 int cpu;
139
140 for_each_possible_cpu(cpu) {
141 head = per_cpu_ptr(cache->percpu, cpu);
142 if (head->first) {
143 list_splice_entire_tail(head->first, caller_list);
144 head->first = NULL;
145 }
146 }
147
148 if (cache->ready) {
149 list_splice_entire_tail(cache->ready, caller_list);
150 cache->ready = NULL;
151 }
152}
153
154void rds_ib_recv_free_caches(struct rds_ib_connection *ic)
155{
156 struct rds_ib_incoming *inc;
157 struct rds_ib_incoming *inc_tmp;
158 struct rds_page_frag *frag;
159 struct rds_page_frag *frag_tmp;
160 LIST_HEAD(list);
161
162 rds_ib_cache_xfer_to_ready(&ic->i_cache_incs);
163 rds_ib_cache_splice_all_lists(&ic->i_cache_incs, &list);
164 free_percpu(ic->i_cache_incs.percpu);
165
166 list_for_each_entry_safe(inc, inc_tmp, &list, ii_cache_entry) {
167 list_del(&inc->ii_cache_entry);
168 WARN_ON(!list_empty(&inc->ii_frags));
169 kmem_cache_free(rds_ib_incoming_slab, inc);
170 }
171
172 rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);
173 rds_ib_cache_splice_all_lists(&ic->i_cache_frags, &list);
174 free_percpu(ic->i_cache_frags.percpu);
175
176 list_for_each_entry_safe(frag, frag_tmp, &list, f_cache_entry) {
177 list_del(&frag->f_cache_entry);
178 WARN_ON(!list_empty(&frag->f_item));
179 kmem_cache_free(rds_ib_frag_slab, frag);
180 }
181}
182
183
/*
 * Forward declarations: the recycle-cache put/get helpers are defined
 * further down in this file but are used by the free and refill paths
 * that follow.
 */
static void rds_ib_recv_cache_put(struct list_head *new_item,
				  struct rds_ib_refill_cache *cache);
static struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache);
187
188
189
190static void rds_ib_frag_free(struct rds_ib_connection *ic,
191 struct rds_page_frag *frag)
192{
193 rdsdebug("frag %p page %p\n", frag, sg_page(&frag->f_sg));
194
195 rds_ib_recv_cache_put(&frag->f_cache_entry, &ic->i_cache_frags);
196}
197
198
199void rds_ib_inc_free(struct rds_incoming *inc)
200{
201 struct rds_ib_incoming *ibinc;
202 struct rds_page_frag *frag;
203 struct rds_page_frag *pos;
204 struct rds_ib_connection *ic = inc->i_conn->c_transport_data;
205
206 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);
207
208
209 list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) {
210 list_del_init(&frag->f_item);
211 rds_ib_frag_free(ic, frag);
212 }
213 BUG_ON(!list_empty(&ibinc->ii_frags));
214
215 rdsdebug("freeing ibinc %p inc %p\n", ibinc, inc);
216 rds_ib_recv_cache_put(&ibinc->ii_cache_entry, &ic->i_cache_incs);
217}
218
219static void rds_ib_recv_clear_one(struct rds_ib_connection *ic,
220 struct rds_ib_recv_work *recv)
221{
222 if (recv->r_ibinc) {
223 rds_inc_put(&recv->r_ibinc->ii_inc);
224 recv->r_ibinc = NULL;
225 }
226 if (recv->r_frag) {
227 ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE);
228 rds_ib_frag_free(ic, recv->r_frag);
229 recv->r_frag = NULL;
230 }
231}
232
233void rds_ib_recv_clear_ring(struct rds_ib_connection *ic)
234{
235 u32 i;
236
237 for (i = 0; i < ic->i_recv_ring.w_nr; i++)
238 rds_ib_recv_clear_one(ic, &ic->i_recvs[i]);
239}
240
241static struct rds_ib_incoming *rds_ib_refill_one_inc(struct rds_ib_connection *ic,
242 gfp_t slab_mask)
243{
244 struct rds_ib_incoming *ibinc;
245 struct list_head *cache_item;
246 int avail_allocs;
247
248 cache_item = rds_ib_recv_cache_get(&ic->i_cache_incs);
249 if (cache_item) {
250 ibinc = container_of(cache_item, struct rds_ib_incoming, ii_cache_entry);
251 } else {
252 avail_allocs = atomic_add_unless(&rds_ib_allocation,
253 1, rds_ib_sysctl_max_recv_allocation);
254 if (!avail_allocs) {
255 rds_ib_stats_inc(s_ib_rx_alloc_limit);
256 return NULL;
257 }
258 ibinc = kmem_cache_alloc(rds_ib_incoming_slab, slab_mask);
259 if (!ibinc) {
260 atomic_dec(&rds_ib_allocation);
261 return NULL;
262 }
263 }
264 INIT_LIST_HEAD(&ibinc->ii_frags);
265 rds_inc_init(&ibinc->ii_inc, ic->conn, ic->conn->c_faddr);
266
267 return ibinc;
268}
269
270static struct rds_page_frag *rds_ib_refill_one_frag(struct rds_ib_connection *ic,
271 gfp_t slab_mask, gfp_t page_mask)
272{
273 struct rds_page_frag *frag;
274 struct list_head *cache_item;
275 int ret;
276
277 cache_item = rds_ib_recv_cache_get(&ic->i_cache_frags);
278 if (cache_item) {
279 frag = container_of(cache_item, struct rds_page_frag, f_cache_entry);
280 } else {
281 frag = kmem_cache_alloc(rds_ib_frag_slab, slab_mask);
282 if (!frag)
283 return NULL;
284
285 sg_init_table(&frag->f_sg, 1);
286 ret = rds_page_remainder_alloc(&frag->f_sg,
287 RDS_FRAG_SIZE, page_mask);
288 if (ret) {
289 kmem_cache_free(rds_ib_frag_slab, frag);
290 return NULL;
291 }
292 }
293
294 INIT_LIST_HEAD(&frag->f_item);
295
296 return frag;
297}
298
299static int rds_ib_recv_refill_one(struct rds_connection *conn,
300 struct rds_ib_recv_work *recv, int prefill)
301{
302 struct rds_ib_connection *ic = conn->c_transport_data;
303 struct ib_sge *sge;
304 int ret = -ENOMEM;
305 gfp_t slab_mask = GFP_NOWAIT;
306 gfp_t page_mask = GFP_NOWAIT;
307
308 if (prefill) {
309 slab_mask = GFP_KERNEL;
310 page_mask = GFP_HIGHUSER;
311 }
312
313 if (!ic->i_cache_incs.ready)
314 rds_ib_cache_xfer_to_ready(&ic->i_cache_incs);
315 if (!ic->i_cache_frags.ready)
316 rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);
317
318
319
320
321
322 if (!recv->r_ibinc) {
323 recv->r_ibinc = rds_ib_refill_one_inc(ic, slab_mask);
324 if (!recv->r_ibinc)
325 goto out;
326 }
327
328 WARN_ON(recv->r_frag);
329 recv->r_frag = rds_ib_refill_one_frag(ic, slab_mask, page_mask);
330 if (!recv->r_frag)
331 goto out;
332
333 ret = ib_dma_map_sg(ic->i_cm_id->device, &recv->r_frag->f_sg,
334 1, DMA_FROM_DEVICE);
335 WARN_ON(ret != 1);
336
337 sge = &recv->r_sge[0];
338 sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header);
339 sge->length = sizeof(struct rds_header);
340
341 sge = &recv->r_sge[1];
342 sge->addr = ib_sg_dma_address(ic->i_cm_id->device, &recv->r_frag->f_sg);
343 sge->length = ib_sg_dma_len(ic->i_cm_id->device, &recv->r_frag->f_sg);
344
345 ret = 0;
346out:
347 return ret;
348}
349
350
351
352
353
354
355
356
357void rds_ib_recv_refill(struct rds_connection *conn, int prefill)
358{
359 struct rds_ib_connection *ic = conn->c_transport_data;
360 struct rds_ib_recv_work *recv;
361 struct ib_recv_wr *failed_wr;
362 unsigned int posted = 0;
363 int ret = 0;
364 u32 pos;
365
366 while ((prefill || rds_conn_up(conn)) &&
367 rds_ib_ring_alloc(&ic->i_recv_ring, 1, &pos)) {
368 if (pos >= ic->i_recv_ring.w_nr) {
369 printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n",
370 pos);
371 break;
372 }
373
374 recv = &ic->i_recvs[pos];
375 ret = rds_ib_recv_refill_one(conn, recv, prefill);
376 if (ret) {
377 break;
378 }
379
380
381 ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
382 rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv,
383 recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
384 (long) ib_sg_dma_address(
385 ic->i_cm_id->device,
386 &recv->r_frag->f_sg),
387 ret);
388 if (ret) {
389 rds_ib_conn_error(conn, "recv post on "
390 "%pI4 returned %d, disconnecting and "
391 "reconnecting\n", &conn->c_faddr,
392 ret);
393 break;
394 }
395
396 posted++;
397 }
398
399
400 if (ic->i_flowctl && posted)
401 rds_ib_advertise_credits(conn, posted);
402
403 if (ret)
404 rds_ib_ring_unalloc(&ic->i_recv_ring, 1);
405}
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420static void rds_ib_recv_cache_put(struct list_head *new_item,
421 struct rds_ib_refill_cache *cache)
422{
423 unsigned long flags;
424 struct list_head *old;
425 struct list_head __percpu *chpfirst;
426
427 local_irq_save(flags);
428
429 chpfirst = __this_cpu_read(cache->percpu->first);
430 if (!chpfirst)
431 INIT_LIST_HEAD(new_item);
432 else
433 list_add_tail(new_item, chpfirst);
434
435 __this_cpu_write(chpfirst, new_item);
436 __this_cpu_inc(cache->percpu->count);
437
438 if (__this_cpu_read(cache->percpu->count) < RDS_IB_RECYCLE_BATCH_COUNT)
439 goto end;
440
441
442
443
444
445
446
447 do {
448 old = xchg(&cache->xfer, NULL);
449 if (old)
450 list_splice_entire_tail(old, chpfirst);
451 old = cmpxchg(&cache->xfer, NULL, chpfirst);
452 } while (old);
453
454
455 __this_cpu_write(chpfirst, NULL);
456 __this_cpu_write(cache->percpu->count, 0);
457end:
458 local_irq_restore(flags);
459}
460
461static struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache)
462{
463 struct list_head *head = cache->ready;
464
465 if (head) {
466 if (!list_empty(head)) {
467 cache->ready = head->next;
468 list_del_init(head);
469 } else
470 cache->ready = NULL;
471 }
472
473 return head;
474}
475
476int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov,
477 size_t size)
478{
479 struct rds_ib_incoming *ibinc;
480 struct rds_page_frag *frag;
481 struct iovec *iov = first_iov;
482 unsigned long to_copy;
483 unsigned long frag_off = 0;
484 unsigned long iov_off = 0;
485 int copied = 0;
486 int ret;
487 u32 len;
488
489 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);
490 frag = list_entry(ibinc->ii_frags.next, struct rds_page_frag, f_item);
491 len = be32_to_cpu(inc->i_hdr.h_len);
492
493 while (copied < size && copied < len) {
494 if (frag_off == RDS_FRAG_SIZE) {
495 frag = list_entry(frag->f_item.next,
496 struct rds_page_frag, f_item);
497 frag_off = 0;
498 }
499 while (iov_off == iov->iov_len) {
500 iov_off = 0;
501 iov++;
502 }
503
504 to_copy = min(iov->iov_len - iov_off, RDS_FRAG_SIZE - frag_off);
505 to_copy = min_t(size_t, to_copy, size - copied);
506 to_copy = min_t(unsigned long, to_copy, len - copied);
507
508 rdsdebug("%lu bytes to user [%p, %zu] + %lu from frag "
509 "[%p, %u] + %lu\n",
510 to_copy, iov->iov_base, iov->iov_len, iov_off,
511 sg_page(&frag->f_sg), frag->f_sg.offset, frag_off);
512
513
514 ret = rds_page_copy_to_user(sg_page(&frag->f_sg),
515 frag->f_sg.offset + frag_off,
516 iov->iov_base + iov_off,
517 to_copy);
518 if (ret) {
519 copied = ret;
520 break;
521 }
522
523 iov_off += to_copy;
524 frag_off += to_copy;
525 copied += to_copy;
526 }
527
528 return copied;
529}
530
531
532void rds_ib_recv_init_ack(struct rds_ib_connection *ic)
533{
534 struct ib_send_wr *wr = &ic->i_ack_wr;
535 struct ib_sge *sge = &ic->i_ack_sge;
536
537 sge->addr = ic->i_ack_dma;
538 sge->length = sizeof(struct rds_header);
539 sge->lkey = ic->i_mr->lkey;
540
541 wr->sg_list = sge;
542 wr->num_sge = 1;
543 wr->opcode = IB_WR_SEND;
544 wr->wr_id = RDS_IB_ACK_WR_ID;
545 wr->send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
546}
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570#ifndef KERNEL_HAS_ATOMIC64
571static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
572 int ack_required)
573{
574 unsigned long flags;
575
576 spin_lock_irqsave(&ic->i_ack_lock, flags);
577 ic->i_ack_next = seq;
578 if (ack_required)
579 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
580 spin_unlock_irqrestore(&ic->i_ack_lock, flags);
581}
582
583static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
584{
585 unsigned long flags;
586 u64 seq;
587
588 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
589
590 spin_lock_irqsave(&ic->i_ack_lock, flags);
591 seq = ic->i_ack_next;
592 spin_unlock_irqrestore(&ic->i_ack_lock, flags);
593
594 return seq;
595}
596#else
597static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
598 int ack_required)
599{
600 atomic64_set(&ic->i_ack_next, seq);
601 if (ack_required) {
602 smp_mb__before_clear_bit();
603 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
604 }
605}
606
607static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
608{
609 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
610 smp_mb__after_clear_bit();
611
612 return atomic64_read(&ic->i_ack_next);
613}
614#endif
615
616
617static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits)
618{
619 struct rds_header *hdr = ic->i_ack;
620 struct ib_send_wr *failed_wr;
621 u64 seq;
622 int ret;
623
624 seq = rds_ib_get_ack(ic);
625
626 rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq);
627 rds_message_populate_header(hdr, 0, 0, 0);
628 hdr->h_ack = cpu_to_be64(seq);
629 hdr->h_credit = adv_credits;
630 rds_message_make_checksum(hdr);
631 ic->i_ack_queued = jiffies;
632
633 ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, &failed_wr);
634 if (unlikely(ret)) {
635
636
637
638 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
639 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
640
641 rds_ib_stats_inc(s_ib_ack_send_failure);
642
643 rds_ib_conn_error(ic->conn, "sending ack failed\n");
644 } else
645 rds_ib_stats_inc(s_ib_ack_sent);
646}
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686void rds_ib_attempt_ack(struct rds_ib_connection *ic)
687{
688 unsigned int adv_credits;
689
690 if (!test_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
691 return;
692
693 if (test_and_set_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags)) {
694 rds_ib_stats_inc(s_ib_ack_send_delayed);
695 return;
696 }
697
698
699 if (!rds_ib_send_grab_credits(ic, 1, &adv_credits, 0, RDS_MAX_ADV_CREDIT)) {
700 rds_ib_stats_inc(s_ib_tx_throttle);
701 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
702 return;
703 }
704
705 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
706 rds_ib_send_ack(ic, adv_credits);
707}
708
709
710
711
712
713void rds_ib_ack_send_complete(struct rds_ib_connection *ic)
714{
715 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
716 rds_ib_attempt_ack(ic);
717}
718
719
720
721
722
723u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic)
724{
725 if (test_and_clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
726 rds_ib_stats_inc(s_ib_ack_send_piggybacked);
727 return rds_ib_get_ack(ic);
728}
729
730
731
732
733
734
735
736
737
738static void rds_ib_cong_recv(struct rds_connection *conn,
739 struct rds_ib_incoming *ibinc)
740{
741 struct rds_cong_map *map;
742 unsigned int map_off;
743 unsigned int map_page;
744 struct rds_page_frag *frag;
745 unsigned long frag_off;
746 unsigned long to_copy;
747 unsigned long copied;
748 uint64_t uncongested = 0;
749 void *addr;
750
751
752 if (be32_to_cpu(ibinc->ii_inc.i_hdr.h_len) != RDS_CONG_MAP_BYTES)
753 return;
754
755 map = conn->c_fcong;
756 map_page = 0;
757 map_off = 0;
758
759 frag = list_entry(ibinc->ii_frags.next, struct rds_page_frag, f_item);
760 frag_off = 0;
761
762 copied = 0;
763
764 while (copied < RDS_CONG_MAP_BYTES) {
765 uint64_t *src, *dst;
766 unsigned int k;
767
768 to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
769 BUG_ON(to_copy & 7);
770
771 addr = kmap_atomic(sg_page(&frag->f_sg));
772
773 src = addr + frag_off;
774 dst = (void *)map->m_page_addrs[map_page] + map_off;
775 for (k = 0; k < to_copy; k += 8) {
776
777
778 uncongested |= ~(*src) & *dst;
779 *dst++ = *src++;
780 }
781 kunmap_atomic(addr);
782
783 copied += to_copy;
784
785 map_off += to_copy;
786 if (map_off == PAGE_SIZE) {
787 map_off = 0;
788 map_page++;
789 }
790
791 frag_off += to_copy;
792 if (frag_off == RDS_FRAG_SIZE) {
793 frag = list_entry(frag->f_item.next,
794 struct rds_page_frag, f_item);
795 frag_off = 0;
796 }
797 }
798
799
800 uncongested = le64_to_cpu(uncongested);
801
802 rds_cong_map_updated(map, uncongested);
803}
804
805
806
807
808
809
810
/*
 * ACK bookkeeping gathered while polling the receive completion queue and
 * applied once afterwards by rds_ib_recv_tasklet_fn().
 */
struct rds_ib_ack_state {
	/* Sequence number of the last fully received message. */
	u64 ack_next;
	/* ACK value carried in the most recently processed header. */
	u64 ack_recv;
	/* Set when a completed message had RDS_FLAG_ACK_REQUIRED. */
	unsigned int ack_required:1;
	/* Set when ack_next holds a value. */
	unsigned int ack_next_valid:1;
	/* Set when ack_recv holds a value. */
	unsigned int ack_recv_valid:1;
};
818
819static void rds_ib_process_recv(struct rds_connection *conn,
820 struct rds_ib_recv_work *recv, u32 data_len,
821 struct rds_ib_ack_state *state)
822{
823 struct rds_ib_connection *ic = conn->c_transport_data;
824 struct rds_ib_incoming *ibinc = ic->i_ibinc;
825 struct rds_header *ihdr, *hdr;
826
827
828
829 rdsdebug("ic %p ibinc %p recv %p byte len %u\n", ic, ibinc, recv,
830 data_len);
831
832 if (data_len < sizeof(struct rds_header)) {
833 rds_ib_conn_error(conn, "incoming message "
834 "from %pI4 didn't include a "
835 "header, disconnecting and "
836 "reconnecting\n",
837 &conn->c_faddr);
838 return;
839 }
840 data_len -= sizeof(struct rds_header);
841
842 ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs];
843
844
845 if (!rds_message_verify_checksum(ihdr)) {
846 rds_ib_conn_error(conn, "incoming message "
847 "from %pI4 has corrupted header - "
848 "forcing a reconnect\n",
849 &conn->c_faddr);
850 rds_stats_inc(s_recv_drop_bad_checksum);
851 return;
852 }
853
854
855 state->ack_recv = be64_to_cpu(ihdr->h_ack);
856 state->ack_recv_valid = 1;
857
858
859 if (ihdr->h_credit)
860 rds_ib_send_add_credits(conn, ihdr->h_credit);
861
862 if (ihdr->h_sport == 0 && ihdr->h_dport == 0 && data_len == 0) {
863
864
865
866
867 rds_ib_stats_inc(s_ib_ack_received);
868
869
870
871
872
873
874
875
876
877
878 rds_ib_frag_free(ic, recv->r_frag);
879 recv->r_frag = NULL;
880 return;
881 }
882
883
884
885
886
887
888
889 if (!ibinc) {
890 ibinc = recv->r_ibinc;
891 recv->r_ibinc = NULL;
892 ic->i_ibinc = ibinc;
893
894 hdr = &ibinc->ii_inc.i_hdr;
895 memcpy(hdr, ihdr, sizeof(*hdr));
896 ic->i_recv_data_rem = be32_to_cpu(hdr->h_len);
897
898 rdsdebug("ic %p ibinc %p rem %u flag 0x%x\n", ic, ibinc,
899 ic->i_recv_data_rem, hdr->h_flags);
900 } else {
901 hdr = &ibinc->ii_inc.i_hdr;
902
903
904 if (hdr->h_sequence != ihdr->h_sequence ||
905 hdr->h_len != ihdr->h_len ||
906 hdr->h_sport != ihdr->h_sport ||
907 hdr->h_dport != ihdr->h_dport) {
908 rds_ib_conn_error(conn,
909 "fragment header mismatch; forcing reconnect\n");
910 return;
911 }
912 }
913
914 list_add_tail(&recv->r_frag->f_item, &ibinc->ii_frags);
915 recv->r_frag = NULL;
916
917 if (ic->i_recv_data_rem > RDS_FRAG_SIZE)
918 ic->i_recv_data_rem -= RDS_FRAG_SIZE;
919 else {
920 ic->i_recv_data_rem = 0;
921 ic->i_ibinc = NULL;
922
923 if (ibinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP)
924 rds_ib_cong_recv(conn, ibinc);
925 else {
926 rds_recv_incoming(conn, conn->c_faddr, conn->c_laddr,
927 &ibinc->ii_inc, GFP_ATOMIC);
928 state->ack_next = be64_to_cpu(hdr->h_sequence);
929 state->ack_next_valid = 1;
930 }
931
932
933
934
935 if (hdr->h_flags & RDS_FLAG_ACK_REQUIRED) {
936 rds_stats_inc(s_recv_ack_required);
937 state->ack_required = 1;
938 }
939
940 rds_inc_put(&ibinc->ii_inc);
941 }
942}
943
944
945
946
947
948
949
950
951
952
953void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context)
954{
955 struct rds_connection *conn = context;
956 struct rds_ib_connection *ic = conn->c_transport_data;
957
958 rdsdebug("conn %p cq %p\n", conn, cq);
959
960 rds_ib_stats_inc(s_ib_rx_cq_call);
961
962 tasklet_schedule(&ic->i_recv_tasklet);
963}
964
965static inline void rds_poll_cq(struct rds_ib_connection *ic,
966 struct rds_ib_ack_state *state)
967{
968 struct rds_connection *conn = ic->conn;
969 struct ib_wc wc;
970 struct rds_ib_recv_work *recv;
971
972 while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) {
973 rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
974 (unsigned long long)wc.wr_id, wc.status,
975 rds_ib_wc_status_str(wc.status), wc.byte_len,
976 be32_to_cpu(wc.ex.imm_data));
977 rds_ib_stats_inc(s_ib_rx_cq_event);
978
979 recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)];
980
981 ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE);
982
983
984
985
986
987
988 if (wc.status == IB_WC_SUCCESS) {
989 rds_ib_process_recv(conn, recv, wc.byte_len, state);
990 } else {
991
992 if (rds_conn_up(conn) || rds_conn_connecting(conn))
993 rds_ib_conn_error(conn, "recv completion on %pI4 had "
994 "status %u (%s), disconnecting and "
995 "reconnecting\n", &conn->c_faddr,
996 wc.status,
997 rds_ib_wc_status_str(wc.status));
998 }
999
1000
1001
1002
1003
1004
1005 rds_ib_ring_free(&ic->i_recv_ring, 1);
1006 }
1007}
1008
1009void rds_ib_recv_tasklet_fn(unsigned long data)
1010{
1011 struct rds_ib_connection *ic = (struct rds_ib_connection *) data;
1012 struct rds_connection *conn = ic->conn;
1013 struct rds_ib_ack_state state = { 0, };
1014
1015 rds_poll_cq(ic, &state);
1016 ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
1017 rds_poll_cq(ic, &state);
1018
1019 if (state.ack_next_valid)
1020 rds_ib_set_ack(ic, state.ack_next, state.ack_required);
1021 if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) {
1022 rds_send_drop_acked(conn, state.ack_recv, NULL);
1023 ic->i_ack_recv = state.ack_recv;
1024 }
1025 if (rds_conn_up(conn))
1026 rds_ib_attempt_ack(ic);
1027
1028
1029
1030
1031 if (rds_ib_ring_empty(&ic->i_recv_ring))
1032 rds_ib_stats_inc(s_ib_rx_ring_empty);
1033
1034 if (rds_ib_ring_low(&ic->i_recv_ring))
1035 rds_ib_recv_refill(conn, 0);
1036}
1037
1038int rds_ib_recv(struct rds_connection *conn)
1039{
1040 struct rds_ib_connection *ic = conn->c_transport_data;
1041 int ret = 0;
1042
1043 rdsdebug("conn %p\n", conn);
1044 if (rds_conn_up(conn))
1045 rds_ib_attempt_ack(ic);
1046
1047 return ret;
1048}
1049
1050int rds_ib_recv_init(void)
1051{
1052 struct sysinfo si;
1053 int ret = -ENOMEM;
1054
1055
1056 si_meminfo(&si);
1057 rds_ib_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE;
1058
1059 rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming",
1060 sizeof(struct rds_ib_incoming),
1061 0, SLAB_HWCACHE_ALIGN, NULL);
1062 if (!rds_ib_incoming_slab)
1063 goto out;
1064
1065 rds_ib_frag_slab = kmem_cache_create("rds_ib_frag",
1066 sizeof(struct rds_page_frag),
1067 0, SLAB_HWCACHE_ALIGN, NULL);
1068 if (!rds_ib_frag_slab)
1069 kmem_cache_destroy(rds_ib_incoming_slab);
1070 else
1071 ret = 0;
1072out:
1073 return ret;
1074}
1075
1076void rds_ib_recv_exit(void)
1077{
1078 kmem_cache_destroy(rds_ib_incoming_slab);
1079 kmem_cache_destroy(rds_ib_frag_slab);
1080}
1081