#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <rdma/rdma_cm.h>

#include "rds.h"
#include "ib.h"

static struct kmem_cache *rds_ib_incoming_slab;
static struct kmem_cache *rds_ib_frag_slab;
static atomic_t rds_ib_allocation = ATOMIC_INIT(0);

void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
{
	struct rds_ib_recv_work *recv;
	u32 i;

	for (i = 0, recv = ic->i_recvs; i < ic->i_recv_ring.w_nr; i++, recv++) {
		struct ib_sge *sge;

		recv->r_ibinc = NULL;
		recv->r_frag = NULL;

		recv->r_wr.next = NULL;
		recv->r_wr.wr_id = i;
		recv->r_wr.sg_list = recv->r_sge;
		recv->r_wr.num_sge = RDS_IB_RECV_SGE;

		sge = &recv->r_sge[0];
		sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header));
		sge->length = sizeof(struct rds_header);
		sge->lkey = ic->i_mr->lkey;

		sge = &recv->r_sge[1];
		sge->addr = 0;
		sge->length = RDS_FRAG_SIZE;
		sge->lkey = ic->i_mr->lkey;
	}
}

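/*
 * Move an entire chain of cache entries, the 'from' element included, onto
 * the tail of 'to'.  list_splice_tail() treats 'from_last' as a list head
 * and therefore leaves that element out, so it is re-added explicitly.
 */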
static void list_splice_entire_tail(struct list_head *from,
				    struct list_head *to)
{
	struct list_head *from_last = from->prev;

	list_splice_tail(from_last, to);
	list_add_tail(from_last, to);
}

static void rds_ib_cache_xfer_to_ready(struct rds_ib_refill_cache *cache)
{
	struct list_head *tmp;

	tmp = xchg(&cache->xfer, NULL);
	if (tmp) {
		if (cache->ready)
			list_splice_entire_tail(tmp, cache->ready);
		else
			cache->ready = tmp;
	}
}

static int rds_ib_recv_alloc_cache(struct rds_ib_refill_cache *cache)
{
	struct rds_ib_cache_head *head;
	int cpu;

	cache->percpu = alloc_percpu(struct rds_ib_cache_head);
	if (!cache->percpu)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		head = per_cpu_ptr(cache->percpu, cpu);
		head->first = NULL;
		head->count = 0;
	}
	cache->xfer = NULL;
	cache->ready = NULL;

	return 0;
}

int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic)
{
	int ret;

	ret = rds_ib_recv_alloc_cache(&ic->i_cache_incs);
	if (!ret) {
		ret = rds_ib_recv_alloc_cache(&ic->i_cache_frags);
		if (ret)
			free_percpu(ic->i_cache_incs.percpu);
	}

	return ret;
}

static void rds_ib_cache_splice_all_lists(struct rds_ib_refill_cache *cache,
					  struct list_head *caller_list)
{
	struct rds_ib_cache_head *head;
	int cpu;

	for_each_possible_cpu(cpu) {
		head = per_cpu_ptr(cache->percpu, cpu);
		if (head->first) {
			list_splice_entire_tail(head->first, caller_list);
			head->first = NULL;
		}
	}

	if (cache->ready) {
		list_splice_entire_tail(cache->ready, caller_list);
		cache->ready = NULL;
	}
}

void rds_ib_recv_free_caches(struct rds_ib_connection *ic)
{
	struct rds_ib_incoming *inc;
	struct rds_ib_incoming *inc_tmp;
	struct rds_page_frag *frag;
	struct rds_page_frag *frag_tmp;
	LIST_HEAD(list);

	rds_ib_cache_xfer_to_ready(&ic->i_cache_incs);
	rds_ib_cache_splice_all_lists(&ic->i_cache_incs, &list);
	free_percpu(ic->i_cache_incs.percpu);

	list_for_each_entry_safe(inc, inc_tmp, &list, ii_cache_entry) {
		list_del(&inc->ii_cache_entry);
		WARN_ON(!list_empty(&inc->ii_frags));
		kmem_cache_free(rds_ib_incoming_slab, inc);
	}

	rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);
	rds_ib_cache_splice_all_lists(&ic->i_cache_frags, &list);
	free_percpu(ic->i_cache_frags.percpu);

	list_for_each_entry_safe(frag, frag_tmp, &list, f_cache_entry) {
		list_del(&frag->f_cache_entry);
		WARN_ON(!list_empty(&frag->f_item));
		kmem_cache_free(rds_ib_frag_slab, frag);
	}
}

static void rds_ib_recv_cache_put(struct list_head *new_item,
				  struct rds_ib_refill_cache *cache);
static struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache);

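/* Return a fragment to the per-connection frag cache for later reuse. */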
static void rds_ib_frag_free(struct rds_ib_connection *ic,
			     struct rds_page_frag *frag)
{
	rdsdebug("frag %p page %p\n", frag, sg_page(&frag->f_sg));

	rds_ib_recv_cache_put(&frag->f_cache_entry, &ic->i_cache_frags);
}

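/* Recycle an inc: free its attached frags, then return it to the inc cache. */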
void rds_ib_inc_free(struct rds_incoming *inc)
{
	struct rds_ib_incoming *ibinc;
	struct rds_page_frag *frag;
	struct rds_page_frag *pos;
	struct rds_ib_connection *ic = inc->i_conn->c_transport_data;

	ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);

	list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) {
		list_del_init(&frag->f_item);
		rds_ib_frag_free(ic, frag);
	}
	BUG_ON(!list_empty(&ibinc->ii_frags));

	rdsdebug("freeing ibinc %p inc %p\n", ibinc, inc);
	rds_ib_recv_cache_put(&ibinc->ii_cache_entry, &ic->i_cache_incs);
}

static void rds_ib_recv_clear_one(struct rds_ib_connection *ic,
				  struct rds_ib_recv_work *recv)
{
	if (recv->r_ibinc) {
		rds_inc_put(&recv->r_ibinc->ii_inc);
		recv->r_ibinc = NULL;
	}
	if (recv->r_frag) {
		ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE);
		rds_ib_frag_free(ic, recv->r_frag);
		recv->r_frag = NULL;
	}
}

void rds_ib_recv_clear_ring(struct rds_ib_connection *ic)
{
	u32 i;

	for (i = 0; i < ic->i_recv_ring.w_nr; i++)
		rds_ib_recv_clear_one(ic, &ic->i_recvs[i]);
}

static struct rds_ib_incoming *rds_ib_refill_one_inc(struct rds_ib_connection *ic,
						     gfp_t slab_mask)
{
	struct rds_ib_incoming *ibinc;
	struct list_head *cache_item;
	int avail_allocs;

	cache_item = rds_ib_recv_cache_get(&ic->i_cache_incs);
	if (cache_item) {
		ibinc = container_of(cache_item, struct rds_ib_incoming, ii_cache_entry);
	} else {
		avail_allocs = atomic_add_unless(&rds_ib_allocation,
						 1, rds_ib_sysctl_max_recv_allocation);
		if (!avail_allocs) {
			rds_ib_stats_inc(s_ib_rx_alloc_limit);
			return NULL;
		}
		ibinc = kmem_cache_alloc(rds_ib_incoming_slab, slab_mask);
		if (!ibinc) {
			atomic_dec(&rds_ib_allocation);
			return NULL;
		}
	}
	INIT_LIST_HEAD(&ibinc->ii_frags);
	rds_inc_init(&ibinc->ii_inc, ic->conn, ic->conn->c_faddr);

	return ibinc;
}

static struct rds_page_frag *rds_ib_refill_one_frag(struct rds_ib_connection *ic,
						    gfp_t slab_mask, gfp_t page_mask)
{
	struct rds_page_frag *frag;
	struct list_head *cache_item;
	int ret;

	cache_item = rds_ib_recv_cache_get(&ic->i_cache_frags);
	if (cache_item) {
		frag = container_of(cache_item, struct rds_page_frag, f_cache_entry);
	} else {
		frag = kmem_cache_alloc(rds_ib_frag_slab, slab_mask);
		if (!frag)
			return NULL;

		sg_init_table(&frag->f_sg, 1);
		ret = rds_page_remainder_alloc(&frag->f_sg,
					       RDS_FRAG_SIZE, page_mask);
		if (ret) {
			kmem_cache_free(rds_ib_frag_slab, frag);
			return NULL;
		}
	}

	INIT_LIST_HEAD(&frag->f_item);

	return frag;
}

static int rds_ib_recv_refill_one(struct rds_connection *conn,
				  struct rds_ib_recv_work *recv, int prefill)
{
	struct rds_ib_connection *ic = conn->c_transport_data;
	struct ib_sge *sge;
	int ret = -ENOMEM;
	gfp_t slab_mask = GFP_NOWAIT;
	gfp_t page_mask = GFP_NOWAIT;

	if (prefill) {
		slab_mask = GFP_KERNEL;
		page_mask = GFP_HIGHUSER;
	}

	if (!ic->i_cache_incs.ready)
		rds_ib_cache_xfer_to_ready(&ic->i_cache_incs);
	if (!ic->i_cache_frags.ready)
		rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);

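	/*
	 * The inc is only taken off a recv when that recv carried the start of
	 * a message; recvs that carried continuation fragments still have one
	 * allocated, so only refill it when it is missing.
	 */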
	if (!recv->r_ibinc) {
		recv->r_ibinc = rds_ib_refill_one_inc(ic, slab_mask);
		if (!recv->r_ibinc)
			goto out;
	}

	WARN_ON(recv->r_frag);
	recv->r_frag = rds_ib_refill_one_frag(ic, slab_mask, page_mask);
	if (!recv->r_frag)
		goto out;

	ret = ib_dma_map_sg(ic->i_cm_id->device, &recv->r_frag->f_sg,
			    1, DMA_FROM_DEVICE);
	WARN_ON(ret != 1);

	sge = &recv->r_sge[0];
	sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header);
	sge->length = sizeof(struct rds_header);

	sge = &recv->r_sge[1];
	sge->addr = ib_sg_dma_address(ic->i_cm_id->device, &recv->r_frag->f_sg);
	sge->length = ib_sg_dma_len(ic->i_cm_id->device, &recv->r_frag->f_sg);

	ret = 0;
out:
	return ret;
}

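/*
 * Allocate header/fragment resources for unused ring entries and post them
 * as receive work requests.  Called both at connection setup (prefill, may
 * sleep) and from the receive path when the ring runs low (atomic context).
 */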
void rds_ib_recv_refill(struct rds_connection *conn, int prefill)
{
	struct rds_ib_connection *ic = conn->c_transport_data;
	struct rds_ib_recv_work *recv;
	struct ib_recv_wr *failed_wr;
	unsigned int posted = 0;
	int ret = 0;
	u32 pos;

	while ((prefill || rds_conn_up(conn)) &&
	       rds_ib_ring_alloc(&ic->i_recv_ring, 1, &pos)) {
		if (pos >= ic->i_recv_ring.w_nr) {
			printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n",
			       pos);
			break;
		}

		recv = &ic->i_recvs[pos];
		ret = rds_ib_recv_refill_one(conn, recv, prefill);
		if (ret) {
			break;
		}

		ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
		rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv,
			 recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
			 (long) ib_sg_dma_address(
				ic->i_cm_id->device,
				&recv->r_frag->f_sg),
			 ret);
		if (ret) {
			rds_ib_conn_error(conn, "recv post on "
			       "%pI4 returned %d, disconnecting and "
			       "reconnecting\n", &conn->c_faddr,
			       ret);
			break;
		}

		posted++;
	}

	if (ic->i_flowctl && posted)
		rds_ib_advertise_credits(conn, posted);

	if (ret)
		rds_ib_ring_unalloc(&ic->i_recv_ring, 1);
}

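/*
 * rds_ib_recv_cache_put() returns recently used incs and frags to a lockless
 * per-cpu free list.  Once a cpu has batched up RDS_IB_RECYCLE_BATCH_COUNT
 * entries it pushes the whole chain onto cache->xfer with atomic ops, from
 * where the refill path moves it to cache->ready for reuse.
 */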
static void rds_ib_recv_cache_put(struct list_head *new_item,
				  struct rds_ib_refill_cache *cache)
{
	unsigned long flags;
	struct list_head *old, *chpfirst;

	local_irq_save(flags);

	chpfirst = __this_cpu_read(cache->percpu->first);
	if (!chpfirst)
		INIT_LIST_HEAD(new_item);
	else
		list_add_tail(new_item, chpfirst);

	__this_cpu_write(cache->percpu->first, new_item);
	__this_cpu_inc(cache->percpu->count);

	if (__this_cpu_read(cache->percpu->count) < RDS_IB_RECYCLE_BATCH_COUNT)
		goto end;

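	/*
	 * cache->xfer is only ever manipulated with atomic operations, so no
	 * lock is needed: pull back anything another cpu parked there, splice
	 * it onto our chain, and retry until we install the chain ourselves.
	 */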
	do {
		old = xchg(&cache->xfer, NULL);
		if (old)
			list_splice_entire_tail(old, chpfirst);
		old = cmpxchg(&cache->xfer, NULL, chpfirst);
	} while (old);

	__this_cpu_write(cache->percpu->first, NULL);
	__this_cpu_write(cache->percpu->count, 0);
end:
	local_irq_restore(flags);
}

static struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache)
{
	struct list_head *head = cache->ready;

	if (head) {
		if (!list_empty(head)) {
			cache->ready = head->next;
			list_del_init(head);
		} else
			cache->ready = NULL;
	}

	return head;
}

int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to)
{
	struct rds_ib_incoming *ibinc;
	struct rds_page_frag *frag;
	unsigned long to_copy;
	unsigned long frag_off = 0;
	int copied = 0;
	int ret;
	u32 len;

	ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);
	frag = list_entry(ibinc->ii_frags.next, struct rds_page_frag, f_item);
	len = be32_to_cpu(inc->i_hdr.h_len);

	while (iov_iter_count(to) && copied < len) {
		if (frag_off == RDS_FRAG_SIZE) {
			frag = list_entry(frag->f_item.next,
					  struct rds_page_frag, f_item);
			frag_off = 0;
		}
		to_copy = min_t(unsigned long, iov_iter_count(to),
				RDS_FRAG_SIZE - frag_off);
		to_copy = min_t(unsigned long, to_copy, len - copied);

		rds_stats_add(s_copy_to_user, to_copy);
		ret = copy_page_to_iter(sg_page(&frag->f_sg),
					frag->f_sg.offset + frag_off,
					to_copy,
					to);
		if (ret != to_copy)
			return -EFAULT;

		frag_off += to_copy;
		copied += to_copy;
	}

	return copied;
}

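/* Set up the single, reusable work request used to send ACKs. */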
void rds_ib_recv_init_ack(struct rds_ib_connection *ic)
{
	struct ib_send_wr *wr = &ic->i_ack_wr;
	struct ib_sge *sge = &ic->i_ack_sge;

	sge->addr = ic->i_ack_dma;
	sge->length = sizeof(struct rds_header);
	sge->lkey = ic->i_mr->lkey;

	wr->sg_list = sge;
	wr->num_sge = 1;
	wr->opcode = IB_WR_SEND;
	wr->wr_id = RDS_IB_ACK_WR_ID;
	wr->send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
}

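/*
 * The next ACK sequence number is tracked in ic->i_ack_next together with
 * the IB_ACK_REQUESTED flag.  When atomic64_t is unavailable the pair is
 * protected by i_ack_lock instead, hence the two implementations below.
 */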
#ifndef KERNEL_HAS_ATOMIC64
static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
			   int ack_required)
{
	unsigned long flags;

	spin_lock_irqsave(&ic->i_ack_lock, flags);
	ic->i_ack_next = seq;
	if (ack_required)
		set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
	spin_unlock_irqrestore(&ic->i_ack_lock, flags);
}

static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
{
	unsigned long flags;
	u64 seq;

	clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);

	spin_lock_irqsave(&ic->i_ack_lock, flags);
	seq = ic->i_ack_next;
	spin_unlock_irqrestore(&ic->i_ack_lock, flags);

	return seq;
}
#else
static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
			   int ack_required)
{
	atomic64_set(&ic->i_ack_next, seq);
	if (ack_required) {
		smp_mb__before_atomic();
		set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
	}
}

static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
{
	clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
	smp_mb__after_atomic();

	return atomic64_read(&ic->i_ack_next);
}
#endif

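/* Build an ACK-only header for the latest ACKed sequence and post it on the
 * dedicated ACK work request. */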
static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits)
{
	struct rds_header *hdr = ic->i_ack;
	struct ib_send_wr *failed_wr;
	u64 seq;
	int ret;

	seq = rds_ib_get_ack(ic);

	rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq);
	rds_message_populate_header(hdr, 0, 0, 0);
	hdr->h_ack = cpu_to_be64(seq);
	hdr->h_credit = adv_credits;
	rds_message_make_checksum(hdr);
	ic->i_ack_queued = jiffies;

	ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, &failed_wr);
	if (unlikely(ret)) {
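		/*
		 * The post failed: clear IN_FLIGHT so future attempts are not
		 * blocked and re-arm REQUESTED so the ACK gets retried.
		 */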
		clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
		set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);

		rds_ib_stats_inc(s_ib_ack_send_failure);

		rds_ib_conn_error(ic->conn, "sending ack failed\n");
	} else
		rds_ib_stats_inc(s_ib_ack_sent);
}

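/*
 * Send an ACK if one has been requested and no ACK send is already in
 * flight, consuming a single send credit to do so.
 */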
void rds_ib_attempt_ack(struct rds_ib_connection *ic)
{
	unsigned int adv_credits;

	if (!test_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
		return;

	if (test_and_set_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags)) {
		rds_ib_stats_inc(s_ib_ack_send_delayed);
		return;
	}

	if (!rds_ib_send_grab_credits(ic, 1, &adv_credits, 0, RDS_MAX_ADV_CREDIT)) {
		rds_ib_stats_inc(s_ib_tx_throttle);
		clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
		return;
	}

	clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
	rds_ib_send_ack(ic, adv_credits);
}

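/* Called from the send completion of an ACK so another one can be posted. */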
void rds_ib_ack_send_complete(struct rds_ib_connection *ic)
{
	clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
	rds_ib_attempt_ack(ic);
}

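/*
 * Fetch the ACK sequence to piggyback on an outgoing data message; this
 * satisfies any pending ACK request.
 */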
u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic)
{
	if (test_and_clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
		rds_ib_stats_inc(s_ib_ack_send_piggybacked);
	return rds_ib_get_ack(ic);
}

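/*
 * An incoming congestion-bitmap update: copy the received fragments over the
 * peer's congestion map page by page, noting which ports became uncongested
 * so waiters can be woken via rds_cong_map_updated().
 */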
static void rds_ib_cong_recv(struct rds_connection *conn,
			     struct rds_ib_incoming *ibinc)
{
	struct rds_cong_map *map;
	unsigned int map_off;
	unsigned int map_page;
	struct rds_page_frag *frag;
	unsigned long frag_off;
	unsigned long to_copy;
	unsigned long copied;
	uint64_t uncongested = 0;
	void *addr;

	if (be32_to_cpu(ibinc->ii_inc.i_hdr.h_len) != RDS_CONG_MAP_BYTES)
		return;

	map = conn->c_fcong;
	map_page = 0;
	map_off = 0;

	frag = list_entry(ibinc->ii_frags.next, struct rds_page_frag, f_item);
	frag_off = 0;

	copied = 0;

	while (copied < RDS_CONG_MAP_BYTES) {
		uint64_t *src, *dst;
		unsigned int k;

		to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
		BUG_ON(to_copy & 7);

		addr = kmap_atomic(sg_page(&frag->f_sg));

		src = addr + frag_off;
		dst = (void *)map->m_page_addrs[map_page] + map_off;
		for (k = 0; k < to_copy; k += 8) {
			uncongested |= ~(*src) & *dst;
			*dst++ = *src++;
		}
		kunmap_atomic(addr);

		copied += to_copy;

		map_off += to_copy;
		if (map_off == PAGE_SIZE) {
			map_off = 0;
			map_page++;
		}

		frag_off += to_copy;
		if (frag_off == RDS_FRAG_SIZE) {
			frag = list_entry(frag->f_item.next,
					  struct rds_page_frag, f_item);
			frag_off = 0;
		}
	}

	uncongested = le64_to_cpu(uncongested);

	rds_cong_map_updated(map, uncongested);
}

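/*
 * ACK-related state accumulated while draining the receive CQ; it is applied
 * once per tasklet run rather than per completion.
 */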
struct rds_ib_ack_state {
	u64		ack_next;
	u64		ack_recv;
	unsigned int	ack_required:1;
	unsigned int	ack_next_valid:1;
	unsigned int	ack_recv_valid:1;
};

static void rds_ib_process_recv(struct rds_connection *conn,
				struct rds_ib_recv_work *recv, u32 data_len,
				struct rds_ib_ack_state *state)
{
	struct rds_ib_connection *ic = conn->c_transport_data;
	struct rds_ib_incoming *ibinc = ic->i_ibinc;
	struct rds_header *ihdr, *hdr;

	rdsdebug("ic %p ibinc %p recv %p byte len %u\n", ic, ibinc, recv,
		 data_len);

	if (data_len < sizeof(struct rds_header)) {
		rds_ib_conn_error(conn, "incoming message "
		       "from %pI4 didn't include a "
		       "header, disconnecting and "
		       "reconnecting\n",
		       &conn->c_faddr);
		return;
	}
	data_len -= sizeof(struct rds_header);

	ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs];

	if (!rds_message_verify_checksum(ihdr)) {
		rds_ib_conn_error(conn, "incoming message "
		       "from %pI4 has corrupted header - "
		       "forcing a reconnect\n",
		       &conn->c_faddr);
		rds_stats_inc(s_recv_drop_bad_checksum);
		return;
	}

	state->ack_recv = be64_to_cpu(ihdr->h_ack);
	state->ack_recv_valid = 1;

	if (ihdr->h_credit)
		rds_ib_send_add_credits(conn, ihdr->h_credit);

	if (ihdr->h_sport == 0 && ihdr->h_dport == 0 && data_len == 0) {
		rds_ib_stats_inc(s_ib_ack_received);

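		/*
		 * An ACK-only packet carries no payload for the socket layer,
		 * so the data fragment is returned to the cache right away
		 * instead of being attached to an inc.
		 */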
		rds_ib_frag_free(ic, recv->r_frag);
		recv->r_frag = NULL;
		return;
	}

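	/*
	 * If there is no inc in progress this fragment starts a new message:
	 * take the inc pre-allocated for this recv, copy in the header and
	 * note how many payload bytes remain.  Otherwise sanity-check that
	 * the fragment's header matches the message being assembled.
	 */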
	if (!ibinc) {
		ibinc = recv->r_ibinc;
		recv->r_ibinc = NULL;
		ic->i_ibinc = ibinc;

		hdr = &ibinc->ii_inc.i_hdr;
		memcpy(hdr, ihdr, sizeof(*hdr));
		ic->i_recv_data_rem = be32_to_cpu(hdr->h_len);

		rdsdebug("ic %p ibinc %p rem %u flag 0x%x\n", ic, ibinc,
			 ic->i_recv_data_rem, hdr->h_flags);
	} else {
		hdr = &ibinc->ii_inc.i_hdr;

		if (hdr->h_sequence != ihdr->h_sequence ||
		    hdr->h_len != ihdr->h_len ||
		    hdr->h_sport != ihdr->h_sport ||
		    hdr->h_dport != ihdr->h_dport) {
			rds_ib_conn_error(conn,
				"fragment header mismatch; forcing reconnect\n");
			return;
		}
	}

	list_add_tail(&recv->r_frag->f_item, &ibinc->ii_frags);
	recv->r_frag = NULL;

	if (ic->i_recv_data_rem > RDS_FRAG_SIZE)
		ic->i_recv_data_rem -= RDS_FRAG_SIZE;
	else {
		ic->i_recv_data_rem = 0;
		ic->i_ibinc = NULL;

		if (ibinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP)
			rds_ib_cong_recv(conn, ibinc);
		else {
			rds_recv_incoming(conn, conn->c_faddr, conn->c_laddr,
					  &ibinc->ii_inc, GFP_ATOMIC);
			state->ack_next = be64_to_cpu(hdr->h_sequence);
			state->ack_next_valid = 1;
		}

		if (hdr->h_flags & RDS_FLAG_ACK_REQUIRED) {
			rds_stats_inc(s_recv_ack_required);
			state->ack_required = 1;
		}

		rds_inc_put(&ibinc->ii_inc);
	}
}

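/*
 * Receive CQ completion handler: runs in interrupt context, so all real work
 * is deferred to the receive tasklet.
 */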
void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context)
{
	struct rds_connection *conn = context;
	struct rds_ib_connection *ic = conn->c_transport_data;

	rdsdebug("conn %p cq %p\n", conn, cq);

	rds_ib_stats_inc(s_ib_rx_cq_call);

	tasklet_schedule(&ic->i_recv_tasklet);
}

static inline void rds_poll_cq(struct rds_ib_connection *ic,
			       struct rds_ib_ack_state *state)
{
	struct rds_connection *conn = ic->conn;
	struct ib_wc wc;
	struct rds_ib_recv_work *recv;

	while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) {
		rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
			 (unsigned long long)wc.wr_id, wc.status,
			 rds_ib_wc_status_str(wc.status), wc.byte_len,
			 be32_to_cpu(wc.ex.imm_data));
		rds_ib_stats_inc(s_ib_rx_cq_event);

		recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)];

		ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE);

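		/*
		 * Recvs are processed even while the connection is still in
		 * the connecting state, because a receive completion can
		 * arrive before the RDMA CM ESTABLISHED event is handled.
		 */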
		if (wc.status == IB_WC_SUCCESS) {
			rds_ib_process_recv(conn, recv, wc.byte_len, state);
		} else {
			if (rds_conn_up(conn) || rds_conn_connecting(conn))
				rds_ib_conn_error(conn, "recv completion on %pI4 had "
						  "status %u (%s), disconnecting and "
						  "reconnecting\n", &conn->c_faddr,
						  wc.status,
						  rds_ib_wc_status_str(wc.status));
		}

		rds_ib_ring_free(&ic->i_recv_ring, 1);
	}
}

void rds_ib_recv_tasklet_fn(unsigned long data)
{
	struct rds_ib_connection *ic = (struct rds_ib_connection *) data;
	struct rds_connection *conn = ic->conn;
	struct rds_ib_ack_state state = { 0, };

	rds_poll_cq(ic, &state);
	ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
	rds_poll_cq(ic, &state);

	if (state.ack_next_valid)
		rds_ib_set_ack(ic, state.ack_next, state.ack_required);
	if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) {
		rds_send_drop_acked(conn, state.ack_recv, NULL);
		ic->i_ack_recv = state.ack_recv;
	}
	if (rds_conn_up(conn))
		rds_ib_attempt_ack(ic);

	if (rds_ib_ring_empty(&ic->i_recv_ring))
		rds_ib_stats_inc(s_ib_rx_ring_empty);

	if (rds_ib_ring_low(&ic->i_recv_ring))
		rds_ib_recv_refill(conn, 0);
}

int rds_ib_recv(struct rds_connection *conn)
{
	struct rds_ib_connection *ic = conn->c_transport_data;
	int ret = 0;

	rdsdebug("conn %p\n", conn);
	if (rds_conn_up(conn))
		rds_ib_attempt_ack(ic);

	return ret;
}

int rds_ib_recv_init(void)
{
	struct sysinfo si;
	int ret = -ENOMEM;

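	/* Default the receive allocation limit to roughly a third of RAM,
	 * expressed in RDS_FRAG_SIZE fragments. */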
	si_meminfo(&si);
	rds_ib_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE;

	rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming",
					sizeof(struct rds_ib_incoming),
					0, SLAB_HWCACHE_ALIGN, NULL);
	if (!rds_ib_incoming_slab)
		goto out;

	rds_ib_frag_slab = kmem_cache_create("rds_ib_frag",
					sizeof(struct rds_page_frag),
					0, SLAB_HWCACHE_ALIGN, NULL);
	if (!rds_ib_frag_slab)
		kmem_cache_destroy(rds_ib_incoming_slab);
	else
		ret = 0;
out:
	return ret;
}

void rds_ib_recv_exit(void)
{
	kmem_cache_destroy(rds_ib_incoming_slab);
	kmem_cache_destroy(rds_ib_frag_slab);
}