#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <rdma/rdma_cm.h>

#include "rds_single_path.h"
#include "rds.h"
#include "ib.h"

static struct kmem_cache *rds_ib_incoming_slab;
static struct kmem_cache *rds_ib_frag_slab;
static atomic_t rds_ib_allocation = ATOMIC_INIT(0);

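/* Set up the constant parts of every receive work request once: the wr_id,
 * the scatter/gather list and the header SGE pointing at this slot's entry
 * in the DMA-mapped header array.  The data SGE address is filled in later,
 * when a fragment is attached in rds_ib_recv_refill_one().
 */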
void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
{
	struct rds_ib_recv_work *recv;
	u32 i;

	for (i = 0, recv = ic->i_recvs; i < ic->i_recv_ring.w_nr; i++, recv++) {
		struct ib_sge *sge;

		recv->r_ibinc = NULL;
		recv->r_frag = NULL;

		recv->r_wr.next = NULL;
		recv->r_wr.wr_id = i;
		recv->r_wr.sg_list = recv->r_sge;
		recv->r_wr.num_sge = RDS_IB_RECV_SGE;

		sge = &recv->r_sge[0];
		sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header));
		sge->length = sizeof(struct rds_header);
		sge->lkey = ic->i_pd->local_dma_lkey;

		sge = &recv->r_sge[1];
		sge->addr = 0;
		sge->length = RDS_FRAG_SIZE;
		sge->lkey = ic->i_pd->local_dma_lkey;
	}
}
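/* The frag and incoming caches keep their entries on "headless" lists: the
 * cached pointer is the first entry itself, not a separate list_head.  This
 * helper splices such a list, identified by its first entry, onto the tail
 * of a conventional list.
 */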
static void list_splice_entire_tail(struct list_head *from,
				    struct list_head *to)
{
	struct list_head *from_last = from->prev;

	list_splice_tail(from_last, to);
	list_add_tail(from_last, to);
}

static void rds_ib_cache_xfer_to_ready(struct rds_ib_refill_cache *cache)
{
	struct list_head *tmp;

	tmp = xchg(&cache->xfer, NULL);
	if (tmp) {
		if (cache->ready)
			list_splice_entire_tail(tmp, cache->ready);
		else
			cache->ready = tmp;
	}
}

static int rds_ib_recv_alloc_cache(struct rds_ib_refill_cache *cache)
{
	struct rds_ib_cache_head *head;
	int cpu;

	cache->percpu = alloc_percpu(struct rds_ib_cache_head);
	if (!cache->percpu)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		head = per_cpu_ptr(cache->percpu, cpu);
		head->first = NULL;
		head->count = 0;
	}
	cache->xfer = NULL;
	cache->ready = NULL;

	return 0;
}

int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic)
{
	int ret;

	ret = rds_ib_recv_alloc_cache(&ic->i_cache_incs);
	if (!ret) {
		ret = rds_ib_recv_alloc_cache(&ic->i_cache_frags);
		if (ret)
			free_percpu(ic->i_cache_incs.percpu);
	}

	return ret;
}

static void rds_ib_cache_splice_all_lists(struct rds_ib_refill_cache *cache,
					  struct list_head *caller_list)
{
	struct rds_ib_cache_head *head;
	int cpu;

	for_each_possible_cpu(cpu) {
		head = per_cpu_ptr(cache->percpu, cpu);
		if (head->first) {
			list_splice_entire_tail(head->first, caller_list);
			head->first = NULL;
		}
	}

	if (cache->ready) {
		list_splice_entire_tail(cache->ready, caller_list);
		cache->ready = NULL;
	}
}

void rds_ib_recv_free_caches(struct rds_ib_connection *ic)
{
	struct rds_ib_incoming *inc;
	struct rds_ib_incoming *inc_tmp;
	struct rds_page_frag *frag;
	struct rds_page_frag *frag_tmp;
	LIST_HEAD(list);

	rds_ib_cache_xfer_to_ready(&ic->i_cache_incs);
	rds_ib_cache_splice_all_lists(&ic->i_cache_incs, &list);
	free_percpu(ic->i_cache_incs.percpu);

	list_for_each_entry_safe(inc, inc_tmp, &list, ii_cache_entry) {
		list_del(&inc->ii_cache_entry);
		WARN_ON(!list_empty(&inc->ii_frags));
		kmem_cache_free(rds_ib_incoming_slab, inc);
	}

	rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);
	rds_ib_cache_splice_all_lists(&ic->i_cache_frags, &list);
	free_percpu(ic->i_cache_frags.percpu);

	list_for_each_entry_safe(frag, frag_tmp, &list, f_cache_entry) {
		list_del(&frag->f_cache_entry);
		WARN_ON(!list_empty(&frag->f_item));
		kmem_cache_free(rds_ib_frag_slab, frag);
	}
}

static void rds_ib_recv_cache_put(struct list_head *new_item,
				  struct rds_ib_refill_cache *cache);
static struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache);

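/* Fragments are not returned to the page allocator here; they are parked in
 * the per-connection frag cache so the next refill can reuse them without
 * another page allocation.
 */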
static void rds_ib_frag_free(struct rds_ib_connection *ic,
			     struct rds_page_frag *frag)
{
	rdsdebug("frag %p page %p\n", frag, sg_page(&frag->f_sg));

	rds_ib_recv_cache_put(&frag->f_cache_entry, &ic->i_cache_frags);
	atomic_add(RDS_FRAG_SIZE / SZ_1K, &ic->i_cache_allocs);
	rds_ib_stats_add(s_ib_recv_added_to_cache, RDS_FRAG_SIZE);
}
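/* Called when the last reference to an incoming message is dropped: return
 * all of its fragments, and then the rds_ib_incoming itself, to their
 * per-connection caches.
 */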
void rds_ib_inc_free(struct rds_incoming *inc)
{
	struct rds_ib_incoming *ibinc;
	struct rds_page_frag *frag;
	struct rds_page_frag *pos;
	struct rds_ib_connection *ic = inc->i_conn->c_transport_data;

	ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);

	list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) {
		list_del_init(&frag->f_item);
		rds_ib_frag_free(ic, frag);
	}
	BUG_ON(!list_empty(&ibinc->ii_frags));

	rdsdebug("freeing ibinc %p inc %p\n", ibinc, inc);
	rds_ib_recv_cache_put(&ibinc->ii_cache_entry, &ic->i_cache_incs);
}

static void rds_ib_recv_clear_one(struct rds_ib_connection *ic,
				  struct rds_ib_recv_work *recv)
{
	if (recv->r_ibinc) {
		rds_inc_put(&recv->r_ibinc->ii_inc);
		recv->r_ibinc = NULL;
	}
	if (recv->r_frag) {
		ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE);
		rds_ib_frag_free(ic, recv->r_frag);
		recv->r_frag = NULL;
	}
}

void rds_ib_recv_clear_ring(struct rds_ib_connection *ic)
{
	u32 i;

	for (i = 0; i < ic->i_recv_ring.w_nr; i++)
		rds_ib_recv_clear_one(ic, &ic->i_recvs[i]);
}

static struct rds_ib_incoming *rds_ib_refill_one_inc(struct rds_ib_connection *ic,
						     gfp_t slab_mask)
{
	struct rds_ib_incoming *ibinc;
	struct list_head *cache_item;
	int avail_allocs;

	cache_item = rds_ib_recv_cache_get(&ic->i_cache_incs);
	if (cache_item) {
		ibinc = container_of(cache_item, struct rds_ib_incoming, ii_cache_entry);
	} else {
		avail_allocs = atomic_add_unless(&rds_ib_allocation,
						 1, rds_ib_sysctl_max_recv_allocation);
		if (!avail_allocs) {
			rds_ib_stats_inc(s_ib_rx_alloc_limit);
			return NULL;
		}
		ibinc = kmem_cache_alloc(rds_ib_incoming_slab, slab_mask);
		if (!ibinc) {
			atomic_dec(&rds_ib_allocation);
			return NULL;
		}
		rds_ib_stats_inc(s_ib_rx_total_incs);
	}
	INIT_LIST_HEAD(&ibinc->ii_frags);
	rds_inc_init(&ibinc->ii_inc, ic->conn, &ic->conn->c_faddr);

	return ibinc;
}

static struct rds_page_frag *rds_ib_refill_one_frag(struct rds_ib_connection *ic,
						    gfp_t slab_mask, gfp_t page_mask)
{
	struct rds_page_frag *frag;
	struct list_head *cache_item;
	int ret;

	cache_item = rds_ib_recv_cache_get(&ic->i_cache_frags);
	if (cache_item) {
		frag = container_of(cache_item, struct rds_page_frag, f_cache_entry);
		atomic_sub(RDS_FRAG_SIZE / SZ_1K, &ic->i_cache_allocs);
		rds_ib_stats_add(s_ib_recv_removed_from_cache, RDS_FRAG_SIZE);
	} else {
		frag = kmem_cache_alloc(rds_ib_frag_slab, slab_mask);
		if (!frag)
			return NULL;

		sg_init_table(&frag->f_sg, 1);
		ret = rds_page_remainder_alloc(&frag->f_sg,
					       RDS_FRAG_SIZE, page_mask);
		if (ret) {
			kmem_cache_free(rds_ib_frag_slab, frag);
			return NULL;
		}
		rds_ib_stats_inc(s_ib_rx_total_frags);
	}

	INIT_LIST_HEAD(&frag->f_item);

	return frag;
}

static int rds_ib_recv_refill_one(struct rds_connection *conn,
				  struct rds_ib_recv_work *recv, gfp_t gfp)
{
	struct rds_ib_connection *ic = conn->c_transport_data;
	struct ib_sge *sge;
	int ret = -ENOMEM;
	gfp_t slab_mask = GFP_NOWAIT;
	gfp_t page_mask = GFP_NOWAIT;

	if (gfp & __GFP_DIRECT_RECLAIM) {
		slab_mask = GFP_KERNEL;
		page_mask = GFP_HIGHUSER;
	}

	if (!ic->i_cache_incs.ready)
		rds_ib_cache_xfer_to_ready(&ic->i_cache_incs);
	if (!ic->i_cache_frags.ready)
		rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);

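	/* A recv can keep its ibinc from an earlier attempt that failed at
	 * the fragment stage, so only allocate one if it is missing.
	 */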
	if (!recv->r_ibinc) {
		recv->r_ibinc = rds_ib_refill_one_inc(ic, slab_mask);
		if (!recv->r_ibinc)
			goto out;
	}

	WARN_ON(recv->r_frag);
	recv->r_frag = rds_ib_refill_one_frag(ic, slab_mask, page_mask);
	if (!recv->r_frag)
		goto out;

	ret = ib_dma_map_sg(ic->i_cm_id->device, &recv->r_frag->f_sg,
			    1, DMA_FROM_DEVICE);
	WARN_ON(ret != 1);

	sge = &recv->r_sge[0];
	sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header);
	sge->length = sizeof(struct rds_header);

	sge = &recv->r_sge[1];
	sge->addr = sg_dma_address(&recv->r_frag->f_sg);
	sge->length = sg_dma_len(&recv->r_frag->f_sg);

	ret = 0;
out:
	return ret;
}

static int acquire_refill(struct rds_connection *conn)
{
	return test_and_set_bit(RDS_RECV_REFILL, &conn->c_flags) == 0;
}

static void release_refill(struct rds_connection *conn)
{
	clear_bit(RDS_RECV_REFILL, &conn->c_flags);

	if (waitqueue_active(&conn->c_waitq))
		wake_up_all(&conn->c_waitq);
}
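/* Allocate header/fragment pairs for free slots in the receive ring and post
 * them to the QP.  Only one caller runs at a time, serialised by the
 * RDS_RECV_REFILL bit; the prefill flag lets the ring be filled before the
 * connection is marked up.
 */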
void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
{
	struct rds_ib_connection *ic = conn->c_transport_data;
	struct rds_ib_recv_work *recv;
	unsigned int posted = 0;
	int ret = 0;
	bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM);
	u32 pos;

	if (!acquire_refill(conn))
		return;

	while ((prefill || rds_conn_up(conn)) &&
	       rds_ib_ring_alloc(&ic->i_recv_ring, 1, &pos)) {
		if (pos >= ic->i_recv_ring.w_nr) {
			printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n",
			       pos);
			break;
		}

		recv = &ic->i_recvs[pos];
		ret = rds_ib_recv_refill_one(conn, recv, gfp);
		if (ret)
			break;

		rdsdebug("recv %p ibinc %p page %p addr %lu\n", recv,
			 recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
			 (long)sg_dma_address(&recv->r_frag->f_sg));

		ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, NULL);
		if (ret) {
			rds_ib_conn_error(conn, "recv post on "
			       "%pI6c returned %d, disconnecting and "
			       "reconnecting\n", &conn->c_faddr,
			       ret);
			break;
		}

		posted++;
	}

	if (ic->i_flowctl && posted)
		rds_ib_advertise_credits(conn, posted);

	if (ret)
		rds_ib_ring_unalloc(&ic->i_recv_ring, 1);

	release_refill(conn);

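	/* If the ring is running low (and this caller could have slept) or is
	 * completely empty, hand further refilling to the worker thread,
	 * which ends up calling back in with GFP_KERNEL via
	 * rds_ib_recv_path().
	 */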
	if (rds_conn_up(conn) &&
	    ((can_wait && rds_ib_ring_low(&ic->i_recv_ring)) ||
	     rds_ib_ring_empty(&ic->i_recv_ring))) {
		queue_delayed_work(rds_wq, &conn->c_recv_w, 1);
	}
}
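/* Return a frag or inc to its cache.  Entries are first collected on a
 * per-CPU list with only local interrupt protection; once a CPU has batched
 * RDS_IB_RECYCLE_BATCH_COUNT entries it pushes the whole batch onto the
 * shared cache->xfer list, from where the refill path moves it to
 * cache->ready.
 */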
static void rds_ib_recv_cache_put(struct list_head *new_item,
				  struct rds_ib_refill_cache *cache)
{
	unsigned long flags;
	struct list_head *old, *chpfirst;

	local_irq_save(flags);

	chpfirst = __this_cpu_read(cache->percpu->first);
	if (!chpfirst)
		INIT_LIST_HEAD(new_item);
	else
		list_add_tail(new_item, chpfirst);

	__this_cpu_write(cache->percpu->first, new_item);
	__this_cpu_inc(cache->percpu->count);

	if (__this_cpu_read(cache->percpu->count) < RDS_IB_RECYCLE_BATCH_COUNT)
		goto end;

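	/* Atomically publish the per-CPU batch: pull back anything already on
	 * cache->xfer, splice it onto our batch, and retry until the cmpxchg
	 * installs our list as the new xfer list.
	 */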
	do {
		old = xchg(&cache->xfer, NULL);
		if (old)
			list_splice_entire_tail(old, chpfirst);
		old = cmpxchg(&cache->xfer, NULL, chpfirst);
	} while (old);

	__this_cpu_write(cache->percpu->first, NULL);
	__this_cpu_write(cache->percpu->count, 0);
end:
	local_irq_restore(flags);
}

static struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache)
{
	struct list_head *head = cache->ready;

	if (head) {
		if (!list_empty(head)) {
			cache->ready = head->next;
			list_del_init(head);
		} else
			cache->ready = NULL;
	}

	return head;
}

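/* Copy a received message into the user's iovec, walking the fragment list
 * RDS_FRAG_SIZE bytes at a time and stopping at the header-declared length.
 */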
int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to)
{
	struct rds_ib_incoming *ibinc;
	struct rds_page_frag *frag;
	unsigned long to_copy;
	unsigned long frag_off = 0;
	int copied = 0;
	int ret;
	u32 len;

	ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);
	frag = list_entry(ibinc->ii_frags.next, struct rds_page_frag, f_item);
	len = be32_to_cpu(inc->i_hdr.h_len);

	while (iov_iter_count(to) && copied < len) {
		if (frag_off == RDS_FRAG_SIZE) {
			frag = list_entry(frag->f_item.next,
					  struct rds_page_frag, f_item);
			frag_off = 0;
		}
		to_copy = min_t(unsigned long, iov_iter_count(to),
				RDS_FRAG_SIZE - frag_off);
		to_copy = min_t(unsigned long, to_copy, len - copied);

		rds_stats_add(s_copy_to_user, to_copy);
		ret = copy_page_to_iter(sg_page(&frag->f_sg),
					frag->f_sg.offset + frag_off,
					to_copy,
					to);
		if (ret != to_copy)
			return -EFAULT;

		frag_off += to_copy;
		copied += to_copy;
	}

	return copied;
}
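/* Set up the reusable work request and SGE used for ACK-only sends; the ack
 * header lives in the pre-mapped i_ack_dma buffer.
 */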
void rds_ib_recv_init_ack(struct rds_ib_connection *ic)
{
	struct ib_send_wr *wr = &ic->i_ack_wr;
	struct ib_sge *sge = &ic->i_ack_sge;

	sge->addr = ic->i_ack_dma;
	sge->length = sizeof(struct rds_header);
	sge->lkey = ic->i_pd->local_dma_lkey;

	wr->sg_list = sge;
	wr->num_sge = 1;
	wr->opcode = IB_WR_SEND;
	wr->wr_id = RDS_IB_ACK_WR_ID;
	wr->send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
}
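/* ic->i_ack_next holds the next sequence number to acknowledge and
 * IB_ACK_REQUESTED marks that an ack needs to go out.  On kernels with
 * 64-bit atomics the sequence is kept in an atomic64_t; otherwise it is
 * protected by i_ack_lock.
 */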
#ifndef KERNEL_HAS_ATOMIC64
void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required)
{
	unsigned long flags;

	spin_lock_irqsave(&ic->i_ack_lock, flags);
	ic->i_ack_next = seq;
	if (ack_required)
		set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
	spin_unlock_irqrestore(&ic->i_ack_lock, flags);
}

static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
{
	unsigned long flags;
	u64 seq;

	clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);

	spin_lock_irqsave(&ic->i_ack_lock, flags);
	seq = ic->i_ack_next;
	spin_unlock_irqrestore(&ic->i_ack_lock, flags);

	return seq;
}
#else
void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required)
{
	atomic64_set(&ic->i_ack_next, seq);
	if (ack_required) {
		smp_mb__before_atomic();
		set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
	}
}

static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
{
	clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
	smp_mb__after_atomic();

	return atomic64_read(&ic->i_ack_next);
}
#endif
static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits)
{
	struct rds_header *hdr = ic->i_ack;
	u64 seq;
	int ret;

	seq = rds_ib_get_ack(ic);

	rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq);
	rds_message_populate_header(hdr, 0, 0, 0);
	hdr->h_ack = cpu_to_be64(seq);
	hdr->h_credit = adv_credits;
	rds_message_make_checksum(hdr);
	ic->i_ack_queued = jiffies;

	ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, NULL);
	if (unlikely(ret)) {
		clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
		set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);

		rds_ib_stats_inc(s_ib_ack_send_failure);

		rds_ib_conn_error(ic->conn, "sending ack failed\n");
	} else
		rds_ib_stats_inc(s_ib_ack_sent);
}
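/* Try to send a pending ack.  IB_ACK_IN_FLIGHT ensures only one ack work
 * request is posted at a time; if we lose that race, or can't grab a send
 * credit, the ack stays marked as requested and is retried later (for
 * example from rds_ib_ack_send_complete()).
 */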
void rds_ib_attempt_ack(struct rds_ib_connection *ic)
{
	unsigned int adv_credits;

	if (!test_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
		return;

	if (test_and_set_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags)) {
		rds_ib_stats_inc(s_ib_ack_send_delayed);
		return;
	}

	if (!rds_ib_send_grab_credits(ic, 1, &adv_credits, 0, RDS_MAX_ADV_CREDIT)) {
		rds_ib_stats_inc(s_ib_tx_throttle);
		clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
		return;
	}

	clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
	rds_ib_send_ack(ic, adv_credits);
}
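/* Called from the ack send completion handler: the posted ack work request
 * has finished, so another ack may be sent if one was requested meanwhile.
 */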
void rds_ib_ack_send_complete(struct rds_ib_connection *ic)
{
	clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
	rds_ib_attempt_ack(ic);
}
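/* Used by the send path to piggyback the latest ack sequence on an outgoing
 * message instead of sending a separate ack packet.
 */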
u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic)
{
	if (test_and_clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
		rds_ib_stats_inc(s_ib_ack_send_piggybacked);
	return rds_ib_get_ack(ic);
}
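/* An incoming message flagged RDS_FLAG_CONG_BITMAP carries the peer's
 * congestion bitmap.  Copy it 64 bits at a time into our local map pages,
 * noting which ports changed from congested to uncongested so that
 * rds_cong_map_updated() can wake any waiters.
 */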
static void rds_ib_cong_recv(struct rds_connection *conn,
			     struct rds_ib_incoming *ibinc)
{
	struct rds_cong_map *map;
	unsigned int map_off;
	unsigned int map_page;
	struct rds_page_frag *frag;
	unsigned long frag_off;
	unsigned long to_copy;
	unsigned long copied;
	uint64_t uncongested = 0;
	void *addr;

	if (be32_to_cpu(ibinc->ii_inc.i_hdr.h_len) != RDS_CONG_MAP_BYTES)
		return;

	map = conn->c_fcong;
	map_page = 0;
	map_off = 0;

	frag = list_entry(ibinc->ii_frags.next, struct rds_page_frag, f_item);
	frag_off = 0;

	copied = 0;

	while (copied < RDS_CONG_MAP_BYTES) {
		uint64_t *src, *dst;
		unsigned int k;

		to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
		BUG_ON(to_copy & 7);

		addr = kmap_atomic(sg_page(&frag->f_sg));

		src = addr + frag->f_sg.offset + frag_off;
		dst = (void *)map->m_page_addrs[map_page] + map_off;
		for (k = 0; k < to_copy; k += 8) {
			uncongested |= ~(*src) & *dst;
			*dst++ = *src++;
		}
		kunmap_atomic(addr);

		copied += to_copy;

		map_off += to_copy;
		if (map_off == PAGE_SIZE) {
			map_off = 0;
			map_page++;
		}

		frag_off += to_copy;
		if (frag_off == RDS_FRAG_SIZE) {
			frag = list_entry(frag->f_item.next,
					  struct rds_page_frag, f_item);
			frag_off = 0;
		}
	}

	uncongested = le64_to_cpu(uncongested);

	rds_cong_map_updated(map, uncongested);
}

static void rds_ib_process_recv(struct rds_connection *conn,
				struct rds_ib_recv_work *recv, u32 data_len,
				struct rds_ib_ack_state *state)
{
	struct rds_ib_connection *ic = conn->c_transport_data;
	struct rds_ib_incoming *ibinc = ic->i_ibinc;
	struct rds_header *ihdr, *hdr;

	rdsdebug("ic %p ibinc %p recv %p byte len %u\n", ic, ibinc, recv,
		 data_len);

	if (data_len < sizeof(struct rds_header)) {
		rds_ib_conn_error(conn, "incoming message "
		       "from %pI6c didn't include a "
		       "header, disconnecting and "
		       "reconnecting\n",
		       &conn->c_faddr);
		return;
	}
	data_len -= sizeof(struct rds_header);

	ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs];

	if (!rds_message_verify_checksum(ihdr)) {
		rds_ib_conn_error(conn, "incoming message "
		       "from %pI6c has corrupted header - "
		       "forcing a reconnect\n",
		       &conn->c_faddr);
		rds_stats_inc(s_recv_drop_bad_checksum);
		return;
	}

	state->ack_recv = be64_to_cpu(ihdr->h_ack);
	state->ack_recv_valid = 1;

	if (ihdr->h_credit)
		rds_ib_send_add_credits(conn, ihdr->h_credit);

	if (ihdr->h_sport == 0 && ihdr->h_dport == 0 && data_len == 0) {
		rds_ib_stats_inc(s_ib_ack_received);

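		/* An ack-only packet carries no data, so the posted fragment
		 * was never used; put it straight back on the cache.  The
		 * ibinc stays attached to the recv for reuse.
		 */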
		rds_ib_frag_free(ic, recv->r_frag);
		recv->r_frag = NULL;
		return;
	}

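	/* No ibinc on the connection means this fragment starts a new
	 * message: take the recv's ibinc and copy the header in.  Otherwise
	 * the fragment continues the message currently being reassembled and
	 * its header must match the first fragment's.
	 */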
	if (!ibinc) {
		ibinc = recv->r_ibinc;
		recv->r_ibinc = NULL;
		ic->i_ibinc = ibinc;

		hdr = &ibinc->ii_inc.i_hdr;
		ibinc->ii_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] =
				local_clock();
		memcpy(hdr, ihdr, sizeof(*hdr));
		ic->i_recv_data_rem = be32_to_cpu(hdr->h_len);
		ibinc->ii_inc.i_rx_lat_trace[RDS_MSG_RX_START] =
				local_clock();

		rdsdebug("ic %p ibinc %p rem %u flag 0x%x\n", ic, ibinc,
			 ic->i_recv_data_rem, hdr->h_flags);
	} else {
		hdr = &ibinc->ii_inc.i_hdr;

		if (hdr->h_sequence != ihdr->h_sequence ||
		    hdr->h_len != ihdr->h_len ||
		    hdr->h_sport != ihdr->h_sport ||
		    hdr->h_dport != ihdr->h_dport) {
			rds_ib_conn_error(conn,
				"fragment header mismatch; forcing reconnect\n");
			return;
		}
	}

	list_add_tail(&recv->r_frag->f_item, &ibinc->ii_frags);
	recv->r_frag = NULL;

	if (ic->i_recv_data_rem > RDS_FRAG_SIZE)
		ic->i_recv_data_rem -= RDS_FRAG_SIZE;
	else {
		ic->i_recv_data_rem = 0;
		ic->i_ibinc = NULL;

		if (ibinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP)
			rds_ib_cong_recv(conn, ibinc);
		else {
			rds_recv_incoming(conn, &conn->c_faddr, &conn->c_laddr,
					  &ibinc->ii_inc, GFP_ATOMIC);
			state->ack_next = be64_to_cpu(hdr->h_sequence);
			state->ack_next_valid = 1;
		}

		if (hdr->h_flags & RDS_FLAG_ACK_REQUIRED) {
			rds_stats_inc(s_recv_ack_required);
			state->ack_required = 1;
		}

		rds_inc_put(&ibinc->ii_inc);
	}
}

void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
			     struct ib_wc *wc,
			     struct rds_ib_ack_state *state)
{
	struct rds_connection *conn = ic->conn;
	struct rds_ib_recv_work *recv;

	rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
		 (unsigned long long)wc->wr_id, wc->status,
		 ib_wc_status_msg(wc->status), wc->byte_len,
		 be32_to_cpu(wc->ex.imm_data));

	rds_ib_stats_inc(s_ib_rx_cq_event);
	recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)];
	ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1,
			DMA_FROM_DEVICE);

	if (wc->status == IB_WC_SUCCESS) {
		rds_ib_process_recv(conn, recv, wc->byte_len, state);
	} else {
		if (rds_conn_up(conn) || rds_conn_connecting(conn))
			rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n",
					  &conn->c_laddr, &conn->c_faddr,
					  wc->status,
					  ib_wc_status_msg(wc->status));
	}

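	/* rds_ib_process_recv() takes or frees the fragment on the paths it
	 * completes, but on errors (short or corrupted header, fragment
	 * mismatch) or an unsuccessful completion the frag is still attached
	 * here; return it to the cache before freeing the ring slot.
	 */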
	if (recv->r_frag) {
		rds_ib_frag_free(ic, recv->r_frag);
		recv->r_frag = NULL;
	}
	rds_ib_ring_free(&ic->i_recv_ring, 1);

	if (rds_ib_ring_empty(&ic->i_recv_ring))
		rds_ib_stats_inc(s_ib_rx_ring_empty);

	if (rds_ib_ring_low(&ic->i_recv_ring)) {
		rds_ib_recv_refill(conn, 0, GFP_NOWAIT);
		rds_ib_stats_inc(s_ib_rx_refill_from_cq);
	}
}

int rds_ib_recv_path(struct rds_conn_path *cp)
{
	struct rds_connection *conn = cp->cp_conn;
	struct rds_ib_connection *ic = conn->c_transport_data;
	int ret = 0;

	rdsdebug("conn %p\n", conn);
	if (rds_conn_up(conn)) {
		rds_ib_attempt_ack(ic);
		rds_ib_recv_refill(conn, 0, GFP_KERNEL);
		rds_ib_stats_inc(s_ib_rx_refill_from_thread);
	}

	return ret;
}

int rds_ib_recv_init(void)
{
	struct sysinfo si;
	int ret = -ENOMEM;

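	/* Cap receive allocations at roughly a third of system RAM,
	 * expressed in fragments.
	 */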
	si_meminfo(&si);
	rds_ib_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE;

	rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming",
						 sizeof(struct rds_ib_incoming),
						 0, SLAB_HWCACHE_ALIGN, NULL);
	if (!rds_ib_incoming_slab)
		goto out;

	rds_ib_frag_slab = kmem_cache_create("rds_ib_frag",
					     sizeof(struct rds_page_frag),
					     0, SLAB_HWCACHE_ALIGN, NULL);
	if (!rds_ib_frag_slab) {
		kmem_cache_destroy(rds_ib_incoming_slab);
		rds_ib_incoming_slab = NULL;
	} else
		ret = 0;
out:
	return ret;
}

void rds_ib_recv_exit(void)
{
	kmem_cache_destroy(rds_ib_incoming_slab);
	kmem_cache_destroy(rds_ib_frag_slab);
}