1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33#include <linux/kernel.h>
34#include <linux/slab.h>
35#include <linux/pci.h>
36#include <linux/dma-mapping.h>
37#include <rdma/rdma_cm.h>
38
39#include "rds_single_path.h"
40#include "rds.h"
41#include "ib.h"
42
/* Slab cache for struct rds_ib_incoming; created in rds_ib_recv_init(). */
static struct kmem_cache *rds_ib_incoming_slab;
/* Slab cache for struct rds_page_frag; created in rds_ib_recv_init(). */
static struct kmem_cache *rds_ib_frag_slab;
/*
 * Bumped in rds_ib_refill_one_inc() for each inc taken from the slab and
 * capped there at rds_ib_sysctl_max_recv_allocation.
 */
static atomic_t rds_ib_allocation = ATOMIC_INIT(0);
46
47void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
48{
49 struct rds_ib_recv_work *recv;
50 u32 i;
51
52 for (i = 0, recv = ic->i_recvs; i < ic->i_recv_ring.w_nr; i++, recv++) {
53 struct ib_sge *sge;
54
55 recv->r_ibinc = NULL;
56 recv->r_frag = NULL;
57
58 recv->r_wr.next = NULL;
59 recv->r_wr.wr_id = i;
60 recv->r_wr.sg_list = recv->r_sge;
61 recv->r_wr.num_sge = RDS_IB_RECV_SGE;
62
63 sge = &recv->r_sge[0];
64 sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header));
65 sge->length = sizeof(struct rds_header);
66 sge->lkey = ic->i_pd->local_dma_lkey;
67
68 sge = &recv->r_sge[1];
69 sge->addr = 0;
70 sge->length = RDS_FRAG_SIZE;
71 sge->lkey = ic->i_pd->local_dma_lkey;
72 }
73}
74
75
76
77
78
79static void list_splice_entire_tail(struct list_head *from,
80 struct list_head *to)
81{
82 struct list_head *from_last = from->prev;
83
84 list_splice_tail(from_last, to);
85 list_add_tail(from_last, to);
86}
87
88static void rds_ib_cache_xfer_to_ready(struct rds_ib_refill_cache *cache)
89{
90 struct list_head *tmp;
91
92 tmp = xchg(&cache->xfer, NULL);
93 if (tmp) {
94 if (cache->ready)
95 list_splice_entire_tail(tmp, cache->ready);
96 else
97 cache->ready = tmp;
98 }
99}
100
101static int rds_ib_recv_alloc_cache(struct rds_ib_refill_cache *cache)
102{
103 struct rds_ib_cache_head *head;
104 int cpu;
105
106 cache->percpu = alloc_percpu(struct rds_ib_cache_head);
107 if (!cache->percpu)
108 return -ENOMEM;
109
110 for_each_possible_cpu(cpu) {
111 head = per_cpu_ptr(cache->percpu, cpu);
112 head->first = NULL;
113 head->count = 0;
114 }
115 cache->xfer = NULL;
116 cache->ready = NULL;
117
118 return 0;
119}
120
121int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic)
122{
123 int ret;
124
125 ret = rds_ib_recv_alloc_cache(&ic->i_cache_incs);
126 if (!ret) {
127 ret = rds_ib_recv_alloc_cache(&ic->i_cache_frags);
128 if (ret)
129 free_percpu(ic->i_cache_incs.percpu);
130 }
131
132 return ret;
133}
134
135static void rds_ib_cache_splice_all_lists(struct rds_ib_refill_cache *cache,
136 struct list_head *caller_list)
137{
138 struct rds_ib_cache_head *head;
139 int cpu;
140
141 for_each_possible_cpu(cpu) {
142 head = per_cpu_ptr(cache->percpu, cpu);
143 if (head->first) {
144 list_splice_entire_tail(head->first, caller_list);
145 head->first = NULL;
146 }
147 }
148
149 if (cache->ready) {
150 list_splice_entire_tail(cache->ready, caller_list);
151 cache->ready = NULL;
152 }
153}
154
155void rds_ib_recv_free_caches(struct rds_ib_connection *ic)
156{
157 struct rds_ib_incoming *inc;
158 struct rds_ib_incoming *inc_tmp;
159 struct rds_page_frag *frag;
160 struct rds_page_frag *frag_tmp;
161 LIST_HEAD(list);
162
163 rds_ib_cache_xfer_to_ready(&ic->i_cache_incs);
164 rds_ib_cache_splice_all_lists(&ic->i_cache_incs, &list);
165 free_percpu(ic->i_cache_incs.percpu);
166
167 list_for_each_entry_safe(inc, inc_tmp, &list, ii_cache_entry) {
168 list_del(&inc->ii_cache_entry);
169 WARN_ON(!list_empty(&inc->ii_frags));
170 kmem_cache_free(rds_ib_incoming_slab, inc);
171 }
172
173 rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);
174 rds_ib_cache_splice_all_lists(&ic->i_cache_frags, &list);
175 free_percpu(ic->i_cache_frags.percpu);
176
177 list_for_each_entry_safe(frag, frag_tmp, &list, f_cache_entry) {
178 list_del(&frag->f_cache_entry);
179 WARN_ON(!list_empty(&frag->f_item));
180 kmem_cache_free(rds_ib_frag_slab, frag);
181 }
182}
183
184
/*
 * Forward declarations: the recycle-cache put/get helpers are defined later
 * in this file but are used by the free and refill paths that follow.
 */
static void rds_ib_recv_cache_put(struct list_head *new_item,
				  struct rds_ib_refill_cache *cache);
static struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache);
188
189
190
191static void rds_ib_frag_free(struct rds_ib_connection *ic,
192 struct rds_page_frag *frag)
193{
194 rdsdebug("frag %p page %p\n", frag, sg_page(&frag->f_sg));
195
196 rds_ib_recv_cache_put(&frag->f_cache_entry, &ic->i_cache_frags);
197 atomic_add(RDS_FRAG_SIZE / SZ_1K, &ic->i_cache_allocs);
198 rds_ib_stats_add(s_ib_recv_added_to_cache, RDS_FRAG_SIZE);
199}
200
201
202void rds_ib_inc_free(struct rds_incoming *inc)
203{
204 struct rds_ib_incoming *ibinc;
205 struct rds_page_frag *frag;
206 struct rds_page_frag *pos;
207 struct rds_ib_connection *ic = inc->i_conn->c_transport_data;
208
209 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);
210
211
212 list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) {
213 list_del_init(&frag->f_item);
214 rds_ib_frag_free(ic, frag);
215 }
216 BUG_ON(!list_empty(&ibinc->ii_frags));
217
218 rdsdebug("freeing ibinc %p inc %p\n", ibinc, inc);
219 rds_ib_recv_cache_put(&ibinc->ii_cache_entry, &ic->i_cache_incs);
220}
221
222static void rds_ib_recv_clear_one(struct rds_ib_connection *ic,
223 struct rds_ib_recv_work *recv)
224{
225 if (recv->r_ibinc) {
226 rds_inc_put(&recv->r_ibinc->ii_inc);
227 recv->r_ibinc = NULL;
228 }
229 if (recv->r_frag) {
230 ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE);
231 rds_ib_frag_free(ic, recv->r_frag);
232 recv->r_frag = NULL;
233 }
234}
235
236void rds_ib_recv_clear_ring(struct rds_ib_connection *ic)
237{
238 u32 i;
239
240 for (i = 0; i < ic->i_recv_ring.w_nr; i++)
241 rds_ib_recv_clear_one(ic, &ic->i_recvs[i]);
242}
243
244static struct rds_ib_incoming *rds_ib_refill_one_inc(struct rds_ib_connection *ic,
245 gfp_t slab_mask)
246{
247 struct rds_ib_incoming *ibinc;
248 struct list_head *cache_item;
249 int avail_allocs;
250
251 cache_item = rds_ib_recv_cache_get(&ic->i_cache_incs);
252 if (cache_item) {
253 ibinc = container_of(cache_item, struct rds_ib_incoming, ii_cache_entry);
254 } else {
255 avail_allocs = atomic_add_unless(&rds_ib_allocation,
256 1, rds_ib_sysctl_max_recv_allocation);
257 if (!avail_allocs) {
258 rds_ib_stats_inc(s_ib_rx_alloc_limit);
259 return NULL;
260 }
261 ibinc = kmem_cache_alloc(rds_ib_incoming_slab, slab_mask);
262 if (!ibinc) {
263 atomic_dec(&rds_ib_allocation);
264 return NULL;
265 }
266 rds_ib_stats_inc(s_ib_rx_total_incs);
267 }
268 INIT_LIST_HEAD(&ibinc->ii_frags);
269 rds_inc_init(&ibinc->ii_inc, ic->conn, ic->conn->c_faddr);
270
271 return ibinc;
272}
273
274static struct rds_page_frag *rds_ib_refill_one_frag(struct rds_ib_connection *ic,
275 gfp_t slab_mask, gfp_t page_mask)
276{
277 struct rds_page_frag *frag;
278 struct list_head *cache_item;
279 int ret;
280
281 cache_item = rds_ib_recv_cache_get(&ic->i_cache_frags);
282 if (cache_item) {
283 frag = container_of(cache_item, struct rds_page_frag, f_cache_entry);
284 atomic_sub(RDS_FRAG_SIZE / SZ_1K, &ic->i_cache_allocs);
285 rds_ib_stats_add(s_ib_recv_added_to_cache, RDS_FRAG_SIZE);
286 } else {
287 frag = kmem_cache_alloc(rds_ib_frag_slab, slab_mask);
288 if (!frag)
289 return NULL;
290
291 sg_init_table(&frag->f_sg, 1);
292 ret = rds_page_remainder_alloc(&frag->f_sg,
293 RDS_FRAG_SIZE, page_mask);
294 if (ret) {
295 kmem_cache_free(rds_ib_frag_slab, frag);
296 return NULL;
297 }
298 rds_ib_stats_inc(s_ib_rx_total_frags);
299 }
300
301 INIT_LIST_HEAD(&frag->f_item);
302
303 return frag;
304}
305
306static int rds_ib_recv_refill_one(struct rds_connection *conn,
307 struct rds_ib_recv_work *recv, gfp_t gfp)
308{
309 struct rds_ib_connection *ic = conn->c_transport_data;
310 struct ib_sge *sge;
311 int ret = -ENOMEM;
312 gfp_t slab_mask = GFP_NOWAIT;
313 gfp_t page_mask = GFP_NOWAIT;
314
315 if (gfp & __GFP_DIRECT_RECLAIM) {
316 slab_mask = GFP_KERNEL;
317 page_mask = GFP_HIGHUSER;
318 }
319
320 if (!ic->i_cache_incs.ready)
321 rds_ib_cache_xfer_to_ready(&ic->i_cache_incs);
322 if (!ic->i_cache_frags.ready)
323 rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);
324
325
326
327
328
329 if (!recv->r_ibinc) {
330 recv->r_ibinc = rds_ib_refill_one_inc(ic, slab_mask);
331 if (!recv->r_ibinc)
332 goto out;
333 }
334
335 WARN_ON(recv->r_frag);
336 recv->r_frag = rds_ib_refill_one_frag(ic, slab_mask, page_mask);
337 if (!recv->r_frag)
338 goto out;
339
340 ret = ib_dma_map_sg(ic->i_cm_id->device, &recv->r_frag->f_sg,
341 1, DMA_FROM_DEVICE);
342 WARN_ON(ret != 1);
343
344 sge = &recv->r_sge[0];
345 sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header);
346 sge->length = sizeof(struct rds_header);
347
348 sge = &recv->r_sge[1];
349 sge->addr = ib_sg_dma_address(ic->i_cm_id->device, &recv->r_frag->f_sg);
350 sge->length = ib_sg_dma_len(ic->i_cm_id->device, &recv->r_frag->f_sg);
351
352 ret = 0;
353out:
354 return ret;
355}
356
357static int acquire_refill(struct rds_connection *conn)
358{
359 return test_and_set_bit(RDS_RECV_REFILL, &conn->c_flags) == 0;
360}
361
362static void release_refill(struct rds_connection *conn)
363{
364 clear_bit(RDS_RECV_REFILL, &conn->c_flags);
365
366
367
368
369
370
371 if (waitqueue_active(&conn->c_waitq))
372 wake_up_all(&conn->c_waitq);
373}
374
375
376
377
378
379
380
381
/*
 * Post receive work requests until the receive ring has no free slot, the
 * connection is no longer up (ignored when @prefill is set), or refilling or
 * posting a slot fails.
 *
 * @conn:    connection whose IB receive ring is refilled
 * @prefill: when non-zero, keep filling even if rds_conn_up() is false
 * @gfp:     allocation context; __GFP_DIRECT_RECLAIM set is treated as
 *           "the caller may block"
 */
void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
{
	struct rds_ib_connection *ic = conn->c_transport_data;
	struct rds_ib_recv_work *recv;
	struct ib_recv_wr *failed_wr;
	unsigned int posted = 0;
	int ret = 0;
	bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM);
	u32 pos;

	/*
	 * Only one refiller runs at a time.  If RDS_RECV_REFILL is already
	 * held by someone else, return without doing anything.
	 */
	if (!acquire_refill(conn))
		return;

	while ((prefill || rds_conn_up(conn)) &&
	       rds_ib_ring_alloc(&ic->i_recv_ring, 1, &pos)) {
		/*
		 * Sanity check on the index handed out by the ring.
		 * NOTE(review): this break leaves ret untouched, so when ret
		 * is 0 the slot just allocated is not handed back by the
		 * rds_ib_ring_unalloc() below -- confirm this is intended.
		 */
		if (pos >= ic->i_recv_ring.w_nr) {
			printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n",
					pos);
			break;
		}

		recv = &ic->i_recvs[pos];
		ret = rds_ib_recv_refill_one(conn, recv, gfp);
		if (ret) {
			break;
		}

		rdsdebug("recv %p ibinc %p page %p addr %lu\n", recv,
			 recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
			 (long) ib_sg_dma_address(
				ic->i_cm_id->device,
				&recv->r_frag->f_sg));

		/* Hand the work request to the HCA; a failure errors the conn. */
		ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
		if (ret) {
			rds_ib_conn_error(conn, "recv post on "
			       "%pI4 returned %d, disconnecting and "
			       "reconnecting\n", &conn->c_faddr,
			       ret);
			break;
		}

		posted++;
	}

	/* With flow control enabled, advertise the newly posted buffers. */
	if (ic->i_flowctl && posted)
		rds_ib_advertise_credits(conn, posted);

	/*
	 * ret is non-zero only when the last iteration failed after taking a
	 * ring slot; return that one slot to the ring.
	 */
	if (ret)
		rds_ib_ring_unalloc(&ic->i_recv_ring, 1);

	release_refill(conn);

	/*
	 * Queue the receive work (with a delay of 1) when the connection is
	 * up and either the ring is completely empty, or this caller was
	 * allowed to block and the ring is still low.  The check is done
	 * after release_refill(), so the queued work can take the refill bit.
	 */
	if (rds_conn_up(conn) &&
	    ((can_wait && rds_ib_ring_low(&ic->i_recv_ring)) ||
	    rds_ib_ring_empty(&ic->i_recv_ring))) {
		queue_delayed_work(rds_wq, &conn->c_recv_w, 1);
	}
}
457
458
459
460
461
462
463
464
465
466
467
468
469
470
/*
 * Return @new_item to @cache.
 *
 * Items are collected on the current CPU's private chain with local
 * interrupts disabled.  Once that chain has reached
 * RDS_IB_RECYCLE_BATCH_COUNT items, the whole chain is handed over to
 * cache->xfer and the per-CPU state is reset.
 */
static void rds_ib_recv_cache_put(struct list_head *new_item,
				 struct rds_ib_refill_cache *cache)
{
	unsigned long flags;
	struct list_head *old, *chpfirst;

	local_irq_save(flags);

	chpfirst = __this_cpu_read(cache->percpu->first);
	if (!chpfirst)
		/* Nothing on this CPU yet: new_item becomes a one-node chain. */
		INIT_LIST_HEAD(new_item);
	else
		/* Link new_item in directly before the current first node. */
		list_add_tail(new_item, chpfirst);

	__this_cpu_write(cache->percpu->first, new_item);
	__this_cpu_inc(cache->percpu->count);

	/* Batch not full yet: keep the chain private to this CPU. */
	if (__this_cpu_read(cache->percpu->count) < RDS_IB_RECYCLE_BATCH_COUNT)
		goto end;

	/*
	 * Hand the chain over through cache->xfer without taking a lock.
	 * xchg() empties xfer, and any chain found there is appended to
	 * ours.  cmpxchg() then installs our chain only if xfer is still
	 * NULL.  If another chain was installed in between, cmpxchg()
	 * returns it (non-NULL) and the loop runs again to absorb it.
	 */
	do {
		old = xchg(&cache->xfer, NULL);
		if (old)
			list_splice_entire_tail(old, chpfirst);
		old = cmpxchg(&cache->xfer, NULL, chpfirst);
	} while (old);


	/* The chain now lives on xfer; start a fresh per-CPU batch. */
	__this_cpu_write(cache->percpu->first, NULL);
	__this_cpu_write(cache->percpu->count, 0);
end:
	local_irq_restore(flags);
}
510
511static struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache)
512{
513 struct list_head *head = cache->ready;
514
515 if (head) {
516 if (!list_empty(head)) {
517 cache->ready = head->next;
518 list_del_init(head);
519 } else
520 cache->ready = NULL;
521 }
522
523 return head;
524}
525
526int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to)
527{
528 struct rds_ib_incoming *ibinc;
529 struct rds_page_frag *frag;
530 unsigned long to_copy;
531 unsigned long frag_off = 0;
532 int copied = 0;
533 int ret;
534 u32 len;
535
536 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);
537 frag = list_entry(ibinc->ii_frags.next, struct rds_page_frag, f_item);
538 len = be32_to_cpu(inc->i_hdr.h_len);
539
540 while (iov_iter_count(to) && copied < len) {
541 if (frag_off == RDS_FRAG_SIZE) {
542 frag = list_entry(frag->f_item.next,
543 struct rds_page_frag, f_item);
544 frag_off = 0;
545 }
546 to_copy = min_t(unsigned long, iov_iter_count(to),
547 RDS_FRAG_SIZE - frag_off);
548 to_copy = min_t(unsigned long, to_copy, len - copied);
549
550
551 rds_stats_add(s_copy_to_user, to_copy);
552 ret = copy_page_to_iter(sg_page(&frag->f_sg),
553 frag->f_sg.offset + frag_off,
554 to_copy,
555 to);
556 if (ret != to_copy)
557 return -EFAULT;
558
559 frag_off += to_copy;
560 copied += to_copy;
561 }
562
563 return copied;
564}
565
566
567void rds_ib_recv_init_ack(struct rds_ib_connection *ic)
568{
569 struct ib_send_wr *wr = &ic->i_ack_wr;
570 struct ib_sge *sge = &ic->i_ack_sge;
571
572 sge->addr = ic->i_ack_dma;
573 sge->length = sizeof(struct rds_header);
574 sge->lkey = ic->i_pd->local_dma_lkey;
575
576 wr->sg_list = sge;
577 wr->num_sge = 1;
578 wr->opcode = IB_WR_SEND;
579 wr->wr_id = RDS_IB_ACK_WR_ID;
580 wr->send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
581}
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605#ifndef KERNEL_HAS_ATOMIC64
606void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required)
607{
608 unsigned long flags;
609
610 spin_lock_irqsave(&ic->i_ack_lock, flags);
611 ic->i_ack_next = seq;
612 if (ack_required)
613 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
614 spin_unlock_irqrestore(&ic->i_ack_lock, flags);
615}
616
617static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
618{
619 unsigned long flags;
620 u64 seq;
621
622 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
623
624 spin_lock_irqsave(&ic->i_ack_lock, flags);
625 seq = ic->i_ack_next;
626 spin_unlock_irqrestore(&ic->i_ack_lock, flags);
627
628 return seq;
629}
630#else
631void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required)
632{
633 atomic64_set(&ic->i_ack_next, seq);
634 if (ack_required) {
635 smp_mb__before_atomic();
636 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
637 }
638}
639
640static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
641{
642 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
643 smp_mb__after_atomic();
644
645 return atomic64_read(&ic->i_ack_next);
646}
647#endif
648
649
650static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits)
651{
652 struct rds_header *hdr = ic->i_ack;
653 struct ib_send_wr *failed_wr;
654 u64 seq;
655 int ret;
656
657 seq = rds_ib_get_ack(ic);
658
659 rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq);
660 rds_message_populate_header(hdr, 0, 0, 0);
661 hdr->h_ack = cpu_to_be64(seq);
662 hdr->h_credit = adv_credits;
663 rds_message_make_checksum(hdr);
664 ic->i_ack_queued = jiffies;
665
666 ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, &failed_wr);
667 if (unlikely(ret)) {
668
669
670
671 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
672 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
673
674 rds_ib_stats_inc(s_ib_ack_send_failure);
675
676 rds_ib_conn_error(ic->conn, "sending ack failed\n");
677 } else
678 rds_ib_stats_inc(s_ib_ack_sent);
679}
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719void rds_ib_attempt_ack(struct rds_ib_connection *ic)
720{
721 unsigned int adv_credits;
722
723 if (!test_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
724 return;
725
726 if (test_and_set_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags)) {
727 rds_ib_stats_inc(s_ib_ack_send_delayed);
728 return;
729 }
730
731
732 if (!rds_ib_send_grab_credits(ic, 1, &adv_credits, 0, RDS_MAX_ADV_CREDIT)) {
733 rds_ib_stats_inc(s_ib_tx_throttle);
734 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
735 return;
736 }
737
738 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
739 rds_ib_send_ack(ic, adv_credits);
740}
741
742
743
744
745
746void rds_ib_ack_send_complete(struct rds_ib_connection *ic)
747{
748 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
749 rds_ib_attempt_ack(ic);
750}
751
752
753
754
755
756u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic)
757{
758 if (test_and_clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
759 rds_ib_stats_inc(s_ib_ack_send_piggybacked);
760 return rds_ib_get_ack(ic);
761}
762
763
764
765
766
767
768
769
770
/*
 * Copy a received congestion bitmap from the frags of @ibinc into the
 * connection's c_fcong map, 64 bits at a time, and report which bits changed
 * from set to clear via rds_cong_map_updated().
 *
 * Messages whose header length is not exactly RDS_CONG_MAP_BYTES are
 * silently ignored.
 */
static void rds_ib_cong_recv(struct rds_connection *conn,
			      struct rds_ib_incoming *ibinc)
{
	struct rds_cong_map *map;
	unsigned int map_off;
	unsigned int map_page;
	struct rds_page_frag *frag;
	unsigned long frag_off;
	unsigned long to_copy;
	unsigned long copied;
	uint64_t uncongested = 0;
	void *addr;

	/* Reject anything that is not a full-size congestion map. */
	if (be32_to_cpu(ibinc->ii_inc.i_hdr.h_len) != RDS_CONG_MAP_BYTES)
		return;

	map = conn->c_fcong;
	map_page = 0;
	map_off = 0;

	frag = list_entry(ibinc->ii_frags.next, struct rds_page_frag, f_item);
	frag_off = 0;

	copied = 0;

	while (copied < RDS_CONG_MAP_BYTES) {
		uint64_t *src, *dst;
		unsigned int k;

		/* Copy up to the end of the current frag or map page, whichever is nearer. */
		to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
		/* The inner loop moves whole 64-bit words. */
		BUG_ON(to_copy & 7);

		addr = kmap_atomic(sg_page(&frag->f_sg));

		src = addr + frag->f_sg.offset + frag_off;
		dst = (void *)map->m_page_addrs[map_page] + map_off;
		for (k = 0; k < to_copy; k += 8) {
			/*
			 * Accumulate bits that are set in the existing map
			 * word but clear in the incoming one, then overwrite
			 * the map word with the incoming word.
			 */
			uncongested |= ~(*src) & *dst;
			*dst++ = *src++;
		}
		kunmap_atomic(addr);

		copied += to_copy;

		/* Advance to the next map page once this one is filled. */
		map_off += to_copy;
		if (map_off == PAGE_SIZE) {
			map_off = 0;
			map_page++;
		}

		/* Advance to the next frag once this one is consumed. */
		frag_off += to_copy;
		if (frag_off == RDS_FRAG_SIZE) {
			frag = list_entry(frag->f_item.next,
					  struct rds_page_frag, f_item);
			frag_off = 0;
		}
	}

	/* The accumulated word is converted from little endian to CPU order. */
	uncongested = le64_to_cpu(uncongested);

	rds_cong_map_updated(map, uncongested);
}
837
/*
 * Handle one successfully completed receive.
 *
 * The header is validated, then its ACK and credit fields are processed.
 * The frag is either recycled (ACK-only packet) or appended to the message
 * being assembled on ic->i_ibinc.  When the last fragment has arrived, the
 * message is delivered, or applied as a congestion bitmap, and the
 * assembly reference is dropped.
 *
 * @conn:     connection the completion belongs to
 * @recv:     receive work request that completed
 * @data_len: byte count reported by the completion (header included)
 * @state:    ack bookkeeping filled in for the caller
 *
 * On the error returns recv->r_frag is left in place for the caller.
 */
static void rds_ib_process_recv(struct rds_connection *conn,
				struct rds_ib_recv_work *recv, u32 data_len,
				struct rds_ib_ack_state *state)
{
	struct rds_ib_connection *ic = conn->c_transport_data;
	struct rds_ib_incoming *ibinc = ic->i_ibinc;
	struct rds_header *ihdr, *hdr;

	/* ibinc is non-NULL when a multi-fragment message is in progress. */

	rdsdebug("ic %p ibinc %p recv %p byte len %u\n", ic, ibinc, recv,
		 data_len);

	/* Every receive must carry at least a full header. */
	if (data_len < sizeof(struct rds_header)) {
		rds_ib_conn_error(conn, "incoming message "
		       "from %pI4 didn't include a "
		       "header, disconnecting and "
		       "reconnecting\n",
		       &conn->c_faddr);
		return;
	}
	data_len -= sizeof(struct rds_header);

	/* The header slot has the same index as the work request. */
	ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs];

	/* Drop the connection on a header checksum failure. */
	if (!rds_message_verify_checksum(ihdr)) {
		rds_ib_conn_error(conn, "incoming message "
		       "from %pI4 has corrupted header - "
		       "forcing a reconnect\n",
		       &conn->c_faddr);
		rds_stats_inc(s_recv_drop_bad_checksum);
		return;
	}

	/* Record the peer's ACK carried by this header. */
	state->ack_recv = be64_to_cpu(ihdr->h_ack);
	state->ack_recv_valid = 1;

	/* Apply any send credits the peer granted. */
	if (ihdr->h_credit)
		rds_ib_send_add_credits(conn, ihdr->h_credit);

	if (ihdr->h_sport == 0 && ihdr->h_dport == 0 && data_len == 0) {
		/*
		 * Zero ports and no payload: counted as a received ACK.
		 * Nothing is attached to an inc, so the frag is recycled
		 * here rather than through rds_ib_inc_free().
		 */
		rds_ib_stats_inc(s_ib_ack_received);

		rds_ib_frag_free(ic, recv->r_frag);
		recv->r_frag = NULL;
		return;
	}

	/*
	 * No message in progress: this fragment starts one.  Take the inc
	 * from the work request, make it the connection's current inc, copy
	 * the header into it and note how many payload bytes are expected.
	 */
	if (!ibinc) {
		ibinc = recv->r_ibinc;
		recv->r_ibinc = NULL;
		ic->i_ibinc = ibinc;

		hdr = &ibinc->ii_inc.i_hdr;
		ibinc->ii_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] =
				local_clock();
		memcpy(hdr, ihdr, sizeof(*hdr));
		ic->i_recv_data_rem = be32_to_cpu(hdr->h_len);
		ibinc->ii_inc.i_rx_lat_trace[RDS_MSG_RX_START] =
				local_clock();

		rdsdebug("ic %p ibinc %p rem %u flag 0x%x\n", ic, ibinc,
			 ic->i_recv_data_rem, hdr->h_flags);
	} else {
		hdr = &ibinc->ii_inc.i_hdr;

		/*
		 * Continuation fragment: only the identifying fields are
		 * compared against the first fragment's header.
		 */
		if (hdr->h_sequence != ihdr->h_sequence ||
		    hdr->h_len != ihdr->h_len ||
		    hdr->h_sport != ihdr->h_sport ||
		    hdr->h_dport != ihdr->h_dport) {
			rds_ib_conn_error(conn,
				"fragment header mismatch; forcing reconnect\n");
			return;
		}
	}

	/* The frag now belongs to the message, not to the work request. */
	list_add_tail(&recv->r_frag->f_item, &ibinc->ii_frags);
	recv->r_frag = NULL;

	if (ic->i_recv_data_rem > RDS_FRAG_SIZE)
		ic->i_recv_data_rem -= RDS_FRAG_SIZE;
	else {
		/* Last fragment: the message is complete. */
		ic->i_recv_data_rem = 0;
		ic->i_ibinc = NULL;

		if (ibinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP)
			rds_ib_cong_recv(conn, ibinc);
		else {
			rds_recv_incoming(conn, conn->c_faddr, conn->c_laddr,
					  &ibinc->ii_inc, GFP_ATOMIC);
			state->ack_next = be64_to_cpu(hdr->h_sequence);
			state->ack_next_valid = 1;
		}

		/*
		 * ACK_REQUIRED is evaluated only once the whole message has
		 * been received and handed off above.
		 */
		if (hdr->h_flags & RDS_FLAG_ACK_REQUIRED) {
			rds_stats_inc(s_recv_ack_required);
			state->ack_required = 1;
		}

		/* Drop the reference held while the message was assembled. */
		rds_inc_put(&ibinc->ii_inc);
	}
}
966
967void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
968 struct ib_wc *wc,
969 struct rds_ib_ack_state *state)
970{
971 struct rds_connection *conn = ic->conn;
972 struct rds_ib_recv_work *recv;
973
974 rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
975 (unsigned long long)wc->wr_id, wc->status,
976 ib_wc_status_msg(wc->status), wc->byte_len,
977 be32_to_cpu(wc->ex.imm_data));
978
979 rds_ib_stats_inc(s_ib_rx_cq_event);
980 recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)];
981 ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1,
982 DMA_FROM_DEVICE);
983
984
985
986
987
988 if (wc->status == IB_WC_SUCCESS) {
989 rds_ib_process_recv(conn, recv, wc->byte_len, state);
990 } else {
991
992 if (rds_conn_up(conn) || rds_conn_connecting(conn))
993 rds_ib_conn_error(conn, "recv completion on <%pI4,%pI4> had status %u (%s), disconnecting and reconnecting\n",
994 &conn->c_laddr, &conn->c_faddr,
995 wc->status,
996 ib_wc_status_msg(wc->status));
997 }
998
999
1000
1001
1002
1003
1004
1005
1006 if (recv->r_frag) {
1007 rds_ib_frag_free(ic, recv->r_frag);
1008 recv->r_frag = NULL;
1009 }
1010 rds_ib_ring_free(&ic->i_recv_ring, 1);
1011
1012
1013
1014
1015 if (rds_ib_ring_empty(&ic->i_recv_ring))
1016 rds_ib_stats_inc(s_ib_rx_ring_empty);
1017
1018 if (rds_ib_ring_low(&ic->i_recv_ring)) {
1019 rds_ib_recv_refill(conn, 0, GFP_NOWAIT);
1020 rds_ib_stats_inc(s_ib_rx_refill_from_cq);
1021 }
1022}
1023
1024int rds_ib_recv_path(struct rds_conn_path *cp)
1025{
1026 struct rds_connection *conn = cp->cp_conn;
1027 struct rds_ib_connection *ic = conn->c_transport_data;
1028 int ret = 0;
1029
1030 rdsdebug("conn %p\n", conn);
1031 if (rds_conn_up(conn)) {
1032 rds_ib_attempt_ack(ic);
1033 rds_ib_recv_refill(conn, 0, GFP_KERNEL);
1034 rds_ib_stats_inc(s_ib_rx_refill_from_thread);
1035 }
1036
1037 return ret;
1038}
1039
1040int rds_ib_recv_init(void)
1041{
1042 struct sysinfo si;
1043 int ret = -ENOMEM;
1044
1045
1046 si_meminfo(&si);
1047 rds_ib_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE;
1048
1049 rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming",
1050 sizeof(struct rds_ib_incoming),
1051 0, SLAB_HWCACHE_ALIGN, NULL);
1052 if (!rds_ib_incoming_slab)
1053 goto out;
1054
1055 rds_ib_frag_slab = kmem_cache_create("rds_ib_frag",
1056 sizeof(struct rds_page_frag),
1057 0, SLAB_HWCACHE_ALIGN, NULL);
1058 if (!rds_ib_frag_slab) {
1059 kmem_cache_destroy(rds_ib_incoming_slab);
1060 rds_ib_incoming_slab = NULL;
1061 } else
1062 ret = 0;
1063out:
1064 return ret;
1065}
1066
1067void rds_ib_recv_exit(void)
1068{
1069 kmem_cache_destroy(rds_ib_incoming_slab);
1070 kmem_cache_destroy(rds_ib_frag_slab);
1071}
1072