1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33#include <linux/kernel.h>
34#include <linux/slab.h>
35#include <linux/pci.h>
36#include <linux/dma-mapping.h>
37#include <rdma/rdma_cm.h>
38
39#include "rds_single_path.h"
40#include "rds.h"
41#include "ib.h"
42
/* Slab cache backing struct rds_ib_incoming allocations. */
static struct kmem_cache *rds_ib_incoming_slab;
/* Slab cache backing struct rds_page_frag allocations. */
static struct kmem_cache *rds_ib_frag_slab;
/*
 * Number of rds_ib_incoming objects currently allocated from the slab;
 * bounded by rds_ib_sysctl_max_recv_allocation when a new one is allocated.
 */
static atomic_t rds_ib_allocation = ATOMIC_INIT(0);
46
47void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
48{
49 struct rds_ib_recv_work *recv;
50 u32 i;
51
52 for (i = 0, recv = ic->i_recvs; i < ic->i_recv_ring.w_nr; i++, recv++) {
53 struct ib_sge *sge;
54
55 recv->r_ibinc = NULL;
56 recv->r_frag = NULL;
57
58 recv->r_wr.next = NULL;
59 recv->r_wr.wr_id = i;
60 recv->r_wr.sg_list = recv->r_sge;
61 recv->r_wr.num_sge = RDS_IB_RECV_SGE;
62
63 sge = &recv->r_sge[0];
64 sge->addr = ic->i_recv_hdrs_dma[i];
65 sge->length = sizeof(struct rds_header);
66 sge->lkey = ic->i_pd->local_dma_lkey;
67
68 sge = &recv->r_sge[1];
69 sge->addr = 0;
70 sge->length = RDS_FRAG_SIZE;
71 sge->lkey = ic->i_pd->local_dma_lkey;
72 }
73}
74
75
76
77
78
79static void list_splice_entire_tail(struct list_head *from,
80 struct list_head *to)
81{
82 struct list_head *from_last = from->prev;
83
84 list_splice_tail(from_last, to);
85 list_add_tail(from_last, to);
86}
87
88static void rds_ib_cache_xfer_to_ready(struct rds_ib_refill_cache *cache)
89{
90 struct list_head *tmp;
91
92 tmp = xchg(&cache->xfer, NULL);
93 if (tmp) {
94 if (cache->ready)
95 list_splice_entire_tail(tmp, cache->ready);
96 else
97 cache->ready = tmp;
98 }
99}
100
101static int rds_ib_recv_alloc_cache(struct rds_ib_refill_cache *cache, gfp_t gfp)
102{
103 struct rds_ib_cache_head *head;
104 int cpu;
105
106 cache->percpu = alloc_percpu_gfp(struct rds_ib_cache_head, gfp);
107 if (!cache->percpu)
108 return -ENOMEM;
109
110 for_each_possible_cpu(cpu) {
111 head = per_cpu_ptr(cache->percpu, cpu);
112 head->first = NULL;
113 head->count = 0;
114 }
115 cache->xfer = NULL;
116 cache->ready = NULL;
117
118 return 0;
119}
120
121int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic, gfp_t gfp)
122{
123 int ret;
124
125 ret = rds_ib_recv_alloc_cache(&ic->i_cache_incs, gfp);
126 if (!ret) {
127 ret = rds_ib_recv_alloc_cache(&ic->i_cache_frags, gfp);
128 if (ret)
129 free_percpu(ic->i_cache_incs.percpu);
130 }
131
132 return ret;
133}
134
135static void rds_ib_cache_splice_all_lists(struct rds_ib_refill_cache *cache,
136 struct list_head *caller_list)
137{
138 struct rds_ib_cache_head *head;
139 int cpu;
140
141 for_each_possible_cpu(cpu) {
142 head = per_cpu_ptr(cache->percpu, cpu);
143 if (head->first) {
144 list_splice_entire_tail(head->first, caller_list);
145 head->first = NULL;
146 }
147 }
148
149 if (cache->ready) {
150 list_splice_entire_tail(cache->ready, caller_list);
151 cache->ready = NULL;
152 }
153}
154
155void rds_ib_recv_free_caches(struct rds_ib_connection *ic)
156{
157 struct rds_ib_incoming *inc;
158 struct rds_ib_incoming *inc_tmp;
159 struct rds_page_frag *frag;
160 struct rds_page_frag *frag_tmp;
161 LIST_HEAD(list);
162
163 rds_ib_cache_xfer_to_ready(&ic->i_cache_incs);
164 rds_ib_cache_splice_all_lists(&ic->i_cache_incs, &list);
165 free_percpu(ic->i_cache_incs.percpu);
166
167 list_for_each_entry_safe(inc, inc_tmp, &list, ii_cache_entry) {
168 list_del(&inc->ii_cache_entry);
169 WARN_ON(!list_empty(&inc->ii_frags));
170 kmem_cache_free(rds_ib_incoming_slab, inc);
171 atomic_dec(&rds_ib_allocation);
172 }
173
174 rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);
175 rds_ib_cache_splice_all_lists(&ic->i_cache_frags, &list);
176 free_percpu(ic->i_cache_frags.percpu);
177
178 list_for_each_entry_safe(frag, frag_tmp, &list, f_cache_entry) {
179 list_del(&frag->f_cache_entry);
180 WARN_ON(!list_empty(&frag->f_item));
181 kmem_cache_free(rds_ib_frag_slab, frag);
182 }
183}
184
185
/*
 * Forward declarations: the cache put/get helpers are defined further down
 * in this file but are used by the free and refill paths below.
 */
static void rds_ib_recv_cache_put(struct list_head *new_item,
 struct rds_ib_refill_cache *cache);
static struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache);
189
190
191
192static void rds_ib_frag_free(struct rds_ib_connection *ic,
193 struct rds_page_frag *frag)
194{
195 rdsdebug("frag %p page %p\n", frag, sg_page(&frag->f_sg));
196
197 rds_ib_recv_cache_put(&frag->f_cache_entry, &ic->i_cache_frags);
198 atomic_add(RDS_FRAG_SIZE / SZ_1K, &ic->i_cache_allocs);
199 rds_ib_stats_add(s_ib_recv_added_to_cache, RDS_FRAG_SIZE);
200}
201
202
203void rds_ib_inc_free(struct rds_incoming *inc)
204{
205 struct rds_ib_incoming *ibinc;
206 struct rds_page_frag *frag;
207 struct rds_page_frag *pos;
208 struct rds_ib_connection *ic = inc->i_conn->c_transport_data;
209
210 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);
211
212
213 list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) {
214 list_del_init(&frag->f_item);
215 rds_ib_frag_free(ic, frag);
216 }
217 BUG_ON(!list_empty(&ibinc->ii_frags));
218
219 rdsdebug("freeing ibinc %p inc %p\n", ibinc, inc);
220 rds_ib_recv_cache_put(&ibinc->ii_cache_entry, &ic->i_cache_incs);
221}
222
223static void rds_ib_recv_clear_one(struct rds_ib_connection *ic,
224 struct rds_ib_recv_work *recv)
225{
226 if (recv->r_ibinc) {
227 rds_inc_put(&recv->r_ibinc->ii_inc);
228 recv->r_ibinc = NULL;
229 }
230 if (recv->r_frag) {
231 ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE);
232 rds_ib_frag_free(ic, recv->r_frag);
233 recv->r_frag = NULL;
234 }
235}
236
237void rds_ib_recv_clear_ring(struct rds_ib_connection *ic)
238{
239 u32 i;
240
241 for (i = 0; i < ic->i_recv_ring.w_nr; i++)
242 rds_ib_recv_clear_one(ic, &ic->i_recvs[i]);
243}
244
245static struct rds_ib_incoming *rds_ib_refill_one_inc(struct rds_ib_connection *ic,
246 gfp_t slab_mask)
247{
248 struct rds_ib_incoming *ibinc;
249 struct list_head *cache_item;
250 int avail_allocs;
251
252 cache_item = rds_ib_recv_cache_get(&ic->i_cache_incs);
253 if (cache_item) {
254 ibinc = container_of(cache_item, struct rds_ib_incoming, ii_cache_entry);
255 } else {
256 avail_allocs = atomic_add_unless(&rds_ib_allocation,
257 1, rds_ib_sysctl_max_recv_allocation);
258 if (!avail_allocs) {
259 rds_ib_stats_inc(s_ib_rx_alloc_limit);
260 return NULL;
261 }
262 ibinc = kmem_cache_alloc(rds_ib_incoming_slab, slab_mask);
263 if (!ibinc) {
264 atomic_dec(&rds_ib_allocation);
265 return NULL;
266 }
267 rds_ib_stats_inc(s_ib_rx_total_incs);
268 }
269 INIT_LIST_HEAD(&ibinc->ii_frags);
270 rds_inc_init(&ibinc->ii_inc, ic->conn, &ic->conn->c_faddr);
271
272 return ibinc;
273}
274
275static struct rds_page_frag *rds_ib_refill_one_frag(struct rds_ib_connection *ic,
276 gfp_t slab_mask, gfp_t page_mask)
277{
278 struct rds_page_frag *frag;
279 struct list_head *cache_item;
280 int ret;
281
282 cache_item = rds_ib_recv_cache_get(&ic->i_cache_frags);
283 if (cache_item) {
284 frag = container_of(cache_item, struct rds_page_frag, f_cache_entry);
285 atomic_sub(RDS_FRAG_SIZE / SZ_1K, &ic->i_cache_allocs);
286 rds_ib_stats_add(s_ib_recv_added_to_cache, RDS_FRAG_SIZE);
287 } else {
288 frag = kmem_cache_alloc(rds_ib_frag_slab, slab_mask);
289 if (!frag)
290 return NULL;
291
292 sg_init_table(&frag->f_sg, 1);
293 ret = rds_page_remainder_alloc(&frag->f_sg,
294 RDS_FRAG_SIZE, page_mask);
295 if (ret) {
296 kmem_cache_free(rds_ib_frag_slab, frag);
297 return NULL;
298 }
299 rds_ib_stats_inc(s_ib_rx_total_frags);
300 }
301
302 INIT_LIST_HEAD(&frag->f_item);
303
304 return frag;
305}
306
307static int rds_ib_recv_refill_one(struct rds_connection *conn,
308 struct rds_ib_recv_work *recv, gfp_t gfp)
309{
310 struct rds_ib_connection *ic = conn->c_transport_data;
311 struct ib_sge *sge;
312 int ret = -ENOMEM;
313 gfp_t slab_mask = gfp;
314 gfp_t page_mask = gfp;
315
316 if (gfp & __GFP_DIRECT_RECLAIM) {
317 slab_mask = GFP_KERNEL;
318 page_mask = GFP_HIGHUSER;
319 }
320
321 if (!ic->i_cache_incs.ready)
322 rds_ib_cache_xfer_to_ready(&ic->i_cache_incs);
323 if (!ic->i_cache_frags.ready)
324 rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);
325
326
327
328
329
330 if (!recv->r_ibinc) {
331 recv->r_ibinc = rds_ib_refill_one_inc(ic, slab_mask);
332 if (!recv->r_ibinc)
333 goto out;
334 }
335
336 WARN_ON(recv->r_frag);
337 recv->r_frag = rds_ib_refill_one_frag(ic, slab_mask, page_mask);
338 if (!recv->r_frag)
339 goto out;
340
341 ret = ib_dma_map_sg(ic->i_cm_id->device, &recv->r_frag->f_sg,
342 1, DMA_FROM_DEVICE);
343 WARN_ON(ret != 1);
344
345 sge = &recv->r_sge[0];
346 sge->addr = ic->i_recv_hdrs_dma[recv - ic->i_recvs];
347 sge->length = sizeof(struct rds_header);
348
349 sge = &recv->r_sge[1];
350 sge->addr = sg_dma_address(&recv->r_frag->f_sg);
351 sge->length = sg_dma_len(&recv->r_frag->f_sg);
352
353 ret = 0;
354out:
355 return ret;
356}
357
358static int acquire_refill(struct rds_connection *conn)
359{
360 return test_and_set_bit(RDS_RECV_REFILL, &conn->c_flags) == 0;
361}
362
363static void release_refill(struct rds_connection *conn)
364{
365 clear_bit(RDS_RECV_REFILL, &conn->c_flags);
366
367
368
369
370
371
372 if (waitqueue_active(&conn->c_waitq))
373 wake_up_all(&conn->c_waitq);
374}
375
376
377
378
379
380
381void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
382{
383 struct rds_ib_connection *ic = conn->c_transport_data;
384 struct rds_ib_recv_work *recv;
385 unsigned int posted = 0;
386 int ret = 0;
387 bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM);
388 bool must_wake = false;
389 u32 pos;
390
391
392
393
394
395 if (!acquire_refill(conn))
396 return;
397
398 while ((prefill || rds_conn_up(conn)) &&
399 rds_ib_ring_alloc(&ic->i_recv_ring, 1, &pos)) {
400 if (pos >= ic->i_recv_ring.w_nr) {
401 printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n",
402 pos);
403 break;
404 }
405
406 recv = &ic->i_recvs[pos];
407 ret = rds_ib_recv_refill_one(conn, recv, gfp);
408 if (ret) {
409 must_wake = true;
410 break;
411 }
412
413 rdsdebug("recv %p ibinc %p page %p addr %lu\n", recv,
414 recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
415 (long)sg_dma_address(&recv->r_frag->f_sg));
416
417
418 ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, NULL);
419 if (ret) {
420 rds_ib_conn_error(conn, "recv post on "
421 "%pI6c returned %d, disconnecting and "
422 "reconnecting\n", &conn->c_faddr,
423 ret);
424 break;
425 }
426
427 posted++;
428
429 if ((posted > 128 && need_resched()) || posted > 8192) {
430 must_wake = true;
431 break;
432 }
433 }
434
435
436 if (ic->i_flowctl && posted)
437 rds_ib_advertise_credits(conn, posted);
438
439 if (ret)
440 rds_ib_ring_unalloc(&ic->i_recv_ring, 1);
441
442 release_refill(conn);
443
444
445
446
447
448
449
450
451
452
453
454 if (rds_conn_up(conn) &&
455 (must_wake ||
456 (can_wait && rds_ib_ring_low(&ic->i_recv_ring)) ||
457 rds_ib_ring_empty(&ic->i_recv_ring))) {
458 queue_delayed_work(rds_wq, &conn->c_recv_w, 1);
459 }
460 if (can_wait)
461 cond_resched();
462}
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477static void rds_ib_recv_cache_put(struct list_head *new_item,
478 struct rds_ib_refill_cache *cache)
479{
480 unsigned long flags;
481 struct list_head *old, *chpfirst;
482
483 local_irq_save(flags);
484
485 chpfirst = __this_cpu_read(cache->percpu->first);
486 if (!chpfirst)
487 INIT_LIST_HEAD(new_item);
488 else
489 list_add_tail(new_item, chpfirst);
490
491 __this_cpu_write(cache->percpu->first, new_item);
492 __this_cpu_inc(cache->percpu->count);
493
494 if (__this_cpu_read(cache->percpu->count) < RDS_IB_RECYCLE_BATCH_COUNT)
495 goto end;
496
497
498
499
500
501
502
503 do {
504 old = xchg(&cache->xfer, NULL);
505 if (old)
506 list_splice_entire_tail(old, chpfirst);
507 old = cmpxchg(&cache->xfer, NULL, chpfirst);
508 } while (old);
509
510
511 __this_cpu_write(cache->percpu->first, NULL);
512 __this_cpu_write(cache->percpu->count, 0);
513end:
514 local_irq_restore(flags);
515}
516
517static struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache)
518{
519 struct list_head *head = cache->ready;
520
521 if (head) {
522 if (!list_empty(head)) {
523 cache->ready = head->next;
524 list_del_init(head);
525 } else
526 cache->ready = NULL;
527 }
528
529 return head;
530}
531
532int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to)
533{
534 struct rds_ib_incoming *ibinc;
535 struct rds_page_frag *frag;
536 unsigned long to_copy;
537 unsigned long frag_off = 0;
538 int copied = 0;
539 int ret;
540 u32 len;
541
542 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);
543 frag = list_entry(ibinc->ii_frags.next, struct rds_page_frag, f_item);
544 len = be32_to_cpu(inc->i_hdr.h_len);
545
546 while (iov_iter_count(to) && copied < len) {
547 if (frag_off == RDS_FRAG_SIZE) {
548 frag = list_entry(frag->f_item.next,
549 struct rds_page_frag, f_item);
550 frag_off = 0;
551 }
552 to_copy = min_t(unsigned long, iov_iter_count(to),
553 RDS_FRAG_SIZE - frag_off);
554 to_copy = min_t(unsigned long, to_copy, len - copied);
555
556
557 rds_stats_add(s_copy_to_user, to_copy);
558 ret = copy_page_to_iter(sg_page(&frag->f_sg),
559 frag->f_sg.offset + frag_off,
560 to_copy,
561 to);
562 if (ret != to_copy)
563 return -EFAULT;
564
565 frag_off += to_copy;
566 copied += to_copy;
567 }
568
569 return copied;
570}
571
572
573void rds_ib_recv_init_ack(struct rds_ib_connection *ic)
574{
575 struct ib_send_wr *wr = &ic->i_ack_wr;
576 struct ib_sge *sge = &ic->i_ack_sge;
577
578 sge->addr = ic->i_ack_dma;
579 sge->length = sizeof(struct rds_header);
580 sge->lkey = ic->i_pd->local_dma_lkey;
581
582 wr->sg_list = sge;
583 wr->num_sge = 1;
584 wr->opcode = IB_WR_SEND;
585 wr->wr_id = RDS_IB_ACK_WR_ID;
586 wr->send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
587}
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611#ifndef KERNEL_HAS_ATOMIC64
612void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required)
613{
614 unsigned long flags;
615
616 spin_lock_irqsave(&ic->i_ack_lock, flags);
617 ic->i_ack_next = seq;
618 if (ack_required)
619 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
620 spin_unlock_irqrestore(&ic->i_ack_lock, flags);
621}
622
623static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
624{
625 unsigned long flags;
626 u64 seq;
627
628 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
629
630 spin_lock_irqsave(&ic->i_ack_lock, flags);
631 seq = ic->i_ack_next;
632 spin_unlock_irqrestore(&ic->i_ack_lock, flags);
633
634 return seq;
635}
636#else
637void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required)
638{
639 atomic64_set(&ic->i_ack_next, seq);
640 if (ack_required) {
641 smp_mb__before_atomic();
642 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
643 }
644}
645
646static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
647{
648 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
649 smp_mb__after_atomic();
650
651 return atomic64_read(&ic->i_ack_next);
652}
653#endif
654
655
656static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits)
657{
658 struct rds_header *hdr = ic->i_ack;
659 u64 seq;
660 int ret;
661
662 seq = rds_ib_get_ack(ic);
663
664 rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq);
665
666 ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, ic->i_ack_dma,
667 sizeof(*hdr), DMA_TO_DEVICE);
668 rds_message_populate_header(hdr, 0, 0, 0);
669 hdr->h_ack = cpu_to_be64(seq);
670 hdr->h_credit = adv_credits;
671 rds_message_make_checksum(hdr);
672 ib_dma_sync_single_for_device(ic->rds_ibdev->dev, ic->i_ack_dma,
673 sizeof(*hdr), DMA_TO_DEVICE);
674
675 ic->i_ack_queued = jiffies;
676
677 ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, NULL);
678 if (unlikely(ret)) {
679
680
681
682 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
683 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
684
685 rds_ib_stats_inc(s_ib_ack_send_failure);
686
687 rds_ib_conn_error(ic->conn, "sending ack failed\n");
688 } else
689 rds_ib_stats_inc(s_ib_ack_sent);
690}
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730void rds_ib_attempt_ack(struct rds_ib_connection *ic)
731{
732 unsigned int adv_credits;
733
734 if (!test_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
735 return;
736
737 if (test_and_set_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags)) {
738 rds_ib_stats_inc(s_ib_ack_send_delayed);
739 return;
740 }
741
742
743 if (!rds_ib_send_grab_credits(ic, 1, &adv_credits, 0, RDS_MAX_ADV_CREDIT)) {
744 rds_ib_stats_inc(s_ib_tx_throttle);
745 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
746 return;
747 }
748
749 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
750 rds_ib_send_ack(ic, adv_credits);
751}
752
753
754
755
756
/*
 * Mark the outstanding ACK as no longer in flight and immediately try to
 * send another one; rds_ib_attempt_ack() does nothing unless a further
 * ACK has been requested.
 */
void rds_ib_ack_send_complete(struct rds_ib_connection *ic)
{
 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
 rds_ib_attempt_ack(ic);
}
762
763
764
765
766
767u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic)
768{
769 if (test_and_clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
770 rds_ib_stats_inc(s_ib_ack_send_piggybacked);
771 return rds_ib_get_ack(ic);
772}
773
774
775
776
777
778
779
780
781
782static void rds_ib_cong_recv(struct rds_connection *conn,
783 struct rds_ib_incoming *ibinc)
784{
785 struct rds_cong_map *map;
786 unsigned int map_off;
787 unsigned int map_page;
788 struct rds_page_frag *frag;
789 unsigned long frag_off;
790 unsigned long to_copy;
791 unsigned long copied;
792 __le64 uncongested = 0;
793 void *addr;
794
795
796 if (be32_to_cpu(ibinc->ii_inc.i_hdr.h_len) != RDS_CONG_MAP_BYTES)
797 return;
798
799 map = conn->c_fcong;
800 map_page = 0;
801 map_off = 0;
802
803 frag = list_entry(ibinc->ii_frags.next, struct rds_page_frag, f_item);
804 frag_off = 0;
805
806 copied = 0;
807
808 while (copied < RDS_CONG_MAP_BYTES) {
809 __le64 *src, *dst;
810 unsigned int k;
811
812 to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
813 BUG_ON(to_copy & 7);
814
815 addr = kmap_atomic(sg_page(&frag->f_sg));
816
817 src = addr + frag->f_sg.offset + frag_off;
818 dst = (void *)map->m_page_addrs[map_page] + map_off;
819 for (k = 0; k < to_copy; k += 8) {
820
821
822 uncongested |= ~(*src) & *dst;
823 *dst++ = *src++;
824 }
825 kunmap_atomic(addr);
826
827 copied += to_copy;
828
829 map_off += to_copy;
830 if (map_off == PAGE_SIZE) {
831 map_off = 0;
832 map_page++;
833 }
834
835 frag_off += to_copy;
836 if (frag_off == RDS_FRAG_SIZE) {
837 frag = list_entry(frag->f_item.next,
838 struct rds_page_frag, f_item);
839 frag_off = 0;
840 }
841 }
842
843
844 rds_cong_map_updated(map, le64_to_cpu(uncongested));
845}
846
847static void rds_ib_process_recv(struct rds_connection *conn,
848 struct rds_ib_recv_work *recv, u32 data_len,
849 struct rds_ib_ack_state *state)
850{
851 struct rds_ib_connection *ic = conn->c_transport_data;
852 struct rds_ib_incoming *ibinc = ic->i_ibinc;
853 struct rds_header *ihdr, *hdr;
854 dma_addr_t dma_addr = ic->i_recv_hdrs_dma[recv - ic->i_recvs];
855
856
857
858 rdsdebug("ic %p ibinc %p recv %p byte len %u\n", ic, ibinc, recv,
859 data_len);
860
861 if (data_len < sizeof(struct rds_header)) {
862 rds_ib_conn_error(conn, "incoming message "
863 "from %pI6c didn't include a "
864 "header, disconnecting and "
865 "reconnecting\n",
866 &conn->c_faddr);
867 return;
868 }
869 data_len -= sizeof(struct rds_header);
870
871 ihdr = ic->i_recv_hdrs[recv - ic->i_recvs];
872
873 ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, dma_addr,
874 sizeof(*ihdr), DMA_FROM_DEVICE);
875
876 if (!rds_message_verify_checksum(ihdr)) {
877 rds_ib_conn_error(conn, "incoming message "
878 "from %pI6c has corrupted header - "
879 "forcing a reconnect\n",
880 &conn->c_faddr);
881 rds_stats_inc(s_recv_drop_bad_checksum);
882 goto done;
883 }
884
885
886 state->ack_recv = be64_to_cpu(ihdr->h_ack);
887 state->ack_recv_valid = 1;
888
889
890 if (ihdr->h_credit)
891 rds_ib_send_add_credits(conn, ihdr->h_credit);
892
893 if (ihdr->h_sport == 0 && ihdr->h_dport == 0 && data_len == 0) {
894
895
896
897
898 rds_ib_stats_inc(s_ib_ack_received);
899
900
901
902
903
904
905
906
907
908
909 rds_ib_frag_free(ic, recv->r_frag);
910 recv->r_frag = NULL;
911 goto done;
912 }
913
914
915
916
917
918
919
920 if (!ibinc) {
921 ibinc = recv->r_ibinc;
922 recv->r_ibinc = NULL;
923 ic->i_ibinc = ibinc;
924
925 hdr = &ibinc->ii_inc.i_hdr;
926 ibinc->ii_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] =
927 local_clock();
928 memcpy(hdr, ihdr, sizeof(*hdr));
929 ic->i_recv_data_rem = be32_to_cpu(hdr->h_len);
930 ibinc->ii_inc.i_rx_lat_trace[RDS_MSG_RX_START] =
931 local_clock();
932
933 rdsdebug("ic %p ibinc %p rem %u flag 0x%x\n", ic, ibinc,
934 ic->i_recv_data_rem, hdr->h_flags);
935 } else {
936 hdr = &ibinc->ii_inc.i_hdr;
937
938
939 if (hdr->h_sequence != ihdr->h_sequence ||
940 hdr->h_len != ihdr->h_len ||
941 hdr->h_sport != ihdr->h_sport ||
942 hdr->h_dport != ihdr->h_dport) {
943 rds_ib_conn_error(conn,
944 "fragment header mismatch; forcing reconnect\n");
945 goto done;
946 }
947 }
948
949 list_add_tail(&recv->r_frag->f_item, &ibinc->ii_frags);
950 recv->r_frag = NULL;
951
952 if (ic->i_recv_data_rem > RDS_FRAG_SIZE)
953 ic->i_recv_data_rem -= RDS_FRAG_SIZE;
954 else {
955 ic->i_recv_data_rem = 0;
956 ic->i_ibinc = NULL;
957
958 if (ibinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP) {
959 rds_ib_cong_recv(conn, ibinc);
960 } else {
961 rds_recv_incoming(conn, &conn->c_faddr, &conn->c_laddr,
962 &ibinc->ii_inc, GFP_ATOMIC);
963 state->ack_next = be64_to_cpu(hdr->h_sequence);
964 state->ack_next_valid = 1;
965 }
966
967
968
969
970 if (hdr->h_flags & RDS_FLAG_ACK_REQUIRED) {
971 rds_stats_inc(s_recv_ack_required);
972 state->ack_required = 1;
973 }
974
975 rds_inc_put(&ibinc->ii_inc);
976 }
977done:
978 ib_dma_sync_single_for_device(ic->rds_ibdev->dev, dma_addr,
979 sizeof(*ihdr), DMA_FROM_DEVICE);
980}
981
982void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
983 struct ib_wc *wc,
984 struct rds_ib_ack_state *state)
985{
986 struct rds_connection *conn = ic->conn;
987 struct rds_ib_recv_work *recv;
988
989 rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
990 (unsigned long long)wc->wr_id, wc->status,
991 ib_wc_status_msg(wc->status), wc->byte_len,
992 be32_to_cpu(wc->ex.imm_data));
993
994 rds_ib_stats_inc(s_ib_rx_cq_event);
995 recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)];
996 ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1,
997 DMA_FROM_DEVICE);
998
999
1000
1001
1002
1003 if (wc->status == IB_WC_SUCCESS) {
1004 rds_ib_process_recv(conn, recv, wc->byte_len, state);
1005 } else {
1006
1007 if (rds_conn_up(conn) || rds_conn_connecting(conn))
1008 rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c, %d> had status %u (%s), vendor err 0x%x, disconnecting and reconnecting\n",
1009 &conn->c_laddr, &conn->c_faddr,
1010 conn->c_tos, wc->status,
1011 ib_wc_status_msg(wc->status),
1012 wc->vendor_err);
1013 }
1014
1015
1016
1017
1018
1019
1020
1021
1022 if (recv->r_frag) {
1023 rds_ib_frag_free(ic, recv->r_frag);
1024 recv->r_frag = NULL;
1025 }
1026 rds_ib_ring_free(&ic->i_recv_ring, 1);
1027
1028
1029
1030
1031 if (rds_ib_ring_empty(&ic->i_recv_ring))
1032 rds_ib_stats_inc(s_ib_rx_ring_empty);
1033
1034 if (rds_ib_ring_low(&ic->i_recv_ring)) {
1035 rds_ib_recv_refill(conn, 0, GFP_NOWAIT | __GFP_NOWARN);
1036 rds_ib_stats_inc(s_ib_rx_refill_from_cq);
1037 }
1038}
1039
1040int rds_ib_recv_path(struct rds_conn_path *cp)
1041{
1042 struct rds_connection *conn = cp->cp_conn;
1043 struct rds_ib_connection *ic = conn->c_transport_data;
1044
1045 rdsdebug("conn %p\n", conn);
1046 if (rds_conn_up(conn)) {
1047 rds_ib_attempt_ack(ic);
1048 rds_ib_recv_refill(conn, 0, GFP_KERNEL);
1049 rds_ib_stats_inc(s_ib_rx_refill_from_thread);
1050 }
1051
1052 return 0;
1053}
1054
1055int rds_ib_recv_init(void)
1056{
1057 struct sysinfo si;
1058 int ret = -ENOMEM;
1059
1060
1061 si_meminfo(&si);
1062 rds_ib_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE;
1063
1064 rds_ib_incoming_slab =
1065 kmem_cache_create_usercopy("rds_ib_incoming",
1066 sizeof(struct rds_ib_incoming),
1067 0, SLAB_HWCACHE_ALIGN,
1068 offsetof(struct rds_ib_incoming,
1069 ii_inc.i_usercopy),
1070 sizeof(struct rds_inc_usercopy),
1071 NULL);
1072 if (!rds_ib_incoming_slab)
1073 goto out;
1074
1075 rds_ib_frag_slab = kmem_cache_create("rds_ib_frag",
1076 sizeof(struct rds_page_frag),
1077 0, SLAB_HWCACHE_ALIGN, NULL);
1078 if (!rds_ib_frag_slab) {
1079 kmem_cache_destroy(rds_ib_incoming_slab);
1080 rds_ib_incoming_slab = NULL;
1081 } else
1082 ret = 0;
1083out:
1084 return ret;
1085}
1086
/*
 * Module-level teardown of the receive path: destroy both slab caches.
 * Warns if rds_ib_allocation is non-zero, i.e. if some rds_ib_incoming
 * objects are still accounted as allocated.
 */
void rds_ib_recv_exit(void)
{
 WARN_ON(atomic_read(&rds_ib_allocation));

 kmem_cache_destroy(rds_ib_incoming_slab);
 kmem_cache_destroy(rds_ib_frag_slab);
}
1094