// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright (c) 2014-2017 Oracle.  All rights reserved.
 * Copyright (c) 2003-2007 Network Appliance, Inc.  All rights reserved.
 */

/*
 * rpc_rdma.c
 *
 * This file contains the guts of the RPC RDMA protocol, and
 * does marshaling/unmarshaling, etc. It is also where interfacing
 * to the Linux RPC framework lives.
 */
#include <linux/highmem.h>

#include <linux/sunrpc/svc_rdma.h>

#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

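/* Returns the size of the largest RPC-over-RDMA header in a Call message
 *
 * The largest Call header contains a full-size Read list and a
 * minimal Reply chunk.
 */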
static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs)
{
	unsigned int size;

	/* Fixed header fields and list discriminators */
	size = RPCRDMA_HDRLEN_MIN;

	/* Maximum Read list size */
	size += maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32);

	/* Minimal Reply chunk size */
	size += sizeof(__be32);	/* segment count */
	size += rpcrdma_segment_maxsz * sizeof(__be32);
	size += sizeof(__be32);	/* list discriminator */

	dprintk("RPC: %s: max call header size = %u\n",
		__func__, size);
	return size;
}

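/* Returns the size of the largest RPC-over-RDMA header in a Reply message
 *
 * There is only one Write list or one Reply chunk per Reply
 * message.  The larger list is the Write list.
 */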
static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
{
	unsigned int size;

	/* Fixed header fields and list discriminators */
	size = RPCRDMA_HDRLEN_MIN;

	/* Maximum Write list size */
	size += sizeof(__be32);	/* segment count */
	size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32);
	size += sizeof(__be32);	/* list discriminator */

	dprintk("RPC: %s: max reply header size = %u\n",
		__func__, size);
	return size;
}

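/**
 * rpcrdma_set_max_header_sizes - Initialize inline payload sizes
 * @r_xprt: transport instance to initialize
 *
 * The max_inline fields are set to the largest Call and Reply payload
 * that can be conveyed without chunking, so the marshaling code does
 * not have to repeat this calculation for every RPC.
 */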
void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt)
{
	unsigned int maxsegs = r_xprt->rx_ia.ri_max_segs;
	struct rpcrdma_ep *ep = &r_xprt->rx_ep;

	ep->rep_max_inline_send =
		ep->rep_inline_send - rpcrdma_max_call_header_size(maxsegs);
	ep->rep_max_inline_recv =
		ep->rep_inline_recv - rpcrdma_max_reply_header_size(maxsegs);
}

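/* The client can send a request inline as long as the RPCRDMA header
 * plus the RPC call fit under the transport's inline limit. If the
 * combined call message size exceeds that limit, the client must use
 * a Read chunk for this operation.
 *
 * A Read chunk is also required if sending the RPC call inline would
 * exceed this device's max_sge limit.
 */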
static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
				struct rpc_rqst *rqst)
{
	struct xdr_buf *xdr = &rqst->rq_snd_buf;
	unsigned int count, remaining, offset;

	if (xdr->len > r_xprt->rx_ep.rep_max_inline_send)
		return false;

	if (xdr->page_len) {
		remaining = xdr->page_len;
		offset = offset_in_page(xdr->page_base);
		count = RPCRDMA_MIN_SEND_SGES;
		while (remaining) {
			remaining -= min_t(unsigned int,
					   PAGE_SIZE - offset, remaining);
			offset = 0;
			if (++count > r_xprt->rx_ia.ri_max_send_sges)
				return false;
		}
	}

	return true;
}

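/* The client can't know how large the actual reply will be. Thus it
 * plans for the largest possible reply for that particular ULP
 * operation. If the maximum combined reply message size exceeds that
 * limit, the client must provide a write list or a reply chunk for
 * this request.
 */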
static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
				   struct rpc_rqst *rqst)
{
	return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep.rep_max_inline_recv;
}

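/* The client is required to provide a Reply chunk if the maximum
 * size of the non-payload part of the RPC Reply is larger than
 * the inline threshold.
 */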
static bool
rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt,
			  const struct rpc_rqst *rqst)
{
	const struct xdr_buf *buf = &rqst->rq_rcv_buf;

	return (buf->head[0].iov_len + buf->tail[0].iov_len) <
		r_xprt->rx_ep.rep_max_inline_recv;
}

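/* Split @vec on page boundaries into chunk segments. Each segment
 * covers no more than one page, so the memory registration code can
 * map it efficiently.
 *
 * Returns a pointer to the next available segment and bumps the
 * running segment count in @n.
 */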
static struct rpcrdma_mr_seg *
rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg,
		     unsigned int *n)
{
	u32 remaining, page_offset;
	char *base;

	base = vec->iov_base;
	page_offset = offset_in_page(base);
	remaining = vec->iov_len;
	while (remaining) {
		seg->mr_page = NULL;
		seg->mr_offset = base;
		seg->mr_len = min_t(u32, PAGE_SIZE - page_offset, remaining);
		remaining -= seg->mr_len;
		base += seg->mr_len;
		++seg;
		++(*n);
		page_offset = 0;
	}
	return seg;
}

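/* Convert @xdrbuf into segments no larger than a page each. As they
 * are registered, these segments are then coalesced into RDMA chunks
 * when the hardware supports it.
 *
 * Returns a positive number of segments converted, or a negative errno.
 */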
static int
rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
		     unsigned int pos, enum rpcrdma_chunktype type,
		     struct rpcrdma_mr_seg *seg)
{
	unsigned long page_base;
	unsigned int len, n;
	struct page **ppages;

	n = 0;
	if (pos == 0)
		seg = rpcrdma_convert_kvec(&xdrbuf->head[0], seg, &n);

	len = xdrbuf->page_len;
	ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
	page_base = offset_in_page(xdrbuf->page_base);
	while (len) {
		/* Some upper layers provide a sparse page list;
		 * allocate any missing pages before mapping them.
		 */
		if (unlikely(xdrbuf->flags & XDRBUF_SPARSE_PAGES)) {
			if (!*ppages)
				*ppages = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
			if (!*ppages)
				return -ENOBUFS;
		}
		seg->mr_page = *ppages;
		seg->mr_offset = (char *)page_base;
		seg->mr_len = min_t(u32, PAGE_SIZE - page_base, len);
		len -= seg->mr_len;
		++ppages;
		++seg;
		++n;
		page_base = 0;
	}

	/* When encoding a Read chunk, the tail iovec contains an
	 * XDR pad and may be omitted.
	 */
	if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup)
		goto out;

	/* When encoding a Write chunk, some servers need to see an
	 * extra segment for non-XDR-aligned Write chunks. The upper
	 * layer provides space in the tail iovec that may be used
	 * for this purpose.
	 */
	if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup)
		goto out;

	if (xdrbuf->tail[0].iov_len)
		seg = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, &n);

out:
	if (unlikely(n > RPCRDMA_MAX_SEGS))
		return -EIO;
	return n;
}

static inline int
encode_item_present(struct xdr_stream *xdr)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, sizeof(*p));
	if (unlikely(!p))
		return -EMSGSIZE;

	*p = xdr_one;
	return 0;
}

static inline int
encode_item_not_present(struct xdr_stream *xdr)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, sizeof(*p));
	if (unlikely(!p))
		return -EMSGSIZE;

	*p = xdr_zero;
	return 0;
}

static void
xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr *mr)
{
	*iptr++ = cpu_to_be32(mr->mr_handle);
	*iptr++ = cpu_to_be32(mr->mr_length);
	xdr_encode_hyper(iptr, mr->mr_offset);
}

static int
encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, 4 * sizeof(*p));
	if (unlikely(!p))
		return -EMSGSIZE;

	xdr_encode_rdma_segment(p, mr);
	return 0;
}

static int
encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr,
		    u32 position)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, 6 * sizeof(*p));
	if (unlikely(!p))
		return -EMSGSIZE;

	*p++ = xdr_one;
	*p++ = cpu_to_be32(position);
	xdr_encode_rdma_segment(p, mr);
	return 0;
}

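/* Register and XDR encode the Read list. Supports encoding a list of read
 * segments that belong to a single read chunk.
 *
 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
 *
 *  Read chunklist (a linked list):
 *   N elements, position P (same P for all chunks of same arg!):
 *    1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0
 *
 * Returns zero on success, or a negative errno if a failure occurred.
 * @xdr is advanced to the next position in the stream.
 *
 * Only a single @pos value is currently supported.
 */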
static noinline int
rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
			 struct rpc_rqst *rqst, enum rpcrdma_chunktype rtype)
{
	struct xdr_stream *xdr = &req->rl_stream;
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mr *mr;
	unsigned int pos;
	int nsegs;

	if (rtype == rpcrdma_noch)
		goto done;

	pos = rqst->rq_snd_buf.head[0].iov_len;
	if (rtype == rpcrdma_areadch)
		pos = 0;
	seg = req->rl_segments;
	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos,
				     rtype, seg);
	if (nsegs < 0)
		return nsegs;

	do {
		seg = frwr_map(r_xprt, seg, nsegs, false, rqst->rq_xid, &mr);
		if (IS_ERR(seg))
			return PTR_ERR(seg);
		rpcrdma_mr_push(mr, &req->rl_registered);

		if (encode_read_segment(xdr, mr, pos) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_read(rqst->rq_task, pos, mr, nsegs);
		r_xprt->rx_stats.read_chunk_count++;
		nsegs -= mr->mr_nents;
	} while (nsegs);

done:
	return encode_item_not_present(xdr);
}

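/* Register and XDR encode the Write list. Supports encoding a list
 * containing one array of plain segments that belong to a single
 * write chunk.
 *
 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
 *
 *  Write chunklist (a list of (one) counted array):
 *   N elements:
 *    1 - N - HLOO - HLOO - ... - HLOO - 0
 *
 * Returns zero on success, or a negative errno if a failure occurred.
 * @xdr is advanced to the next position in the stream.
 *
 * Only a single Write chunk is currently supported.
 */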
static noinline int
rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
			  struct rpc_rqst *rqst, enum rpcrdma_chunktype wtype)
{
	struct xdr_stream *xdr = &req->rl_stream;
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mr *mr;
	int nsegs, nchunks;
	__be32 *segcount;

	if (wtype != rpcrdma_writech)
		goto done;

	seg = req->rl_segments;
	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
				     rqst->rq_rcv_buf.head[0].iov_len,
				     wtype, seg);
	if (nsegs < 0)
		return nsegs;

	if (encode_item_present(xdr) < 0)
		return -EMSGSIZE;
	segcount = xdr_reserve_space(xdr, sizeof(*segcount));
	if (unlikely(!segcount))
		return -EMSGSIZE;
	/* Actual value encoded below */

	nchunks = 0;
	do {
		seg = frwr_map(r_xprt, seg, nsegs, true, rqst->rq_xid, &mr);
		if (IS_ERR(seg))
			return PTR_ERR(seg);
		rpcrdma_mr_push(mr, &req->rl_registered);

		if (encode_rdma_segment(xdr, mr) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_write(rqst->rq_task, mr, nsegs);
		r_xprt->rx_stats.write_chunk_count++;
		r_xprt->rx_stats.total_rdma_request += mr->mr_length;
		nchunks++;
		nsegs -= mr->mr_nents;
	} while (nsegs);

	/* Update count of segments in this Write chunk */
	*segcount = cpu_to_be32(nchunks);

done:
	return encode_item_not_present(xdr);
}

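/* Register and XDR encode the Reply chunk. Supports encoding an array
 * of plain segments that belong to a single write (reply) chunk.
 *
 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
 *
 *  Reply chunk (a counted array):
 *   N elements:
 *    1 - N - HLOO - HLOO - ... - HLOO
 *
 * Returns zero on success, or a negative errno if a failure occurred.
 * @xdr is advanced to the next position in the stream.
 */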
static noinline int
rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
			   struct rpc_rqst *rqst, enum rpcrdma_chunktype wtype)
{
	struct xdr_stream *xdr = &req->rl_stream;
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mr *mr;
	int nsegs, nchunks;
	__be32 *segcount;

	if (wtype != rpcrdma_replych)
		return encode_item_not_present(xdr);

	seg = req->rl_segments;
	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
	if (nsegs < 0)
		return nsegs;

	if (encode_item_present(xdr) < 0)
		return -EMSGSIZE;
	segcount = xdr_reserve_space(xdr, sizeof(*segcount));
	if (unlikely(!segcount))
		return -EMSGSIZE;
	/* Actual value encoded below */

	nchunks = 0;
	do {
		seg = frwr_map(r_xprt, seg, nsegs, true, rqst->rq_xid, &mr);
		if (IS_ERR(seg))
			return PTR_ERR(seg);
		rpcrdma_mr_push(mr, &req->rl_registered);

		if (encode_rdma_segment(xdr, mr) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_reply(rqst->rq_task, mr, nsegs);
		r_xprt->rx_stats.reply_chunk_count++;
		r_xprt->rx_stats.total_rdma_request += mr->mr_length;
		nchunks++;
		nsegs -= mr->mr_nents;
	} while (nsegs);

	/* Update count of segments in the Reply chunk */
	*segcount = cpu_to_be32(nchunks);

	return 0;
}

static void rpcrdma_sendctx_done(struct kref *kref)
{
	struct rpcrdma_req *req =
		container_of(kref, struct rpcrdma_req, rl_kref);
	struct rpcrdma_rep *rep = req->rl_reply;

	rpcrdma_complete_rqst(rep);
	rep->rr_rxprt->rx_stats.reply_waits_for_send++;
}

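/**
 * rpcrdma_sendctx_unmap - DMA-unmap Send buffer
 * @sc: sendctx containing SGEs to unmap
 *
 */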
void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
{
	struct ib_sge *sge;

	if (!sc->sc_unmap_count)
		return;

	/* The first two SGEs contain the transport header and
	 * the inline buffer. These are always left mapped so
	 * they can be cheaply re-used.
	 */
	for (sge = &sc->sc_sges[2]; sc->sc_unmap_count;
	     ++sge, --sc->sc_unmap_count)
		ib_dma_unmap_page(sc->sc_device, sge->addr, sge->length,
				  DMA_TO_DEVICE);

	kref_put(&sc->sc_req->rl_kref, rpcrdma_sendctx_done);
}

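/* Prepare an SGE for the RPC-over-RDMA transport header.
 */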
static bool rpcrdma_prepare_hdr_sge(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req, u32 len)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct rpcrdma_regbuf *rb = req->rl_rdmabuf;
	struct ib_sge *sge = sc->sc_sges;

	if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
		goto out_regbuf;
	sge->addr = rdmab_addr(rb);
	sge->length = len;
	sge->lkey = rdmab_lkey(rb);

	ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length,
				      DMA_TO_DEVICE);
	sc->sc_wr.num_sge++;
	return true;

out_regbuf:
	pr_err("rpcrdma: failed to DMA map a Send buffer\n");
	return false;
}

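/* Prepare the Send SGEs. The head and tail iovec, and each entry
 * in the page list, gets its own SGE.
 */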
static bool rpcrdma_prepare_msg_sges(struct rpcrdma_xprt *r_xprt,
				     struct rpcrdma_req *req,
				     struct xdr_buf *xdr,
				     enum rpcrdma_chunktype rtype)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	unsigned int sge_no, page_base, len, remaining;
	struct rpcrdma_regbuf *rb = req->rl_sendbuf;
	struct ib_sge *sge = sc->sc_sges;
	struct page *page, **ppages;

	/* The head iovec is straightforward, as it is already
	 * DMA-mapped. Sync the content that has changed.
	 */
	if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
		goto out_regbuf;
	sc->sc_device = rdmab_device(rb);
	sge_no = 1;
	sge[sge_no].addr = rdmab_addr(rb);
	sge[sge_no].length = xdr->head[0].iov_len;
	sge[sge_no].lkey = rdmab_lkey(rb);
	ib_dma_sync_single_for_device(rdmab_device(rb), sge[sge_no].addr,
				      sge[sge_no].length, DMA_TO_DEVICE);

	/* If there is a Read chunk, the page list is being handled
	 * via explicit RDMA, and thus is skipped here. However, the
	 * tail iovec may include an XDR pad for the page list, as
	 * well as additional content, and may not reside in the
	 * same page as the head iovec.
	 */
	if (rtype == rpcrdma_readch) {
		len = xdr->tail[0].iov_len;

		/* Do not include the tail if it is only an XDR pad */
		if (len < 4)
			goto out;

		page = virt_to_page(xdr->tail[0].iov_base);
		page_base = offset_in_page(xdr->tail[0].iov_base);

		/* If the content in the page list is an odd length,
		 * xdr_write_pages() adds a pad at the beginning of
		 * the tail iovec. Force the tail's non-pad content to
		 * land at the next XDR position in the Send message.
		 */
		page_base += len & 3;
		len -= len & 3;
		goto map_tail;
	}

	/* If there is a page list present, DMA map and prepare an
	 * SGE for each page to be sent.
	 */
	if (xdr->page_len) {
		ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
		page_base = offset_in_page(xdr->page_base);
		remaining = xdr->page_len;
		while (remaining) {
			sge_no++;
			if (sge_no > RPCRDMA_MAX_SEND_SGES - 2)
				goto out_mapping_overflow;

			len = min_t(u32, PAGE_SIZE - page_base, remaining);
			sge[sge_no].addr =
				ib_dma_map_page(rdmab_device(rb), *ppages,
						page_base, len, DMA_TO_DEVICE);
			if (ib_dma_mapping_error(rdmab_device(rb),
						 sge[sge_no].addr))
				goto out_mapping_err;
			sge[sge_no].length = len;
			sge[sge_no].lkey = rdmab_lkey(rb);

			sc->sc_unmap_count++;
			ppages++;
			remaining -= len;
			page_base = 0;
		}
	}

	/* The tail iovec is not always constructed in the same
	 * page where the head iovec resides, so DMA map it
	 * separately.
	 */
	if (xdr->tail[0].iov_len) {
		page = virt_to_page(xdr->tail[0].iov_base);
		page_base = offset_in_page(xdr->tail[0].iov_base);
		len = xdr->tail[0].iov_len;

map_tail:
		sge_no++;
		sge[sge_no].addr =
			ib_dma_map_page(rdmab_device(rb), page, page_base, len,
					DMA_TO_DEVICE);
		if (ib_dma_mapping_error(rdmab_device(rb), sge[sge_no].addr))
			goto out_mapping_err;
		sge[sge_no].length = len;
		sge[sge_no].lkey = rdmab_lkey(rb);
		sc->sc_unmap_count++;
	}

out:
	sc->sc_wr.num_sge += sge_no;
	if (sc->sc_unmap_count)
		kref_get(&req->rl_kref);
	return true;

out_regbuf:
	pr_err("rpcrdma: failed to DMA map a Send buffer\n");
	return false;

out_mapping_overflow:
	rpcrdma_sendctx_unmap(sc);
	pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no);
	return false;

out_mapping_err:
	rpcrdma_sendctx_unmap(sc);
	trace_xprtrdma_dma_maperr(sge[sge_no].addr);
	return false;
}

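/**
 * rpcrdma_prepare_send_sges - Construct SGEs for a Send WR
 * @r_xprt: controlling transport
 * @req: context of RPC Call being marshalled
 * @hdrlen: size of transport header, in bytes
 * @xdr: xdr_buf containing RPC Call
 * @rtype: chunk type being encoded
 *
 * Returns 0 on success; otherwise a negative errno is returned.
 */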
int
rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
			  struct rpcrdma_req *req, u32 hdrlen,
			  struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
{
	int ret;

	ret = -EAGAIN;
	req->rl_sendctx = rpcrdma_sendctx_get_locked(r_xprt);
	if (!req->rl_sendctx)
		goto err;
	req->rl_sendctx->sc_wr.num_sge = 0;
	req->rl_sendctx->sc_unmap_count = 0;
	req->rl_sendctx->sc_req = req;
	kref_init(&req->rl_kref);

	ret = -EIO;
	if (!rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen))
		goto err;
	if (rtype != rpcrdma_areadch)
		if (!rpcrdma_prepare_msg_sges(r_xprt, req, xdr, rtype))
			goto err;
	return 0;

err:
	trace_xprtrdma_prepsend_failed(&req->rl_slot, ret);
	return ret;
}

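/**
 * rpcrdma_marshal_req - Marshal one outgoing RPC Call
 * @r_xprt: controlling transport
 * @rqst: RPC request to be marshaled
 *
 * For the RPC in "rqst", this function:
 *  - Chooses the transfer mode (eg., RDMA_MSG or RDMA_NOMSG)
 *  - Registers Read, Write, and Reply chunks
 *  - Constructs the transport header
 *  - Builds the Send SGEs for the transport header and RPC Call
 *
 * Returns 0 on success, or a negative errno on failure. On failure,
 * any registered chunks are reset.
 */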
int
rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
{
	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
	struct xdr_stream *xdr = &req->rl_stream;
	enum rpcrdma_chunktype rtype, wtype;
	bool ddp_allowed;
	__be32 *p;
	int ret;

	rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
	xdr_init_encode(xdr, &req->rl_hdrbuf, rdmab_data(req->rl_rdmabuf),
			rqst);

	/* Fixed header fields */
	ret = -EMSGSIZE;
	p = xdr_reserve_space(xdr, 4 * sizeof(*p));
	if (!p)
		goto out_err;
	*p++ = rqst->rq_xid;
	*p++ = rpcrdma_version;
	*p++ = cpu_to_be32(r_xprt->rx_buf.rb_max_requests);

	/* When the ULP employs a GSS flavor that guarantees integrity
	 * or privacy, direct data placement of individual data items
	 * is not allowed.
	 */
	ddp_allowed = !(rqst->rq_cred->cr_auth->au_flags &
			RPCAUTH_AUTH_DATATOUCH);

	/*
	 * Chunks needed for results?
	 *
	 * o If the expected result is under the inline threshold, all ops
	 *   return as inline.
	 * o Large read ops return data as write chunk(s), header as
	 *   inline.
	 * o Large non-read ops return as a single reply chunk.
	 */
	if (rpcrdma_results_inline(r_xprt, rqst))
		wtype = rpcrdma_noch;
	else if ((ddp_allowed && rqst->rq_rcv_buf.flags & XDRBUF_READ) &&
		 rpcrdma_nonpayload_inline(r_xprt, rqst))
		wtype = rpcrdma_writech;
	else
		wtype = rpcrdma_replych;

	/*
	 * Chunks needed for arguments?
	 *
	 * o If the total request is under the inline threshold, all ops
	 *   are sent as inline.
	 * o Large write ops transmit data as read chunk(s), header as
	 *   inline.
	 * o Large non-write ops are sent with the entire message as a
	 *   single read chunk (protocol 0-position special case).
	 *
	 * This assumes that the upper layer does not present a request
	 * that is both write-bound and read-bound.
	 */
	if (rpcrdma_args_inline(r_xprt, rqst)) {
		*p++ = rdma_msg;
		rtype = rpcrdma_noch;
	} else if (ddp_allowed && rqst->rq_snd_buf.flags & XDRBUF_WRITE) {
		*p++ = rdma_msg;
		rtype = rpcrdma_readch;
	} else {
		r_xprt->rx_stats.nomsg_call_count++;
		*p++ = rdma_nomsg;
		rtype = rpcrdma_areadch;
	}

	/* If this is a retransmit, discard previously registered
	 * chunks. Very likely the connection has been replaced,
	 * so these registrations are invalid and unusable.
	 */
	while (unlikely(!list_empty(&req->rl_registered))) {
		struct rpcrdma_mr *mr;

		mr = rpcrdma_mr_pop(&req->rl_registered);
		rpcrdma_mr_recycle(mr);
	}

	/* This implementation supports the following combinations
	 * of chunk lists in one RPC-over-RDMA Call message:
	 *
	 *   - Read list
	 *   - Write list
	 *   - Reply chunk
	 *   - Read list + Reply chunk
	 *
	 * It might not yet support the following combinations:
	 *
	 *   - Read list + Write list
	 *
	 * It does not support the following combinations:
	 *
	 *   - Write list + Reply chunk
	 *   - Read list + Write list + Reply chunk
	 *
	 * This implementation supports only a single chunk in each
	 * Read or Write list. Thus for example the client cannot
	 * send a Call message with a Position Zero Read chunk and a
	 * regular Read chunk at the same time.
	 */
	ret = rpcrdma_encode_read_list(r_xprt, req, rqst, rtype);
	if (ret)
		goto out_err;
	ret = rpcrdma_encode_write_list(r_xprt, req, rqst, wtype);
	if (ret)
		goto out_err;
	ret = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, wtype);
	if (ret)
		goto out_err;

	ret = rpcrdma_prepare_send_sges(r_xprt, req, req->rl_hdrbuf.len,
					&rqst->rq_snd_buf, rtype);
	if (ret)
		goto out_err;

	trace_xprtrdma_marshal(req, rtype, wtype);
	return 0;

out_err:
	trace_xprtrdma_marshal_failed(rqst, ret);
	r_xprt->rx_stats.failed_marshal_count++;
	frwr_reset(req);
	return ret;
}

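/**
 * rpcrdma_inline_fixup - Scatter inline received data into rqst's iovecs
 * @rqst: controlling RPC request
 * @srcp: points to RPC message payload in receive buffer
 * @copy_len: remaining length of receive buffer content
 * @pad: Write chunk pad bytes needed (zero for pure inline)
 *
 * The upper layer has set the maximum number of bytes it can
 * receive in each component of rq_rcv_buf. These values are set in
 * the head.iov_len, page_len, tail.iov_len, and buflen fields.
 *
 * In many cases this function simply updates iov_base pointers in
 * rq_rcv_buf to point directly at the received reply data, to
 * avoid copying reply data.
 *
 * Returns the count of bytes which had to be memcopied.
 */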
static unsigned long
rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
{
	unsigned long fixup_copy_count;
	int i, npages, curlen;
	char *destp;
	struct page **ppages;
	int page_base;

	/* The head iovec is redirected to the RPC reply message
	 * in the receive buffer, to avoid a memcopy.
	 */
	rqst->rq_rcv_buf.head[0].iov_base = srcp;
	rqst->rq_private_buf.head[0].iov_base = srcp;

	/* The contents of the receive buffer that follow
	 * head.iov_len bytes are copied into the page list.
	 */
	curlen = rqst->rq_rcv_buf.head[0].iov_len;
	if (curlen > copy_len)
		curlen = copy_len;
	trace_xprtrdma_fixup(rqst, copy_len, curlen);
	srcp += curlen;
	copy_len -= curlen;

	ppages = rqst->rq_rcv_buf.pages +
		(rqst->rq_rcv_buf.page_base >> PAGE_SHIFT);
	page_base = offset_in_page(rqst->rq_rcv_buf.page_base);
	fixup_copy_count = 0;
	if (copy_len && rqst->rq_rcv_buf.page_len) {
		int pagelist_len;

		pagelist_len = rqst->rq_rcv_buf.page_len;
		if (pagelist_len > copy_len)
			pagelist_len = copy_len;
		npages = PAGE_ALIGN(page_base + pagelist_len) >> PAGE_SHIFT;
		for (i = 0; i < npages; i++) {
			curlen = PAGE_SIZE - page_base;
			if (curlen > pagelist_len)
				curlen = pagelist_len;

			trace_xprtrdma_fixup_pg(rqst, i, srcp,
						copy_len, curlen);
			destp = kmap_atomic(ppages[i]);
			memcpy(destp + page_base, srcp, curlen);
			flush_dcache_page(ppages[i]);
			kunmap_atomic(destp);
			srcp += curlen;
			copy_len -= curlen;
			fixup_copy_count += curlen;
			pagelist_len -= curlen;
			if (!pagelist_len)
				break;
			page_base = 0;
		}

		/* Implicit padding for the last segment in a Write
		 * chunk is inserted inline at the front of the tail
		 * iovec. The upper layer ignores the content of
		 * the pad. Simply ensure inline content in the tail
		 * that follows the Write chunk is properly aligned.
		 */
		if (pad)
			srcp -= pad;
	}

	/* The tail iovec is redirected to the remaining data
	 * in the receive buffer, to avoid a memcopy.
	 */
	if (copy_len || pad) {
		rqst->rq_rcv_buf.tail[0].iov_base = srcp;
		rqst->rq_private_buf.tail[0].iov_base = srcp;
	}

	return fixup_copy_count;
}

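/* By convention, backchannel calls arrive via rdma_msg type
 * messages, and never populate the chunk lists. This makes
 * the RPC/RDMA header small and fixed in size, so it is
 * straightforward to match the RPC and RDMA headers.
 */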
static bool
rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
{
	struct xdr_stream *xdr = &rep->rr_stream;
	__be32 *p;

	if (rep->rr_proc != rdma_msg)
		return false;

	/* Peek at stream contents without advancing. */
	p = xdr_inline_decode(xdr, 0);

	/* Chunk lists */
	if (*p++ != xdr_zero)
		return false;
	if (*p++ != xdr_zero)
		return false;
	if (*p++ != xdr_zero)
		return false;

	/* RPC header */
	if (*p++ != rep->rr_xid)
		return false;
	if (*p != cpu_to_be32(RPC_CALL))
		return false;

	/* Now that we are sure this is a backchannel call,
	 * advance to the RPC header.
	 */
	p = xdr_inline_decode(xdr, 3 * sizeof(*p));
	if (unlikely(!p))
		goto out_short;

	rpcrdma_bc_receive_call(r_xprt, rep);
	return true;

out_short:
	pr_warn("RPC/RDMA short backward direction call\n");
	return true;
}
#else
{
	return false;
}
#endif

static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length)
{
	u32 handle;
	u64 offset;
	__be32 *p;

	p = xdr_inline_decode(xdr, 4 * sizeof(*p));
	if (unlikely(!p))
		return -EIO;

	handle = be32_to_cpup(p++);
	*length = be32_to_cpup(p++);
	xdr_decode_hyper(p, &offset);

	trace_xprtrdma_decode_seg(handle, *length, offset);
	return 0;
}

static int decode_write_chunk(struct xdr_stream *xdr, u32 *length)
{
	u32 segcount, seglength;
	__be32 *p;

	p = xdr_inline_decode(xdr, sizeof(*p));
	if (unlikely(!p))
		return -EIO;

	*length = 0;
	segcount = be32_to_cpup(p);
	while (segcount--) {
		if (decode_rdma_segment(xdr, &seglength))
			return -EIO;
		*length += seglength;
	}

	return 0;
}

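/* In RPC-over-RDMA Version One replies, a Read list is never
 * expected. This decoder is a stub that returns a failure if
 * a Read list is present.
 */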
static int decode_read_list(struct xdr_stream *xdr)
{
	__be32 *p;

	p = xdr_inline_decode(xdr, sizeof(*p));
	if (unlikely(!p))
		return -EIO;
	if (unlikely(*p != xdr_zero))
		return -EIO;
	return 0;
}

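/* Supports only one Write chunk in the Write list
 */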
static int decode_write_list(struct xdr_stream *xdr, u32 *length)
{
	u32 chunklen;
	bool first;
	__be32 *p;

	*length = 0;
	first = true;
	do {
		p = xdr_inline_decode(xdr, sizeof(*p));
		if (unlikely(!p))
			return -EIO;
		if (*p == xdr_zero)
			break;
		if (!first)
			return -EIO;

		if (decode_write_chunk(xdr, &chunklen))
			return -EIO;
		*length += chunklen;
		first = false;
	} while (true);
	return 0;
}

static int decode_reply_chunk(struct xdr_stream *xdr, u32 *length)
{
	__be32 *p;

	p = xdr_inline_decode(xdr, sizeof(*p));
	if (unlikely(!p))
		return -EIO;

	*length = 0;
	if (*p != xdr_zero)
		if (decode_write_chunk(xdr, length))
			return -EIO;
	return 0;
}

static int
rpcrdma_decode_msg(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
		   struct rpc_rqst *rqst)
{
	struct xdr_stream *xdr = &rep->rr_stream;
	u32 writelist, replychunk, rpclen;
	char *base;

	/* Decode the chunk lists */
	if (decode_read_list(xdr))
		return -EIO;
	if (decode_write_list(xdr, &writelist))
		return -EIO;
	if (decode_reply_chunk(xdr, &replychunk))
		return -EIO;

	/* RDMA_MSG sanity checks */
	if (unlikely(replychunk))
		return -EIO;

	/* Build the RPC reply's Payload stream in rqst->rq_rcv_buf */
	base = (char *)xdr_inline_decode(xdr, 0);
	rpclen = xdr_stream_remaining(xdr);
	r_xprt->rx_stats.fixup_copy_count +=
		rpcrdma_inline_fixup(rqst, base, rpclen, writelist & 3);

	r_xprt->rx_stats.total_rdma_reply += writelist;
	return rpclen + xdr_align_size(writelist);
}

static noinline int
rpcrdma_decode_nomsg(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
{
	struct xdr_stream *xdr = &rep->rr_stream;
	u32 writelist, replychunk;

	/* Decode the chunk lists */
	if (decode_read_list(xdr))
		return -EIO;
	if (decode_write_list(xdr, &writelist))
		return -EIO;
	if (decode_reply_chunk(xdr, &replychunk))
		return -EIO;

	/* RDMA_NOMSG sanity checks */
	if (unlikely(writelist))
		return -EIO;
	if (unlikely(!replychunk))
		return -EIO;

	/* Reply chunk buffer already is the reply vector */
	r_xprt->rx_stats.total_rdma_reply += replychunk;
	return replychunk;
}

static noinline int
rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
		     struct rpc_rqst *rqst)
{
	struct xdr_stream *xdr = &rep->rr_stream;
	__be32 *p;

	p = xdr_inline_decode(xdr, sizeof(*p));
	if (unlikely(!p))
		return -EIO;

	switch (*p) {
	case err_vers:
		p = xdr_inline_decode(xdr, 2 * sizeof(*p));
		if (!p)
			break;
		dprintk("RPC: %s: server reports "
			"version error (%u-%u), xid %08x\n", __func__,
			be32_to_cpup(p), be32_to_cpu(*(p + 1)),
			be32_to_cpu(rep->rr_xid));
		break;
	case err_chunk:
		dprintk("RPC: %s: server reports "
			"header decoding error, xid %08x\n", __func__,
			be32_to_cpu(rep->rr_xid));
		break;
	default:
		dprintk("RPC: %s: server reports "
			"unrecognized error %d, xid %08x\n", __func__,
			be32_to_cpup(p), be32_to_cpu(rep->rr_xid));
	}

	r_xprt->rx_stats.bad_reply_count++;
	return -EREMOTEIO;
}

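/**
 * rpcrdma_complete_rqst - Pass completed rqst back to RPC
 * @rep: RPC/RDMA Receive context
 *
 * Reconstruct the RPC reply and complete the transaction
 * while @rqst is still pinned to ensure the rep, rqst, and
 * rq_task pointers remain stable.
 */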
void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
{
	struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
	struct rpc_rqst *rqst = rep->rr_rqst;
	int status;

	xprt->reestablish_timeout = 0;

	switch (rep->rr_proc) {
	case rdma_msg:
		status = rpcrdma_decode_msg(r_xprt, rep, rqst);
		break;
	case rdma_nomsg:
		status = rpcrdma_decode_nomsg(r_xprt, rep);
		break;
	case rdma_error:
		status = rpcrdma_decode_error(r_xprt, rep, rqst);
		break;
	default:
		status = -EIO;
	}
	if (status < 0)
		goto out_badheader;

out:
	spin_lock(&xprt->queue_lock);
	xprt_complete_rqst(rqst->rq_task, status);
	xprt_unpin_rqst(rqst);
	spin_unlock(&xprt->queue_lock);
	return;

	/* If the incoming reply terminated a pending RPC, the next
	 * RPC call will post a replacement receive buffer as it is
	 * being marshaled.
	 */
out_badheader:
	trace_xprtrdma_reply_hdr(rep);
	r_xprt->rx_stats.bad_reply_count++;
	goto out;
}

static void rpcrdma_reply_done(struct kref *kref)
{
	struct rpcrdma_req *req =
		container_of(kref, struct rpcrdma_req, rl_kref);

	rpcrdma_complete_rqst(req->rl_reply);
}

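/**
 * rpcrdma_reply_handler - Process received RPC/RDMA messages
 * @rep: Incoming rpcrdma_rep object to process
 *
 * Errors must result in the RPC task either being awakened, or
 * allowed to timeout, to discover the errors at that time.
 */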
void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
{
	struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_req *req;
	struct rpc_rqst *rqst;
	u32 credits;
	__be32 *p;

	/* Fixed transport header fields */
	xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
			rep->rr_hdrbuf.head[0].iov_base, NULL);
	p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
	if (unlikely(!p))
		goto out_shortreply;
	rep->rr_xid = *p++;
	rep->rr_vers = *p++;
	credits = be32_to_cpu(*p++);
	rep->rr_proc = *p++;

	if (rep->rr_vers != rpcrdma_version)
		goto out_badversion;

	if (rpcrdma_is_bcall(r_xprt, rep))
		return;

	/* Match incoming rpcrdma_rep to an rpcrdma_req to
	 * get context for handling any incoming chunks.
	 */
	spin_lock(&xprt->queue_lock);
	rqst = xprt_lookup_rqst(xprt, rep->rr_xid);
	if (!rqst)
		goto out_norqst;
	xprt_pin_rqst(rqst);
	spin_unlock(&xprt->queue_lock);

	if (credits == 0)
		credits = 1;	/* don't deadlock */
	else if (credits > buf->rb_max_requests)
		credits = buf->rb_max_requests;
	if (buf->rb_credits != credits) {
		spin_lock(&xprt->transport_lock);
		buf->rb_credits = credits;
		xprt->cwnd = credits << RPC_CWNDSHIFT;
		spin_unlock(&xprt->transport_lock);
	}

	req = rpcr_to_rdmar(rqst);
	if (req->rl_reply) {
		trace_xprtrdma_leaked_rep(rqst, req->rl_reply);
		rpcrdma_recv_buffer_put(req->rl_reply);
	}
	req->rl_reply = rep;
	rep->rr_rqst = rqst;

	trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);

	if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
		frwr_reminv(rep, &req->rl_registered);
	if (!list_empty(&req->rl_registered))
		frwr_unmap_async(r_xprt, req);
		/* LocalInv completes the RPC */
	else
		kref_put(&req->rl_kref, rpcrdma_reply_done);
	return;

out_badversion:
	trace_xprtrdma_reply_vers(rep);
	goto out;

out_norqst:
	spin_unlock(&xprt->queue_lock);
	trace_xprtrdma_reply_rqst(rep);
	goto out;

out_shortreply:
	trace_xprtrdma_reply_short(rep);

out:
	rpcrdma_recv_buffer_put(rep);
}