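/*
 * rpc_rdma.c
 *
 * This file contains the guts of the RPC-over-RDMA protocol, and
 * does marshaling/unmarshaling, etc. It is also where interfacing
 * to the Linux RPC framework lives.
 */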
#include <linux/highmem.h>

#include <linux/sunrpc/svc_rdma.h>

#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

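/* Returns size of largest RPC-over-RDMA header in a Call message
 *
 * The largest Call header contains a full-size Read list and a
 * minimal Reply chunk.
 */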
static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs)
{
	unsigned int size;

	/* Fixed header fields and list discriminators */
	size = RPCRDMA_HDRLEN_MIN;

	/* Maximum Read list size */
	size += maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32);

	/* Minimal Read chunk size */
	size += sizeof(__be32);	/* segment count */
	size += rpcrdma_segment_maxsz * sizeof(__be32);
	size += sizeof(__be32);	/* list discriminator */

	return size;
}

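/* Returns size of largest RPC-over-RDMA header in a Reply message
 *
 * There is only one Write list or one Reply chunk per Reply
 * message.  The larger list is the Write list.
 */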
static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
{
	unsigned int size;

	/* Fixed header fields and list discriminators */
	size = RPCRDMA_HDRLEN_MIN;

	/* Maximum Write list size */
	size += sizeof(__be32);	/* segment count */
	size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32);
	size += sizeof(__be32);	/* list discriminator */

	return size;
}

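/**
 * rpcrdma_set_max_header_sizes - Initialize inline payload sizes
 * @r_xprt: transport instance to initialize
 *
 * The max_inline fields are initialized to the largest inline payload
 * sizes after the inline threshold and maximum segment count have
 * been negotiated.
 */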
void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt)
{
	unsigned int maxsegs = r_xprt->rx_ia.ri_max_segs;
	struct rpcrdma_ep *ep = &r_xprt->rx_ep;

	ep->rep_max_inline_send =
		ep->rep_inline_send - rpcrdma_max_call_header_size(maxsegs);
	ep->rep_max_inline_recv =
		ep->rep_inline_recv - rpcrdma_max_reply_header_size(maxsegs);
}

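/* The client can send a request inline as long as the RPCRDMA header
 * plus the RPC call fit under the transport's inline limit. If the
 * combined call message size exceeds that limit, the client must use
 * Read chunks to convey the upper part of the RPC payload.
 *
 * A Read chunk is also required if sending the RPC call inline would
 * exceed this device's max_sge limit.
 */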
static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
				struct rpc_rqst *rqst)
{
	struct xdr_buf *xdr = &rqst->rq_snd_buf;
	unsigned int count, remaining, offset;

	if (xdr->len > r_xprt->rx_ep.rep_max_inline_send)
		return false;

	if (xdr->page_len) {
		remaining = xdr->page_len;
		offset = offset_in_page(xdr->page_base);
		count = RPCRDMA_MIN_SEND_SGES;
		while (remaining) {
			remaining -= min_t(unsigned int,
					   PAGE_SIZE - offset, remaining);
			offset = 0;
			if (++count > r_xprt->rx_ia.ri_max_send_sges)
				return false;
		}
	}

	return true;
}

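/* The client can't know how large the actual reply will be. Thus it
 * plans for the largest possible reply for that particular ULP
 * operation. If the maximum combined reply message size exceeds that
 * limit, the client must provide a write list or a reply chunk for
 * this request.
 */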
static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
				   struct rpc_rqst *rqst)
{
	return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep.rep_max_inline_recv;
}

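/* The client is required to provide a Reply chunk if the maximum
 * size of the non-payload part of the RPC Reply is larger than
 * the inline threshold.
 */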
static bool
rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt,
			  const struct rpc_rqst *rqst)
{
	const struct xdr_buf *buf = &rqst->rq_rcv_buf;

	return (buf->head[0].iov_len + buf->tail[0].iov_len) <
		r_xprt->rx_ep.rep_max_inline_recv;
}

/* Split @vec on page boundaries into scatter/gather segments.
 *
 * Returns a pointer to the next available segment, and bumps the
 * total number of segments consumed in @n.
 */
static struct rpcrdma_mr_seg *
rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg,
		     unsigned int *n)
{
	u32 remaining, page_offset;
	char *base;

	base = vec->iov_base;
	page_offset = offset_in_page(base);
	remaining = vec->iov_len;
	while (remaining) {
		seg->mr_page = NULL;
		seg->mr_offset = base;
		seg->mr_len = min_t(u32, PAGE_SIZE - page_offset, remaining);
		remaining -= seg->mr_len;
		base += seg->mr_len;
		++seg;
		++(*n);
		page_offset = 0;
	}
	return seg;
}

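/* Convert @xdrbuf into SGEs no larger than a page each. As they
 * are registered, these SGEs are then coalesced into RDMA segments
 * when the selected memreg mode supports it.
 *
 * Returns positive number of SGEs consumed, or a negative errno.
 */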
static int
rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
		     unsigned int pos, enum rpcrdma_chunktype type,
		     struct rpcrdma_mr_seg *seg)
{
	unsigned long page_base;
	unsigned int len, n;
	struct page **ppages;

	n = 0;
	if (pos == 0)
		seg = rpcrdma_convert_kvec(&xdrbuf->head[0], seg, &n);

	len = xdrbuf->page_len;
	ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
	page_base = offset_in_page(xdrbuf->page_base);
	while (len) {
		/* ACL likes to be lazy in allocating pages - ACLs
		 * are small by default but can get huge.
		 */
		if (unlikely(xdrbuf->flags & XDRBUF_SPARSE_PAGES)) {
			if (!*ppages)
				*ppages = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
			if (!*ppages)
				return -ENOBUFS;
		}
		seg->mr_page = *ppages;
		seg->mr_offset = (char *)page_base;
		seg->mr_len = min_t(u32, PAGE_SIZE - page_base, len);
		len -= seg->mr_len;
		++ppages;
		++seg;
		++n;
		page_base = 0;
	}

	/* When encoding a Read chunk, the tail iovec contains an
	 * XDR pad and may be omitted.
	 */
	if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup)
		goto out;

	/* When encoding a Write chunk, some servers need to see an
	 * extra segment for non-XDR-aligned Write chunks. The upper
	 * layer provides space in the tail iovec that may be used
	 * for this purpose.
	 */
	if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup)
		goto out;

	if (xdrbuf->tail[0].iov_len)
		seg = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, &n);

out:
	if (unlikely(n > RPCRDMA_MAX_SEGS))
		return -EIO;
	return n;
}

static inline int
encode_item_present(struct xdr_stream *xdr)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, sizeof(*p));
	if (unlikely(!p))
		return -EMSGSIZE;

	*p = xdr_one;
	return 0;
}

static inline int
encode_item_not_present(struct xdr_stream *xdr)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, sizeof(*p));
	if (unlikely(!p))
		return -EMSGSIZE;

	*p = xdr_zero;
	return 0;
}

static void
xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr *mr)
{
	*iptr++ = cpu_to_be32(mr->mr_handle);
	*iptr++ = cpu_to_be32(mr->mr_length);
	xdr_encode_hyper(iptr, mr->mr_offset);
}

static int
encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, 4 * sizeof(*p));
	if (unlikely(!p))
		return -EMSGSIZE;

	xdr_encode_rdma_segment(p, mr);
	return 0;
}

static int
encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr,
		    u32 position)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, 6 * sizeof(*p));
	if (unlikely(!p))
		return -EMSGSIZE;

	*p++ = xdr_one;			/* Item present */
	*p++ = cpu_to_be32(position);
	xdr_encode_rdma_segment(p, mr);
	return 0;
}

static struct rpcrdma_mr_seg *rpcrdma_mr_prepare(struct rpcrdma_xprt *r_xprt,
						 struct rpcrdma_req *req,
						 struct rpcrdma_mr_seg *seg,
						 int nsegs, bool writing,
						 struct rpcrdma_mr **mr)
{
	*mr = rpcrdma_mr_pop(&req->rl_free_mrs);
	if (!*mr) {
		*mr = rpcrdma_mr_get(r_xprt);
		if (!*mr)
			goto out_getmr_err;
		trace_xprtrdma_mr_get(req);
		(*mr)->mr_req = req;
	}

	rpcrdma_mr_push(*mr, &req->rl_registered);
	return frwr_map(r_xprt, seg, nsegs, writing, req->rl_slot.rq_xid, *mr);

out_getmr_err:
	trace_xprtrdma_nomrs(req);
	xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
	rpcrdma_mrs_refresh(r_xprt);
	return ERR_PTR(-EAGAIN);
}

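/* Register and XDR encode the Read list. Supports encoding a list of
 * read segments that belong to a single read chunk.
 *
 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
 *
 *  Read chunklist (a linked list):
 *   N elements, position P (same P for all chunks of same arg!):
 *    1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0
 *
 * Returns zero on success, or a negative errno if a failure occurred.
 * @xdr is advanced to the next position in the stream.
 *
 * Only a single @pos value is currently supported.
 */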
static int rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req,
				    struct rpc_rqst *rqst,
				    enum rpcrdma_chunktype rtype)
{
	struct xdr_stream *xdr = &req->rl_stream;
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mr *mr;
	unsigned int pos;
	int nsegs;

	if (rtype == rpcrdma_noch_pullup || rtype == rpcrdma_noch_mapped)
		goto done;

	pos = rqst->rq_snd_buf.head[0].iov_len;
	if (rtype == rpcrdma_areadch)
		pos = 0;
	seg = req->rl_segments;
	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos,
				     rtype, seg);
	if (nsegs < 0)
		return nsegs;

	do {
		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, false, &mr);
		if (IS_ERR(seg))
			return PTR_ERR(seg);

		if (encode_read_segment(xdr, mr, pos) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_read(rqst->rq_task, pos, mr, nsegs);
		r_xprt->rx_stats.read_chunk_count++;
		nsegs -= mr->mr_nents;
	} while (nsegs);

done:
	return encode_item_not_present(xdr);
}

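/* Register and XDR encode the Write list. Supports encoding a list
 * containing one array of plain segments that belong to a single
 * write chunk.
 *
 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
 *
 *  Write chunklist (a list of (one) counted array):
 *   N elements:
 *    1 - N - HLOO - HLOO - ... - HLOO - 0
 *
 * Returns zero on success, or a negative errno if a failure occurred.
 * @xdr is advanced to the next position in the stream.
 *
 * Only a single Write chunk is currently supported.
 */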
static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt,
				     struct rpcrdma_req *req,
				     struct rpc_rqst *rqst,
				     enum rpcrdma_chunktype wtype)
{
	struct xdr_stream *xdr = &req->rl_stream;
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mr *mr;
	int nsegs, nchunks;
	__be32 *segcount;

	if (wtype != rpcrdma_writech)
		goto done;

	seg = req->rl_segments;
	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
				     rqst->rq_rcv_buf.head[0].iov_len,
				     wtype, seg);
	if (nsegs < 0)
		return nsegs;

	if (encode_item_present(xdr) < 0)
		return -EMSGSIZE;
	segcount = xdr_reserve_space(xdr, sizeof(*segcount));
	if (unlikely(!segcount))
		return -EMSGSIZE;
	/* Actual value encoded below */

	nchunks = 0;
	do {
		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, true, &mr);
		if (IS_ERR(seg))
			return PTR_ERR(seg);

		if (encode_rdma_segment(xdr, mr) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_write(rqst->rq_task, mr, nsegs);
		r_xprt->rx_stats.write_chunk_count++;
		r_xprt->rx_stats.total_rdma_request += mr->mr_length;
		nchunks++;
		nsegs -= mr->mr_nents;
	} while (nsegs);

	/* Update count of segments in this Write chunk */
	*segcount = cpu_to_be32(nchunks);

done:
	return encode_item_not_present(xdr);
}

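/* Register and XDR encode the Reply chunk. Supports encoding an array
 * of plain segments that belong to a single write (reply) chunk.
 *
 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
 *
 *  Reply chunk (a counted array):
 *   N elements:
 *    1 - N - HLOO - HLOO - ... - HLOO
 *
 * Returns zero on success, or a negative errno if a failure occurred.
 * @xdr is advanced to the next position in the stream.
 */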
static int rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
				      struct rpcrdma_req *req,
				      struct rpc_rqst *rqst,
				      enum rpcrdma_chunktype wtype)
{
	struct xdr_stream *xdr = &req->rl_stream;
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mr *mr;
	int nsegs, nchunks;
	__be32 *segcount;

	if (wtype != rpcrdma_replych)
		return encode_item_not_present(xdr);

	seg = req->rl_segments;
	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
	if (nsegs < 0)
		return nsegs;

	if (encode_item_present(xdr) < 0)
		return -EMSGSIZE;
	segcount = xdr_reserve_space(xdr, sizeof(*segcount));
	if (unlikely(!segcount))
		return -EMSGSIZE;
	/* Actual value encoded below */

	nchunks = 0;
	do {
		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, true, &mr);
		if (IS_ERR(seg))
			return PTR_ERR(seg);

		if (encode_rdma_segment(xdr, mr) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_reply(rqst->rq_task, mr, nsegs);
		r_xprt->rx_stats.reply_chunk_count++;
		r_xprt->rx_stats.total_rdma_request += mr->mr_length;
		nchunks++;
		nsegs -= mr->mr_nents;
	} while (nsegs);

	/* Update count of segments in this Reply chunk */
	*segcount = cpu_to_be32(nchunks);

	return 0;
}

static void rpcrdma_sendctx_done(struct kref *kref)
{
	struct rpcrdma_req *req =
		container_of(kref, struct rpcrdma_req, rl_kref);
	struct rpcrdma_rep *rep = req->rl_reply;

	rpcrdma_complete_rqst(rep);
	rep->rr_rxprt->rx_stats.reply_waits_for_send++;
}

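/**
 * rpcrdma_sendctx_unmap - DMA-unmap Send buffer
 * @sc: sendctx containing SGEs to unmap
 *
 */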
void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
{
	struct rpcrdma_regbuf *rb = sc->sc_req->rl_sendbuf;
	struct ib_sge *sge;

	if (!sc->sc_unmap_count)
		return;

	/* The first two SGEs contain the transport header and
	 * the inline buffer. These are always left mapped so
	 * they can be cheaply re-used.
	 */
	for (sge = &sc->sc_sges[2]; sc->sc_unmap_count;
	     ++sge, --sc->sc_unmap_count)
		ib_dma_unmap_page(rdmab_device(rb), sge->addr, sge->length,
				  DMA_TO_DEVICE);

	kref_put(&sc->sc_req->rl_kref, rpcrdma_sendctx_done);
}

/* Prepare an SGE for the RPC-over-RDMA transport header.
 */
static bool rpcrdma_prepare_hdr_sge(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req, u32 len)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct rpcrdma_regbuf *rb = req->rl_rdmabuf;
	struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];

	if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
		return false;
	sge->addr = rdmab_addr(rb);
	sge->length = len;
	sge->lkey = rdmab_lkey(rb);

	ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length,
				      DMA_TO_DEVICE);
	return true;
}

/* The head iovec is straightforward, as it is usually already
 * DMA-mapped. Sync the content that has changed.
 */
static bool rpcrdma_prepare_head_iov(struct rpcrdma_xprt *r_xprt,
				     struct rpcrdma_req *req, unsigned int len)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];
	struct rpcrdma_regbuf *rb = req->rl_sendbuf;

	if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
		return false;

	sge->addr = rdmab_addr(rb);
	sge->length = len;
	sge->lkey = rdmab_lkey(rb);

	ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length,
				      DMA_TO_DEVICE);
	return true;
}

/* If there is a page list present, DMA map and prepare an
 * SGE for each page to be sent.
 */
static bool rpcrdma_prepare_pagelist(struct rpcrdma_req *req,
				     struct xdr_buf *xdr)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct rpcrdma_regbuf *rb = req->rl_sendbuf;
	unsigned int page_base, len, remaining;
	struct page **ppages;
	struct ib_sge *sge;

	ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
	page_base = offset_in_page(xdr->page_base);
	remaining = xdr->page_len;
	while (remaining) {
		sge = &sc->sc_sges[req->rl_wr.num_sge++];
		len = min_t(unsigned int, PAGE_SIZE - page_base, remaining);
		sge->addr = ib_dma_map_page(rdmab_device(rb), *ppages,
					    page_base, len, DMA_TO_DEVICE);
		if (ib_dma_mapping_error(rdmab_device(rb), sge->addr))
			goto out_mapping_err;

		sge->length = len;
		sge->lkey = rdmab_lkey(rb);

		sc->sc_unmap_count++;
		ppages++;
		remaining -= len;
		page_base = 0;
	}

	return true;

out_mapping_err:
	trace_xprtrdma_dma_maperr(sge->addr);
	return false;
}

/* The tail iovec may include an XDR pad for the page list,
 * as well as additional content, and may not reside in the
 * same page as the head iovec.
 */
static bool rpcrdma_prepare_tail_iov(struct rpcrdma_req *req,
				     struct xdr_buf *xdr,
				     unsigned int page_base, unsigned int len)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];
	struct rpcrdma_regbuf *rb = req->rl_sendbuf;
	struct page *page = virt_to_page(xdr->tail[0].iov_base);

	sge->addr = ib_dma_map_page(rdmab_device(rb), page, page_base, len,
				    DMA_TO_DEVICE);
	if (ib_dma_mapping_error(rdmab_device(rb), sge->addr))
		goto out_mapping_err;

	sge->length = len;
	sge->lkey = rdmab_lkey(rb);
	++sc->sc_unmap_count;
	return true;

out_mapping_err:
	trace_xprtrdma_dma_maperr(sge->addr);
	return false;
}

/* Copy the tail to the end of the head buffer.
 */
static void rpcrdma_pullup_tail_iov(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req,
				    struct xdr_buf *xdr)
{
	unsigned char *dst;

	dst = (unsigned char *)xdr->head[0].iov_base;
	dst += xdr->head[0].iov_len + xdr->page_len;
	memmove(dst, xdr->tail[0].iov_base, xdr->tail[0].iov_len);
	r_xprt->rx_stats.pullup_copy_count += xdr->tail[0].iov_len;
}

/* Copy pagelist content into the head buffer.
 */
static void rpcrdma_pullup_pagelist(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req,
				    struct xdr_buf *xdr)
{
	unsigned int len, page_base, remaining;
	struct page **ppages;
	unsigned char *src, *dst;

	dst = (unsigned char *)xdr->head[0].iov_base;
	dst += xdr->head[0].iov_len;
	ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
	page_base = offset_in_page(xdr->page_base);
	remaining = xdr->page_len;
	while (remaining) {
		src = page_address(*ppages);
		src += page_base;
		len = min_t(unsigned int, PAGE_SIZE - page_base, remaining);
		memcpy(dst, src, len);
		r_xprt->rx_stats.pullup_copy_count += len;

		ppages++;
		dst += len;
		remaining -= len;
		page_base = 0;
	}
}

/* Copy the contents of @xdr into @rl_sendbuf and DMA sync it.
 * When the head, pagelist, and tail are small, a pull-up copy
 * is considerably less costly than DMA mapping the components
 * of @xdr.
 *
 * Assumptions:
 *  - the caller has already verified that the total length
 *    of the RPC Call body will fit into @rl_sendbuf.
 */
static bool rpcrdma_prepare_noch_pullup(struct rpcrdma_xprt *r_xprt,
					struct rpcrdma_req *req,
					struct xdr_buf *xdr)
{
	if (unlikely(xdr->tail[0].iov_len))
		rpcrdma_pullup_tail_iov(r_xprt, req, xdr);

	if (unlikely(xdr->page_len))
		rpcrdma_pullup_pagelist(r_xprt, req, xdr);

	/* The whole RPC message resides in the head iovec now */
	return rpcrdma_prepare_head_iov(r_xprt, req, xdr->len);
}

static bool rpcrdma_prepare_noch_mapped(struct rpcrdma_xprt *r_xprt,
					struct rpcrdma_req *req,
					struct xdr_buf *xdr)
{
	struct kvec *tail = &xdr->tail[0];

	if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len))
		return false;
	if (xdr->page_len)
		if (!rpcrdma_prepare_pagelist(req, xdr))
			return false;
	if (tail->iov_len)
		if (!rpcrdma_prepare_tail_iov(req, xdr,
					      offset_in_page(tail->iov_base),
					      tail->iov_len))
			return false;

	if (req->rl_sendctx->sc_unmap_count)
		kref_get(&req->rl_kref);
	return true;
}

static bool rpcrdma_prepare_readch(struct rpcrdma_xprt *r_xprt,
				   struct rpcrdma_req *req,
				   struct xdr_buf *xdr)
{
	if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len))
		return false;

	/* If there is a Read chunk, the page list is being handled
	 * via explicit RDMA, and thus is skipped here.
	 */

	/* Do not include the tail if it is only an XDR pad */
	if (xdr->tail[0].iov_len > 3) {
		unsigned int page_base, len;

		/* If the content in the page list is an odd length,
		 * xdr_write_pages() adds a pad at the beginning of
		 * the tail iovec. Force the tail's non-pad content to
		 * land at the next XDR position in the Send message.
		 */
		page_base = offset_in_page(xdr->tail[0].iov_base);
		len = xdr->tail[0].iov_len;
		page_base += len & 3;
		len -= len & 3;
		if (!rpcrdma_prepare_tail_iov(req, xdr, page_base, len))
			return false;
		kref_get(&req->rl_kref);
	}

	return true;
}

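/**
 * rpcrdma_prepare_send_sges - Construct SGEs for a Send WR
 * @r_xprt: controlling transport
 * @req: context of RPC Call being marshalled
 * @hdrlen: size of transport header, in bytes
 * @xdr: xdr_buf containing RPC Call
 * @rtype: chunk type being encoded
 *
 * Returns 0 on success; otherwise a negative errno is returned.
 */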
inline int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
				     struct rpcrdma_req *req, u32 hdrlen,
				     struct xdr_buf *xdr,
				     enum rpcrdma_chunktype rtype)
{
	int ret;

	ret = -EAGAIN;
	req->rl_sendctx = rpcrdma_sendctx_get_locked(r_xprt);
	if (!req->rl_sendctx)
		goto out_nosc;
	req->rl_sendctx->sc_unmap_count = 0;
	req->rl_sendctx->sc_req = req;
	kref_init(&req->rl_kref);
	req->rl_wr.wr_cqe = &req->rl_sendctx->sc_cqe;
	req->rl_wr.sg_list = req->rl_sendctx->sc_sges;
	req->rl_wr.num_sge = 0;
	req->rl_wr.opcode = IB_WR_SEND;

	ret = -EIO;
	if (!rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen))
		goto out_unmap;

	switch (rtype) {
	case rpcrdma_noch_pullup:
		if (!rpcrdma_prepare_noch_pullup(r_xprt, req, xdr))
			goto out_unmap;
		break;
	case rpcrdma_noch_mapped:
		if (!rpcrdma_prepare_noch_mapped(r_xprt, req, xdr))
			goto out_unmap;
		break;
	case rpcrdma_readch:
		if (!rpcrdma_prepare_readch(r_xprt, req, xdr))
			goto out_unmap;
		break;
	case rpcrdma_areadch:
		break;
	default:
		goto out_unmap;
	}

	return 0;

out_unmap:
	rpcrdma_sendctx_unmap(req->rl_sendctx);
out_nosc:
	trace_xprtrdma_prepsend_failed(&req->rl_slot, ret);
	return ret;
}

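/**
 * rpcrdma_marshal_req - Marshal and send one RPC request
 * @r_xprt: controlling transport
 * @rqst: RPC request to be marshaled
 *
 * For the RPC in "rqst", this function:
 *  - Chooses the transfer mode (eg., RDMA_MSG or RDMA_NOMSG)
 *  - Registers Read, Write, and Reply chunks
 *  - Constructs the transport header
 *  - Prepares the Send SGEs for the transport header and request
 *
 * Returns:
 *	%0 if the RPC was marshaled successfully,
 *	%-EAGAIN if the caller should call again with the same arguments,
 *	%-EMSGSIZE if the transport header is too small,
 *	%-EIO if a permanent problem occurred while marshaling.
 */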
int
rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
{
	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
	struct xdr_stream *xdr = &req->rl_stream;
	enum rpcrdma_chunktype rtype, wtype;
	struct xdr_buf *buf = &rqst->rq_snd_buf;
	bool ddp_allowed;
	__be32 *p;
	int ret;

	rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
	xdr_init_encode(xdr, &req->rl_hdrbuf, rdmab_data(req->rl_rdmabuf),
			rqst);

	/* Fixed header fields */
	ret = -EMSGSIZE;
	p = xdr_reserve_space(xdr, 4 * sizeof(*p));
	if (!p)
		goto out_err;
	*p++ = rqst->rq_xid;
	*p++ = rpcrdma_version;
	*p++ = cpu_to_be32(r_xprt->rx_buf.rb_max_requests);

	/* When the ULP employs a GSS flavor that guarantees integrity
	 * or privacy, direct data placement of individual data items
	 * is not allowed.
	 */
	ddp_allowed = !(rqst->rq_cred->cr_auth->au_flags &
			RPCAUTH_AUTH_DATATOUCH);

	/*
	 * Chunks needed for results?
	 *
	 * o If the expected result is under the inline threshold, all ops
	 *   return as inline.
	 * o Large read ops return data as write chunk(s), header as
	 *   inline.
	 * o Large non-read ops return as a single reply chunk.
	 */
	if (rpcrdma_results_inline(r_xprt, rqst))
		wtype = rpcrdma_noch;
	else if ((ddp_allowed && rqst->rq_rcv_buf.flags & XDRBUF_READ) &&
		 rpcrdma_nonpayload_inline(r_xprt, rqst))
		wtype = rpcrdma_writech;
	else
		wtype = rpcrdma_replych;

	/*
	 * Chunks needed for arguments?
	 *
	 * o If the total request is under the inline threshold, all ops
	 *   are sent as inline.
	 * o Large write ops transmit data as read chunk(s), header as
	 *   inline.
	 * o Large non-write ops are sent with the entire message as a
	 *   single read chunk (protocol 0-position special case).
	 *
	 * This assumes that the upper layer does not present a request
	 * that both has a data payload, and whose non-data arguments
	 * are larger than the inline threshold.
	 */
	if (rpcrdma_args_inline(r_xprt, rqst)) {
		*p++ = rdma_msg;
		rtype = buf->len < rdmab_length(req->rl_sendbuf) ?
			rpcrdma_noch_pullup : rpcrdma_noch_mapped;
	} else if (ddp_allowed && buf->flags & XDRBUF_WRITE) {
		*p++ = rdma_msg;
		rtype = rpcrdma_readch;
	} else {
		r_xprt->rx_stats.nomsg_call_count++;
		*p++ = rdma_nomsg;
		rtype = rpcrdma_areadch;
	}

	/* This implementation supports the following combinations
	 * of chunk lists in one RPC-over-RDMA Call message:
	 *
	 *   - Read list
	 *   - Write list
	 *   - Reply chunk
	 *   - Read list + Reply chunk
	 *
	 * It might not yet support the following combinations:
	 *
	 *   - Read list + Write list
	 *
	 * It does not support the following combinations:
	 *
	 *   - Write list + Reply chunk
	 *   - Read list + Write list + Reply chunk
	 *
	 * This implementation supports only a single chunk in each
	 * Read or Write list. Thus for example the client cannot
	 * send a Call message with a Position Zero Read chunk and a
	 * regular Read chunk at the same time.
	 */
	ret = rpcrdma_encode_read_list(r_xprt, req, rqst, rtype);
	if (ret)
		goto out_err;
	ret = rpcrdma_encode_write_list(r_xprt, req, rqst, wtype);
	if (ret)
		goto out_err;
	ret = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, wtype);
	if (ret)
		goto out_err;

	ret = rpcrdma_prepare_send_sges(r_xprt, req, req->rl_hdrbuf.len,
					buf, rtype);
	if (ret)
		goto out_err;

	trace_xprtrdma_marshal(req, rtype, wtype);
	return 0;

out_err:
	trace_xprtrdma_marshal_failed(rqst, ret);
	r_xprt->rx_stats.failed_marshal_count++;
	frwr_reset(req);
	return ret;
}

static void __rpcrdma_update_cwnd_locked(struct rpc_xprt *xprt,
					 struct rpcrdma_buffer *buf,
					 u32 grant)
{
	buf->rb_credits = grant;
	xprt->cwnd = grant << RPC_CWNDSHIFT;
}

static void rpcrdma_update_cwnd(struct rpcrdma_xprt *r_xprt, u32 grant)
{
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;

	spin_lock(&xprt->transport_lock);
	__rpcrdma_update_cwnd_locked(xprt, &r_xprt->rx_buf, grant);
	spin_unlock(&xprt->transport_lock);
}

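/**
 * rpcrdma_reset_cwnd - Reset the xprt's congestion window
 * @r_xprt: controlling transport instance
 *
 * Prepare @r_xprt for the next connection by reinitializing
 * its credit grant to one (see RFC 8166, Section 3.3.3).
 */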
void rpcrdma_reset_cwnd(struct rpcrdma_xprt *r_xprt)
{
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;

	spin_lock(&xprt->transport_lock);
	xprt->cong = 0;
	__rpcrdma_update_cwnd_locked(xprt, &r_xprt->rx_buf, 1);
	spin_unlock(&xprt->transport_lock);
}

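/* Scatter an inline RPC reply into the upper layer's receive buffer.
 *
 * Where possible this function simply redirects iov_base pointers in
 * rq_rcv_buf to point into the received reply, avoiding a data copy;
 * only the page list is copied.
 *
 * Returns the number of bytes that had to be memcopied.
 */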
static unsigned long
rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
{
	unsigned long fixup_copy_count;
	int i, npages, curlen;
	char *destp;
	struct page **ppages;
	int page_base;

	/* The head iovec is redirected to the RPC reply message
	 * in the receive buffer, to avoid a memcopy.
	 */
	rqst->rq_rcv_buf.head[0].iov_base = srcp;
	rqst->rq_private_buf.head[0].iov_base = srcp;

	/* The contents of the receive buffer that follow
	 * head.iov_len bytes are copied into the page list.
	 */
	curlen = rqst->rq_rcv_buf.head[0].iov_len;
	if (curlen > copy_len)
		curlen = copy_len;
	srcp += curlen;
	copy_len -= curlen;

	ppages = rqst->rq_rcv_buf.pages +
		(rqst->rq_rcv_buf.page_base >> PAGE_SHIFT);
	page_base = offset_in_page(rqst->rq_rcv_buf.page_base);
	fixup_copy_count = 0;
	if (copy_len && rqst->rq_rcv_buf.page_len) {
		int pagelist_len;

		pagelist_len = rqst->rq_rcv_buf.page_len;
		if (pagelist_len > copy_len)
			pagelist_len = copy_len;
		npages = PAGE_ALIGN(page_base + pagelist_len) >> PAGE_SHIFT;
		for (i = 0; i < npages; i++) {
			curlen = PAGE_SIZE - page_base;
			if (curlen > pagelist_len)
				curlen = pagelist_len;

			destp = kmap_atomic(ppages[i]);
			memcpy(destp + page_base, srcp, curlen);
			flush_dcache_page(ppages[i]);
			kunmap_atomic(destp);
			srcp += curlen;
			copy_len -= curlen;
			fixup_copy_count += curlen;
			pagelist_len -= curlen;
			if (!pagelist_len)
				break;
			page_base = 0;
		}

		/* Implicit padding for the last segment in a Write
		 * chunk is inserted inline at the front of the tail
		 * iovec. The upper layer ignores the content of
		 * the pad. Simply ensure inline content in the tail
		 * that follows the Write chunk is properly aligned.
		 */
		if (pad)
			srcp -= pad;
	}

	/* The tail iovec is redirected to the remaining data
	 * in the receive buffer, to avoid a memcopy.
	 */
	if (copy_len || pad) {
		rqst->rq_rcv_buf.tail[0].iov_base = srcp;
		rqst->rq_private_buf.tail[0].iov_base = srcp;
	}

	if (fixup_copy_count)
		trace_xprtrdma_fixup(rqst, fixup_copy_count);
	return fixup_copy_count;
}

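/* By convention, backchannel calls arrive via rdma_msg type
 * messages, and never populate the chunk lists. This makes
 * the RPC/RDMA header small and fixed in size, so it is
 * straightforward to check the RPC header's direction field.
 */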
static bool
rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
{
	struct xdr_stream *xdr = &rep->rr_stream;
	__be32 *p;

	if (rep->rr_proc != rdma_msg)
		return false;

	/* Peek at stream contents without advancing. */
	p = xdr_inline_decode(xdr, 0);

	/* Chunk lists */
	if (*p++ != xdr_zero)
		return false;
	if (*p++ != xdr_zero)
		return false;
	if (*p++ != xdr_zero)
		return false;

	/* RPC header */
	if (*p++ != rep->rr_xid)
		return false;
	if (*p != cpu_to_be32(RPC_CALL))
		return false;

	/* Now that we are sure this is a backchannel call,
	 * advance to the RPC header.
	 */
	p = xdr_inline_decode(xdr, 3 * sizeof(*p));
	if (unlikely(!p))
		goto out_short;

	rpcrdma_bc_receive_call(r_xprt, rep);
	return true;

out_short:
	pr_warn("RPC/RDMA short backward direction call\n");
	return true;
}
#else	/* CONFIG_SUNRPC_BACKCHANNEL */
{
	return false;
}
#endif	/* CONFIG_SUNRPC_BACKCHANNEL */

static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length)
{
	u32 handle;
	u64 offset;
	__be32 *p;

	p = xdr_inline_decode(xdr, 4 * sizeof(*p));
	if (unlikely(!p))
		return -EIO;

	handle = be32_to_cpup(p++);
	*length = be32_to_cpup(p++);
	xdr_decode_hyper(p, &offset);

	trace_xprtrdma_decode_seg(handle, *length, offset);
	return 0;
}

static int decode_write_chunk(struct xdr_stream *xdr, u32 *length)
{
	u32 segcount, seglength;
	__be32 *p;

	p = xdr_inline_decode(xdr, sizeof(*p));
	if (unlikely(!p))
		return -EIO;

	*length = 0;
	segcount = be32_to_cpup(p);
	while (segcount--) {
		if (decode_rdma_segment(xdr, &seglength))
			return -EIO;
		*length += seglength;
	}

	return 0;
}

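/* In RPC-over-RDMA Version One replies, a Read list is never
 * expected. This decoder simply skips an outstanding Read list.
 */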
static int decode_read_list(struct xdr_stream *xdr)
{
	__be32 *p;

	p = xdr_inline_decode(xdr, sizeof(*p));
	if (unlikely(!p))
		return -EIO;
	if (unlikely(*p != xdr_zero))
		return -EIO;
	return 0;
}

/* This implementation supports only one Write chunk in the
 * incoming Write list.
 */
static int decode_write_list(struct xdr_stream *xdr, u32 *length)
{
	u32 chunklen;
	bool first;
	__be32 *p;

	*length = 0;
	first = true;
	do {
		p = xdr_inline_decode(xdr, sizeof(*p));
		if (unlikely(!p))
			return -EIO;
		if (*p == xdr_zero)
			break;
		if (!first)
			return -EIO;

		if (decode_write_chunk(xdr, &chunklen))
			return -EIO;
		*length += chunklen;
		first = false;
	} while (true);
	return 0;
}

static int decode_reply_chunk(struct xdr_stream *xdr, u32 *length)
{
	__be32 *p;

	p = xdr_inline_decode(xdr, sizeof(*p));
	if (unlikely(!p))
		return -EIO;

	*length = 0;
	if (*p != xdr_zero)
		if (decode_write_chunk(xdr, length))
			return -EIO;
	return 0;
}

1288static int
1289rpcrdma_decode_msg(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
1290 struct rpc_rqst *rqst)
1291{
1292 struct xdr_stream *xdr = &rep->rr_stream;
1293 u32 writelist, replychunk, rpclen;
1294 char *base;
1295
1296
1297 if (decode_read_list(xdr))
1298 return -EIO;
1299 if (decode_write_list(xdr, &writelist))
1300 return -EIO;
1301 if (decode_reply_chunk(xdr, &replychunk))
1302 return -EIO;
1303
1304
1305 if (unlikely(replychunk))
1306 return -EIO;
1307
1308
1309 base = (char *)xdr_inline_decode(xdr, 0);
1310 rpclen = xdr_stream_remaining(xdr);
1311 r_xprt->rx_stats.fixup_copy_count +=
1312 rpcrdma_inline_fixup(rqst, base, rpclen, writelist & 3);
1313
1314 r_xprt->rx_stats.total_rdma_reply += writelist;
1315 return rpclen + xdr_align_size(writelist);
1316}
1317
1318static noinline int
1319rpcrdma_decode_nomsg(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
1320{
1321 struct xdr_stream *xdr = &rep->rr_stream;
1322 u32 writelist, replychunk;
1323
1324
1325 if (decode_read_list(xdr))
1326 return -EIO;
1327 if (decode_write_list(xdr, &writelist))
1328 return -EIO;
1329 if (decode_reply_chunk(xdr, &replychunk))
1330 return -EIO;
1331
1332
1333 if (unlikely(writelist))
1334 return -EIO;
1335 if (unlikely(!replychunk))
1336 return -EIO;
1337
1338
1339 r_xprt->rx_stats.total_rdma_reply += replychunk;
1340 return replychunk;
1341}
1342
1343static noinline int
1344rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
1345 struct rpc_rqst *rqst)
1346{
1347 struct xdr_stream *xdr = &rep->rr_stream;
1348 __be32 *p;
1349
1350 p = xdr_inline_decode(xdr, sizeof(*p));
1351 if (unlikely(!p))
1352 return -EIO;
1353
1354 switch (*p) {
1355 case err_vers:
1356 p = xdr_inline_decode(xdr, 2 * sizeof(*p));
1357 if (!p)
1358 break;
1359 dprintk("RPC: %s: server reports "
1360 "version error (%u-%u), xid %08x\n", __func__,
1361 be32_to_cpup(p), be32_to_cpu(*(p + 1)),
1362 be32_to_cpu(rep->rr_xid));
1363 break;
1364 case err_chunk:
1365 dprintk("RPC: %s: server reports "
1366 "header decoding error, xid %08x\n", __func__,
1367 be32_to_cpu(rep->rr_xid));
1368 break;
1369 default:
1370 dprintk("RPC: %s: server reports "
1371 "unrecognized error %d, xid %08x\n", __func__,
1372 be32_to_cpup(p), be32_to_cpu(rep->rr_xid));
1373 }
1374
1375 r_xprt->rx_stats.bad_reply_count++;
1376 return -EREMOTEIO;
1377}
1378
1379
1380
1381
1382
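/**
 * rpcrdma_complete_rqst - Pass completed rqst back to RPC
 * @rep: RPC/RDMA Receive context
 *
 * Reconstruct the RPC reply and complete the transaction
 * while @rqst is still pinned to ensure the rep, rqst, and
 * rq_task pointers remain stable.
 */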
void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
{
	struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
	struct rpc_rqst *rqst = rep->rr_rqst;
	int status;

	switch (rep->rr_proc) {
	case rdma_msg:
		status = rpcrdma_decode_msg(r_xprt, rep, rqst);
		break;
	case rdma_nomsg:
		status = rpcrdma_decode_nomsg(r_xprt, rep);
		break;
	case rdma_error:
		status = rpcrdma_decode_error(r_xprt, rep, rqst);
		break;
	default:
		status = -EIO;
	}
	if (status < 0)
		goto out_badheader;

out:
	spin_lock(&xprt->queue_lock);
	xprt_complete_rqst(rqst->rq_task, status);
	xprt_unpin_rqst(rqst);
	spin_unlock(&xprt->queue_lock);
	return;

	/* If the incoming reply terminated a pending RPC, the next
	 * RPC call will post a replacement receive buffer as it is
	 * being marshaled.
	 */
out_badheader:
	trace_xprtrdma_reply_hdr(rep);
	r_xprt->rx_stats.bad_reply_count++;
	goto out;
}

static void rpcrdma_reply_done(struct kref *kref)
{
	struct rpcrdma_req *req =
		container_of(kref, struct rpcrdma_req, rl_kref);

	rpcrdma_complete_rqst(req->rl_reply);
}

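/**
 * rpcrdma_reply_handler - Process received RPC/RDMA messages
 * @rep: Incoming rpcrdma_rep object to process
 *
 * Errors must result in the RPC task either being awakened, or
 * allowed to timeout, to discover the errors at that time.
 */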
void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
{
	struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_req *req;
	struct rpc_rqst *rqst;
	u32 credits;
	__be32 *p;

	/* Any data means we had a useful conversation, so
	 * then we don't need to delay the next reconnect.
	 */
	if (xprt->reestablish_timeout)
		xprt->reestablish_timeout = 0;

	/* Fixed transport header fields */
	xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
			rep->rr_hdrbuf.head[0].iov_base, NULL);
	p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
	if (unlikely(!p))
		goto out_shortreply;
	rep->rr_xid = *p++;
	rep->rr_vers = *p++;
	credits = be32_to_cpu(*p++);
	rep->rr_proc = *p++;

	if (rep->rr_vers != rpcrdma_version)
		goto out_badversion;

	if (rpcrdma_is_bcall(r_xprt, rep))
		return;

	/* Match incoming rpcrdma_rep to an rpcrdma_req to
	 * get context for handling any incoming chunks.
	 */
	spin_lock(&xprt->queue_lock);
	rqst = xprt_lookup_rqst(xprt, rep->rr_xid);
	if (!rqst)
		goto out_norqst;
	xprt_pin_rqst(rqst);
	spin_unlock(&xprt->queue_lock);

	if (credits == 0)
		credits = 1;	/* don't deadlock */
	else if (credits > buf->rb_max_requests)
		credits = buf->rb_max_requests;
	if (buf->rb_credits != credits)
		rpcrdma_update_cwnd(r_xprt, credits);
	rpcrdma_post_recvs(r_xprt, false);

	req = rpcr_to_rdmar(rqst);
	if (req->rl_reply) {
		trace_xprtrdma_leaked_rep(rqst, req->rl_reply);
		rpcrdma_recv_buffer_put(req->rl_reply);
	}
	req->rl_reply = rep;
	rep->rr_rqst = rqst;

	trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);

	if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
		frwr_reminv(rep, &req->rl_registered);
	if (!list_empty(&req->rl_registered))
		frwr_unmap_async(r_xprt, req);
		/* LocalInv completion will complete the RPC */
	else
		kref_put(&req->rl_kref, rpcrdma_reply_done);
	return;

out_badversion:
	trace_xprtrdma_reply_vers(rep);
	goto out;

out_norqst:
	spin_unlock(&xprt->queue_lock);
	trace_xprtrdma_reply_rqst(rep);
	goto out;

out_shortreply:
	trace_xprtrdma_reply_short(rep);

out:
	rpcrdma_recv_buffer_put(rep);
}