/*
 * rpc_rdma.c
 *
 * This file contains the guts of the RPC RDMA protocol, and
 * does marshaling/unmarshaling, etc. It is also where interfacing
 * to the Linux RPC framework lives.
 */

#include <linux/highmem.h>

#include <linux/sunrpc/svc_rdma.h>

#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif
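
/* Returns the size of the largest RPC-over-RDMA header in a Call
 * message: room for a full-size Read list plus a minimal Reply
 * chunk, on top of the fixed header fields.
 */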
static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs)
{
	unsigned int size;

	/* Fixed header fields and list discriminators */
	size = RPCRDMA_HDRLEN_MIN;

	/* Maximum Read list size */
	size += maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32);

	/* Minimal Reply chunk size */
	size += sizeof(__be32);	/* segment count */
	size += rpcrdma_segment_maxsz * sizeof(__be32);
	size += sizeof(__be32);	/* list discriminator */

	return size;
}
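
/* Returns the size of the largest RPC-over-RDMA header in a Reply
 * message: a Reply carries either a Write list or a Reply chunk,
 * and the Write list is the larger of the two.
 */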
static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
{
	unsigned int size;

	/* Fixed header fields and list discriminators */
	size = RPCRDMA_HDRLEN_MIN;

	/* Maximum Write list size */
	size += sizeof(__be32);	/* segment count */
	size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32);
	size += sizeof(__be32);	/* list discriminator */

	return size;
}
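
/**
 * rpcrdma_set_max_header_sizes - Initialize inline payload sizes
 * @r_xprt: transport instance to initialize
 *
 * The max_inline fields record the largest Payload stream that fits
 * inline, after the worst-case transport header has been subtracted
 * from the negotiated inline thresholds.
 */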
void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt)
{
	unsigned int maxsegs = r_xprt->rx_ia.ri_max_rdma_segs;
	struct rpcrdma_ep *ep = &r_xprt->rx_ep;

	ep->rep_max_inline_send =
		ep->rep_inline_send - rpcrdma_max_call_header_size(maxsegs);
	ep->rep_max_inline_recv =
		ep->rep_inline_recv - rpcrdma_max_reply_header_size(maxsegs);
}
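
/* The client can send a request inline as long as the RPCRDMA header
 * plus the RPC call fit under the transport's inline limit. If the
 * combined call message size exceeds that limit, the client must use
 * a Read chunk for this operation.
 *
 * A Read chunk is also required if sending the RPC call inline would
 * exceed this device's max_send_sge limit.
 */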
static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
				struct rpc_rqst *rqst)
{
	struct xdr_buf *xdr = &rqst->rq_snd_buf;
	unsigned int count, remaining, offset;

	if (xdr->len > r_xprt->rx_ep.rep_max_inline_send)
		return false;

	if (xdr->page_len) {
		remaining = xdr->page_len;
		offset = offset_in_page(xdr->page_base);
		count = RPCRDMA_MIN_SEND_SGES;
		while (remaining) {
			remaining -= min_t(unsigned int,
					   PAGE_SIZE - offset, remaining);
			offset = 0;
			if (++count > r_xprt->rx_ep.rep_attr.cap.max_send_sge)
				return false;
		}
	}

	return true;
}
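
/* The client can't know how large the actual reply will be. Thus it
 * plans for the largest possible reply for that particular ULP
 * operation. If the maximum combined reply message size exceeds that
 * limit, the client must provide a Write list or a Reply chunk for
 * this request.
 */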
static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
				   struct rpc_rqst *rqst)
{
	return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep.rep_max_inline_recv;
}
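
/* The client is required to provide a Reply chunk if the maximum
 * size of the non-payload part of the RPC Reply is larger than
 * the inline threshold.
 */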
static bool
rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt,
			  const struct rpc_rqst *rqst)
{
	const struct xdr_buf *buf = &rqst->rq_rcv_buf;

	return (buf->head[0].iov_len + buf->tail[0].iov_len) <
		r_xprt->rx_ep.rep_max_inline_recv;
}
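
/* Split @vec on page boundaries into mr_seg entries. Memory
 * registration works on whole pages, so later layers coalesce
 * these segments into a single MR when they can.
 *
 * Returns a pointer to the next available segment and bumps the
 * total number of segments consumed via @n.
 */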
static struct rpcrdma_mr_seg *
rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg,
		     unsigned int *n)
{
	u32 remaining, page_offset;
	char *base;

	base = vec->iov_base;
	page_offset = offset_in_page(base);
	remaining = vec->iov_len;
	while (remaining) {
		seg->mr_page = NULL;
		seg->mr_offset = base;
		seg->mr_len = min_t(u32, PAGE_SIZE - page_offset, remaining);
		remaining -= seg->mr_len;
		base += seg->mr_len;
		++seg;
		++(*n);
		page_offset = 0;
	}
	return seg;
}
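
/* Convert @xdrbuf into SGEs no larger than a page each. As they
 * are registered, these SGEs are then coalesced into RDMA segments
 * when the selected memreg mode supports it.
 *
 * Returns a positive number of SGEs consumed, or a negative errno.
 */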
static int
rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
		     unsigned int pos, enum rpcrdma_chunktype type,
		     struct rpcrdma_mr_seg *seg)
{
	unsigned long page_base;
	unsigned int len, n;
	struct page **ppages;

	n = 0;
	if (pos == 0)
		seg = rpcrdma_convert_kvec(&xdrbuf->head[0], seg, &n);

	len = xdrbuf->page_len;
	ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
	page_base = offset_in_page(xdrbuf->page_base);
	while (len) {
		/* Some upper layers send sparse page lists; allocate
		 * any missing pages before mapping them.
		 */
		if (unlikely(xdrbuf->flags & XDRBUF_SPARSE_PAGES)) {
			if (!*ppages)
				*ppages = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
			if (!*ppages)
				return -ENOBUFS;
		}
		seg->mr_page = *ppages;
		seg->mr_offset = (char *)page_base;
		seg->mr_len = min_t(u32, PAGE_SIZE - page_base, len);
		len -= seg->mr_len;
		++ppages;
		++seg;
		++n;
		page_base = 0;
	}

	/* When encoding a Read chunk, the tail iovec contains an
	 * XDR pad and may be omitted.
	 */
	if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup)
		goto out;

	/* When encoding a Write chunk, some servers need to see an
	 * extra segment for non-XDR-aligned Write chunks. The upper
	 * layer provides space in the tail iovec that may be used
	 * for this purpose.
	 */
	if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup)
		goto out;

	if (xdrbuf->tail[0].iov_len)
		seg = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, &n);

out:
	if (unlikely(n > RPCRDMA_MAX_SEGS))
		return -EIO;
	return n;
}
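
/* XDR encoding helpers for the transport header's chunk lists.
 *
 * Each list in an RPC-over-RDMA header (Read list, Write list,
 * Reply chunk) is introduced by a one-word discriminator: xdr_one
 * means an item follows, xdr_zero terminates the list. A plain
 * RDMA segment is HLOO (Handle32, Length32, Offset64); a Read
 * segment additionally carries a Position32 field.
 */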
static inline int
encode_item_present(struct xdr_stream *xdr)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, sizeof(*p));
	if (unlikely(!p))
		return -EMSGSIZE;

	*p = xdr_one;
	return 0;
}

static inline int
encode_item_not_present(struct xdr_stream *xdr)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, sizeof(*p));
	if (unlikely(!p))
		return -EMSGSIZE;

	*p = xdr_zero;
	return 0;
}

static void
xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr *mr)
{
	*iptr++ = cpu_to_be32(mr->mr_handle);
	*iptr++ = cpu_to_be32(mr->mr_length);
	xdr_encode_hyper(iptr, mr->mr_offset);
}

static int
encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, 4 * sizeof(*p));
	if (unlikely(!p))
		return -EMSGSIZE;

	xdr_encode_rdma_segment(p, mr);
	return 0;
}

static int
encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr,
		    u32 position)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, 6 * sizeof(*p));
	if (unlikely(!p))
		return -EMSGSIZE;

	*p++ = xdr_one;
	*p++ = cpu_to_be32(position);
	xdr_encode_rdma_segment(p, mr);
	return 0;
}

static struct rpcrdma_mr_seg *rpcrdma_mr_prepare(struct rpcrdma_xprt *r_xprt,
						 struct rpcrdma_req *req,
						 struct rpcrdma_mr_seg *seg,
						 int nsegs, bool writing,
						 struct rpcrdma_mr **mr)
{
	*mr = rpcrdma_mr_pop(&req->rl_free_mrs);
	if (!*mr) {
		*mr = rpcrdma_mr_get(r_xprt);
		if (!*mr)
			goto out_getmr_err;
		trace_xprtrdma_mr_get(req);
		(*mr)->mr_req = req;
	}

	rpcrdma_mr_push(*mr, &req->rl_registered);
	return frwr_map(r_xprt, seg, nsegs, writing, req->rl_slot.rq_xid, *mr);

out_getmr_err:
	trace_xprtrdma_nomrs(req);
	xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
	rpcrdma_mrs_refresh(r_xprt);
	return ERR_PTR(-EAGAIN);
}
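
/* Register and XDR encode the Read list. Supports encoding a list of
 * read segments that belong to a single read chunk.
 *
 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
 *
 *  Read chunklist (a linked list):
 *   N elements, position P (same P for all chunks of same arg!):
 *    1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0
 *
 * Returns zero on success, or a negative errno if a failure occurred.
 * @xdr is advanced to the next position in the stream.
 *
 * Only a single @pos value is currently supported.
 */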
static int rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req,
				    struct rpc_rqst *rqst,
				    enum rpcrdma_chunktype rtype)
{
	struct xdr_stream *xdr = &req->rl_stream;
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mr *mr;
	unsigned int pos;
	int nsegs;

	if (rtype == rpcrdma_noch_pullup || rtype == rpcrdma_noch_mapped)
		goto done;

	pos = rqst->rq_snd_buf.head[0].iov_len;
	if (rtype == rpcrdma_areadch)
		pos = 0;
	seg = req->rl_segments;
	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos,
				     rtype, seg);
	if (nsegs < 0)
		return nsegs;

	do {
		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, false, &mr);
		if (IS_ERR(seg))
			return PTR_ERR(seg);

		if (encode_read_segment(xdr, mr, pos) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_read(rqst->rq_task, pos, mr, nsegs);
		r_xprt->rx_stats.read_chunk_count++;
		nsegs -= mr->mr_nents;
	} while (nsegs);

done:
	return encode_item_not_present(xdr);
}
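
/* Register and XDR encode the Write list. Supports encoding a list
 * containing one array of plain segments that belong to a single
 * write chunk.
 *
 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
 *
 *  Write chunklist (a list of (one) counted array):
 *   N elements:
 *    1 - N - HLOO - HLOO - ... - HLOO - 0
 *
 * Returns zero on success, or a negative errno if a failure occurred.
 * @xdr is advanced to the next position in the stream.
 *
 * Only a single Write chunk is currently supported.
 */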
static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt,
				     struct rpcrdma_req *req,
				     struct rpc_rqst *rqst,
				     enum rpcrdma_chunktype wtype)
{
	struct xdr_stream *xdr = &req->rl_stream;
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mr *mr;
	int nsegs, nchunks;
	__be32 *segcount;

	if (wtype != rpcrdma_writech)
		goto done;

	seg = req->rl_segments;
	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
				     rqst->rq_rcv_buf.head[0].iov_len,
				     wtype, seg);
	if (nsegs < 0)
		return nsegs;

	if (encode_item_present(xdr) < 0)
		return -EMSGSIZE;
	segcount = xdr_reserve_space(xdr, sizeof(*segcount));
	if (unlikely(!segcount))
		return -EMSGSIZE;
	/* Actual value encoded below */

	nchunks = 0;
	do {
		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, true, &mr);
		if (IS_ERR(seg))
			return PTR_ERR(seg);

		if (encode_rdma_segment(xdr, mr) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_write(rqst->rq_task, mr, nsegs);
		r_xprt->rx_stats.write_chunk_count++;
		r_xprt->rx_stats.total_rdma_request += mr->mr_length;
		nchunks++;
		nsegs -= mr->mr_nents;
	} while (nsegs);

	/* Update count of segments in this Write chunk */
	*segcount = cpu_to_be32(nchunks);

done:
	return encode_item_not_present(xdr);
}
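
/* Register and XDR encode the Reply chunk. Supports encoding an array
 * of plain segments that belong to a single write (reply) chunk.
 *
 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
 *
 *  Reply chunk (a counted array):
 *   N elements:
 *    1 - N - HLOO - HLOO - ... - HLOO
 *
 * Returns zero on success, or a negative errno if a failure occurred.
 * @xdr is advanced to the next position in the stream.
 */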
static int rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
				      struct rpcrdma_req *req,
				      struct rpc_rqst *rqst,
				      enum rpcrdma_chunktype wtype)
{
	struct xdr_stream *xdr = &req->rl_stream;
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mr *mr;
	int nsegs, nchunks;
	__be32 *segcount;

	if (wtype != rpcrdma_replych)
		return encode_item_not_present(xdr);

	seg = req->rl_segments;
	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
	if (nsegs < 0)
		return nsegs;

	if (encode_item_present(xdr) < 0)
		return -EMSGSIZE;
	segcount = xdr_reserve_space(xdr, sizeof(*segcount));
	if (unlikely(!segcount))
		return -EMSGSIZE;
	/* Actual value encoded below */

	nchunks = 0;
	do {
		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, true, &mr);
		if (IS_ERR(seg))
			return PTR_ERR(seg);

		if (encode_rdma_segment(xdr, mr) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_reply(rqst->rq_task, mr, nsegs);
		r_xprt->rx_stats.reply_chunk_count++;
		r_xprt->rx_stats.total_rdma_request += mr->mr_length;
		nchunks++;
		nsegs -= mr->mr_nents;
	} while (nsegs);

	/* Update count of segments in this Reply chunk */
	*segcount = cpu_to_be32(nchunks);

	return 0;
}
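
/* The final kref_put on rl_kref lands here when the Reply has already
 * been received but the matching Send has not yet been unmapped:
 * finish the RPC now and record that the Reply had to wait for the
 * Send to complete.
 */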
static void rpcrdma_sendctx_done(struct kref *kref)
{
	struct rpcrdma_req *req =
		container_of(kref, struct rpcrdma_req, rl_kref);
	struct rpcrdma_rep *rep = req->rl_reply;

	rpcrdma_complete_rqst(rep);
	rep->rr_rxprt->rx_stats.reply_waits_for_send++;
}
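
/**
 * rpcrdma_sendctx_unmap - DMA-unmap Send buffer SGEs
 * @sc: sendctx containing SGEs to unmap
 *
 */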
void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
{
	struct rpcrdma_regbuf *rb = sc->sc_req->rl_sendbuf;
	struct ib_sge *sge;

	if (!sc->sc_unmap_count)
		return;

	/* The first two SGEs contain the transport header and
	 * the inline buffer. These are always left mapped so
	 * they can be cheaply re-used.
	 */
	for (sge = &sc->sc_sges[2]; sc->sc_unmap_count;
	     ++sge, --sc->sc_unmap_count)
		ib_dma_unmap_page(rdmab_device(rb), sge->addr, sge->length,
				  DMA_TO_DEVICE);

	kref_put(&sc->sc_req->rl_kref, rpcrdma_sendctx_done);
}
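
/* Prepare an SGE for the RPC-over-RDMA transport header.
 */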
static void rpcrdma_prepare_hdr_sge(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req, u32 len)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct rpcrdma_regbuf *rb = req->rl_rdmabuf;
	struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];

	sge->addr = rdmab_addr(rb);
	sge->length = len;
	sge->lkey = rdmab_lkey(rb);

	ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length,
				      DMA_TO_DEVICE);
}
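
/* The head iovec is straightforward, as it is usually already
 * DMA-mapped. Sync the content that has changed.
 */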
static bool rpcrdma_prepare_head_iov(struct rpcrdma_xprt *r_xprt,
				     struct rpcrdma_req *req, unsigned int len)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];
	struct rpcrdma_regbuf *rb = req->rl_sendbuf;

	if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
		return false;

	sge->addr = rdmab_addr(rb);
	sge->length = len;
	sge->lkey = rdmab_lkey(rb);

	ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length,
				      DMA_TO_DEVICE);
	return true;
}
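
/* If there is a page list present, DMA map and prepare an
 * SGE for each page to be sent.
 */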
static bool rpcrdma_prepare_pagelist(struct rpcrdma_req *req,
				     struct xdr_buf *xdr)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct rpcrdma_regbuf *rb = req->rl_sendbuf;
	unsigned int page_base, len, remaining;
	struct page **ppages;
	struct ib_sge *sge;

	ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
	page_base = offset_in_page(xdr->page_base);
	remaining = xdr->page_len;
	while (remaining) {
		sge = &sc->sc_sges[req->rl_wr.num_sge++];
		len = min_t(unsigned int, PAGE_SIZE - page_base, remaining);
		sge->addr = ib_dma_map_page(rdmab_device(rb), *ppages,
					    page_base, len, DMA_TO_DEVICE);
		if (ib_dma_mapping_error(rdmab_device(rb), sge->addr))
			goto out_mapping_err;

		sge->length = len;
		sge->lkey = rdmab_lkey(rb);

		sc->sc_unmap_count++;
		ppages++;
		remaining -= len;
		page_base = 0;
	}

	return true;

out_mapping_err:
	trace_xprtrdma_dma_maperr(sge->addr);
	return false;
}
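
/* The tail iovec may include an XDR pad for the page list,
 * as well as additional content, and might not be in the
 * same page as the head iovec.
 */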
static bool rpcrdma_prepare_tail_iov(struct rpcrdma_req *req,
				     struct xdr_buf *xdr,
				     unsigned int page_base, unsigned int len)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];
	struct rpcrdma_regbuf *rb = req->rl_sendbuf;
	struct page *page = virt_to_page(xdr->tail[0].iov_base);

	sge->addr = ib_dma_map_page(rdmab_device(rb), page, page_base, len,
				    DMA_TO_DEVICE);
	if (ib_dma_mapping_error(rdmab_device(rb), sge->addr))
		goto out_mapping_err;

	sge->length = len;
	sge->lkey = rdmab_lkey(rb);
	++sc->sc_unmap_count;
	return true;

out_mapping_err:
	trace_xprtrdma_dma_maperr(sge->addr);
	return false;
}
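
/* Copy the tail to the end of the head buffer.
 */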
static void rpcrdma_pullup_tail_iov(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req,
				    struct xdr_buf *xdr)
{
	unsigned char *dst;

	dst = (unsigned char *)xdr->head[0].iov_base;
	dst += xdr->head[0].iov_len + xdr->page_len;
	memmove(dst, xdr->tail[0].iov_base, xdr->tail[0].iov_len);
	r_xprt->rx_stats.pullup_copy_count += xdr->tail[0].iov_len;
}
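
/* Copy pagelist content into the head buffer.
 */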
static void rpcrdma_pullup_pagelist(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req,
				    struct xdr_buf *xdr)
{
	unsigned int len, page_base, remaining;
	struct page **ppages;
	unsigned char *src, *dst;

	dst = (unsigned char *)xdr->head[0].iov_base;
	dst += xdr->head[0].iov_len;
	ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
	page_base = offset_in_page(xdr->page_base);
	remaining = xdr->page_len;
	while (remaining) {
		src = page_address(*ppages);
		src += page_base;
		len = min_t(unsigned int, PAGE_SIZE - page_base, remaining);
		memcpy(dst, src, len);
		r_xprt->rx_stats.pullup_copy_count += len;

		ppages++;
		dst += len;
		remaining -= len;
		page_base = 0;
	}
}
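
/* Copy the contents of @xdr into @rl_sendbuf and DMA sync it.
 *
 * When the head, pagelist, and tail are small, a pull-up copy
 * is considerably less costly than DMA mapping the components
 * of @xdr.
 *
 * Assumptions:
 *  - the caller has already verified that the total length
 *    of the RPC Call body will fit into @rl_sendbuf.
 */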
static bool rpcrdma_prepare_noch_pullup(struct rpcrdma_xprt *r_xprt,
					struct rpcrdma_req *req,
					struct xdr_buf *xdr)
{
	if (unlikely(xdr->tail[0].iov_len))
		rpcrdma_pullup_tail_iov(r_xprt, req, xdr);

	if (unlikely(xdr->page_len))
		rpcrdma_pullup_pagelist(r_xprt, req, xdr);

	/* The whole RPC message resides in the head iovec now */
	return rpcrdma_prepare_head_iov(r_xprt, req, xdr->len);
}

static bool rpcrdma_prepare_noch_mapped(struct rpcrdma_xprt *r_xprt,
					struct rpcrdma_req *req,
					struct xdr_buf *xdr)
{
	struct kvec *tail = &xdr->tail[0];

	if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len))
		return false;
	if (xdr->page_len)
		if (!rpcrdma_prepare_pagelist(req, xdr))
			return false;
	if (tail->iov_len)
		if (!rpcrdma_prepare_tail_iov(req, xdr,
					      offset_in_page(tail->iov_base),
					      tail->iov_len))
			return false;

	if (req->rl_sendctx->sc_unmap_count)
		kref_get(&req->rl_kref);
	return true;
}
static bool rpcrdma_prepare_readch(struct rpcrdma_xprt *r_xprt,
				   struct rpcrdma_req *req,
				   struct xdr_buf *xdr)
{
	if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len))
		return false;

	/* If there is a Read chunk, the page list is being handled
	 * via explicit RDMA, and thus is skipped here.
	 */

	/* Do not include the tail if it is only an XDR pad */
	if (xdr->tail[0].iov_len > 3) {
		unsigned int page_base, len;

		/* If the content in the page list is an odd length,
		 * xdr_write_pages() adds a pad at the beginning of
		 * the tail iovec. Force the tail's non-pad content to
		 * land at the next XDR position in the Send message.
		 */
		page_base = offset_in_page(xdr->tail[0].iov_base);
		len = xdr->tail[0].iov_len;
		page_base += len & 3;
		len -= len & 3;
		if (!rpcrdma_prepare_tail_iov(req, xdr, page_base, len))
			return false;
		kref_get(&req->rl_kref);
	}

	return true;
}
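
/**
 * rpcrdma_prepare_send_sges - Construct SGEs for a Send WR
 * @r_xprt: controlling transport
 * @req: context of RPC Call being marshalled
 * @hdrlen: size of transport header, in bytes
 * @xdr: xdr_buf containing RPC Call
 * @rtype: chunk type being encoded
 *
 * Returns 0 on success; otherwise a negative errno is returned.
 */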
inline int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
				     struct rpcrdma_req *req, u32 hdrlen,
				     struct xdr_buf *xdr,
				     enum rpcrdma_chunktype rtype)
{
	int ret;

	ret = -EAGAIN;
	req->rl_sendctx = rpcrdma_sendctx_get_locked(r_xprt);
	if (!req->rl_sendctx)
		goto out_nosc;
	req->rl_sendctx->sc_unmap_count = 0;
	req->rl_sendctx->sc_req = req;
	kref_init(&req->rl_kref);
	req->rl_wr.wr_cqe = &req->rl_sendctx->sc_cqe;
	req->rl_wr.sg_list = req->rl_sendctx->sc_sges;
	req->rl_wr.num_sge = 0;
	req->rl_wr.opcode = IB_WR_SEND;

	rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen);

	ret = -EIO;
	switch (rtype) {
	case rpcrdma_noch_pullup:
		if (!rpcrdma_prepare_noch_pullup(r_xprt, req, xdr))
			goto out_unmap;
		break;
	case rpcrdma_noch_mapped:
		if (!rpcrdma_prepare_noch_mapped(r_xprt, req, xdr))
			goto out_unmap;
		break;
	case rpcrdma_readch:
		if (!rpcrdma_prepare_readch(r_xprt, req, xdr))
			goto out_unmap;
		break;
	case rpcrdma_areadch:
		break;
	default:
		goto out_unmap;
	}

	return 0;

out_unmap:
	rpcrdma_sendctx_unmap(req->rl_sendctx);
out_nosc:
	trace_xprtrdma_prepsend_failed(&req->rl_slot, ret);
	return ret;
}
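
/**
 * rpcrdma_marshal_req - Marshal and send one RPC request
 * @r_xprt: controlling transport
 * @rqst: RPC request to be marshaled
 *
 * For the RPC in "rqst", this function:
 *  - Chooses the transfer mode (eg., RDMA_MSG or RDMA_NOMSG)
 *  - Registers Read, Write, and Reply chunks
 *  - Constructs the transport header and prepares the Send WR
 *
 * Returns 0 on success; otherwise a negative errno is returned
 * (for example %-EAGAIN, %-EMSGSIZE, or %-EIO), and
 * rx_stats.failed_marshal_count is incremented.
 */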
int
rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
{
	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
	struct xdr_stream *xdr = &req->rl_stream;
	enum rpcrdma_chunktype rtype, wtype;
	struct xdr_buf *buf = &rqst->rq_snd_buf;
	bool ddp_allowed;
	__be32 *p;
	int ret;

	rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
	xdr_init_encode(xdr, &req->rl_hdrbuf, rdmab_data(req->rl_rdmabuf),
			rqst);

	/* Fixed header fields */
	ret = -EMSGSIZE;
	p = xdr_reserve_space(xdr, 4 * sizeof(*p));
	if (!p)
		goto out_err;
	*p++ = rqst->rq_xid;
	*p++ = rpcrdma_version;
	*p++ = r_xprt->rx_buf.rb_max_requests;

	/* When the ULP employs a GSS flavor that guarantees integrity
	 * or privacy, direct data placement of individual data items
	 * is not allowed.
	 */
	ddp_allowed = !(rqst->rq_cred->cr_auth->au_flags &
			RPCAUTH_AUTH_DATATOUCH);

	/*
	 * Chunks needed for results?
	 *
	 * o If the expected result is under the inline threshold, all ops
	 *   return as inline.
	 * o Large read ops return data as write chunk(s), header as
	 *   inline.
	 * o Large non-read ops return as a single reply chunk.
	 */
	if (rpcrdma_results_inline(r_xprt, rqst))
		wtype = rpcrdma_noch;
	else if ((ddp_allowed && rqst->rq_rcv_buf.flags & XDRBUF_READ) &&
		 rpcrdma_nonpayload_inline(r_xprt, rqst))
		wtype = rpcrdma_writech;
	else
		wtype = rpcrdma_replych;

	/*
	 * Chunks needed for arguments?
	 *
	 * o If the total request is under the inline threshold, all ops
	 *   are sent as inline.
	 * o Large write ops transmit data as read chunk(s), header as
	 *   inline.
	 * o Large non-write ops are sent with the entire message as a
	 *   single read chunk (protocol 0-position special case).
	 *
	 * This assumes that the upper layer does not present a request
	 * that both has a data payload, and whose non-data arguments
	 * are larger than the inline threshold.
	 */
	if (rpcrdma_args_inline(r_xprt, rqst)) {
		*p++ = rdma_msg;
		rtype = buf->len < rdmab_length(req->rl_sendbuf) ?
			rpcrdma_noch_pullup : rpcrdma_noch_mapped;
	} else if (ddp_allowed && buf->flags & XDRBUF_WRITE) {
		*p++ = rdma_msg;
		rtype = rpcrdma_readch;
	} else {
		r_xprt->rx_stats.nomsg_call_count++;
		*p++ = rdma_nomsg;
		rtype = rpcrdma_areadch;
	}

	/* This implementation supports the following combinations
	 * of chunk lists in one RPC-over-RDMA Call message:
	 *
	 *   - Read list
	 *   - Write list
	 *   - Reply chunk
	 *   - Read list + Reply chunk
	 *
	 * It might not yet support the following combinations:
	 *
	 *   - Read list + Write list
	 *
	 * It does not support the following combinations:
	 *
	 *   - Write list + Reply chunk
	 *   - Read list + Write list + Reply chunk
	 *
	 * This implementation supports only a single chunk in each
	 * Read or Write list. Thus for example the client cannot
	 * send a Call message with a Position Zero Read chunk and a
	 * regular Read chunk at the same time.
	 */
	ret = rpcrdma_encode_read_list(r_xprt, req, rqst, rtype);
	if (ret)
		goto out_err;
	ret = rpcrdma_encode_write_list(r_xprt, req, rqst, wtype);
	if (ret)
		goto out_err;
	ret = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, wtype);
	if (ret)
		goto out_err;

	ret = rpcrdma_prepare_send_sges(r_xprt, req, req->rl_hdrbuf.len,
					buf, rtype);
	if (ret)
		goto out_err;

	trace_xprtrdma_marshal(req, rtype, wtype);
	return 0;

out_err:
	trace_xprtrdma_marshal_failed(rqst, ret);
	r_xprt->rx_stats.failed_marshal_count++;
	frwr_reset(req);
	return ret;
}
static void __rpcrdma_update_cwnd_locked(struct rpc_xprt *xprt,
					 struct rpcrdma_buffer *buf,
					 u32 grant)
{
	buf->rb_credits = grant;
	xprt->cwnd = grant << RPC_CWNDSHIFT;
}

static void rpcrdma_update_cwnd(struct rpcrdma_xprt *r_xprt, u32 grant)
{
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;

	spin_lock(&xprt->transport_lock);
	__rpcrdma_update_cwnd_locked(xprt, &r_xprt->rx_buf, grant);
	spin_unlock(&xprt->transport_lock);
}
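
/**
 * rpcrdma_reset_cwnd - Reset the xprt's congestion window
 * @r_xprt: controlling transport instance
 *
 * Prepare @r_xprt for the next connection by reinitializing
 * its credit grant to one (see RFC 8166, Section 3.3.3).
 */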
void rpcrdma_reset_cwnd(struct rpcrdma_xprt *r_xprt)
{
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;

	spin_lock(&xprt->transport_lock);
	xprt->cong = 0;
	__rpcrdma_update_cwnd_locked(xprt, &r_xprt->rx_buf, 1);
	spin_unlock(&xprt->transport_lock);
}
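
/* Scatter inline received data into rqst's iovecs.
 *
 * The upper layer has set the maximum number of bytes it can
 * receive in each component of rq_rcv_buf. These values are set in
 * the head.iov_len, page_len, tail.iov_len, and buflen fields.
 *
 * Unlike the TCP equivalent (xdr_partial_copy_from_skb), in
 * many cases this function simply updates iov_base pointers in
 * rq_rcv_buf to point directly to the received reply data, to
 * avoid copying reply data.
 *
 * Returns the count of bytes which had to be memcopied.
 */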
static unsigned long
rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
{
	unsigned long fixup_copy_count;
	int i, npages, curlen;
	char *destp;
	struct page **ppages;
	int page_base;

	/* The head iovec is redirected to the RPC reply message
	 * in the receive buffer, to avoid a memcopy.
	 */
	rqst->rq_rcv_buf.head[0].iov_base = srcp;
	rqst->rq_private_buf.head[0].iov_base = srcp;

	/* The contents of the receive buffer that follow
	 * head.iov_len bytes are copied into the page list.
	 */
	curlen = rqst->rq_rcv_buf.head[0].iov_len;
	if (curlen > copy_len)
		curlen = copy_len;
	srcp += curlen;
	copy_len -= curlen;

	ppages = rqst->rq_rcv_buf.pages +
		(rqst->rq_rcv_buf.page_base >> PAGE_SHIFT);
	page_base = offset_in_page(rqst->rq_rcv_buf.page_base);
	fixup_copy_count = 0;
	if (copy_len && rqst->rq_rcv_buf.page_len) {
		int pagelist_len;

		pagelist_len = rqst->rq_rcv_buf.page_len;
		if (pagelist_len > copy_len)
			pagelist_len = copy_len;
		npages = PAGE_ALIGN(page_base + pagelist_len) >> PAGE_SHIFT;
		for (i = 0; i < npages; i++) {
			curlen = PAGE_SIZE - page_base;
			if (curlen > pagelist_len)
				curlen = pagelist_len;

			destp = kmap_atomic(ppages[i]);
			memcpy(destp + page_base, srcp, curlen);
			flush_dcache_page(ppages[i]);
			kunmap_atomic(destp);
			srcp += curlen;
			copy_len -= curlen;
			fixup_copy_count += curlen;
			pagelist_len -= curlen;
			if (!pagelist_len)
				break;
			page_base = 0;
		}

		/* Implicit padding for the last segment in a Write
		 * chunk is inserted inline at the front of the tail
		 * iovec. The upper layer ignores the content of
		 * the pad. Simply ensure inline content in the tail
		 * that follows the Write chunk is properly aligned.
		 */
		if (pad)
			srcp -= pad;
	}

	/* The tail iovec is redirected to the remaining data
	 * in the receive buffer, to avoid a memcopy.
	 */
	if (copy_len || pad) {
		rqst->rq_rcv_buf.tail[0].iov_base = srcp;
		rqst->rq_private_buf.tail[0].iov_base = srcp;
	}

	if (fixup_copy_count)
		trace_xprtrdma_fixup(rqst, fixup_copy_count);
	return fixup_copy_count;
}
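
/* By convention, backchannel calls arrive via rdma_msg type
 * messages, and never populate the chunk lists. This makes
 * the RPC/RDMA header small and fixed in size, so it is
 * straightforward to check the RPC header's direction field.
 */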
static bool
rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
{
	struct xdr_stream *xdr = &rep->rr_stream;
	__be32 *p;

	if (rep->rr_proc != rdma_msg)
		return false;

	/* Peek at stream contents without advancing. */
	p = xdr_inline_decode(xdr, 0);

	/* Chunk lists */
	if (*p++ != xdr_zero)
		return false;
	if (*p++ != xdr_zero)
		return false;
	if (*p++ != xdr_zero)
		return false;

	/* RPC header */
	if (*p++ != rep->rr_xid)
		return false;
	if (*p != cpu_to_be32(RPC_CALL))
		return false;

	/* Now that we are sure this is a backchannel call,
	 * advance to the RPC header.
	 */
	p = xdr_inline_decode(xdr, 3 * sizeof(*p));
	if (unlikely(!p))
		goto out_short;

	rpcrdma_bc_receive_call(r_xprt, rep);
	return true;

out_short:
	pr_warn("RPC/RDMA short backward direction call\n");
	return true;
}
#else	/* CONFIG_SUNRPC_BACKCHANNEL */
{
	return false;
}
#endif	/* CONFIG_SUNRPC_BACKCHANNEL */
static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length)
{
	u32 handle;
	u64 offset;
	__be32 *p;

	p = xdr_inline_decode(xdr, 4 * sizeof(*p));
	if (unlikely(!p))
		return -EIO;

	handle = be32_to_cpup(p++);
	*length = be32_to_cpup(p++);
	xdr_decode_hyper(p, &offset);

	trace_xprtrdma_decode_seg(handle, *length, offset);
	return 0;
}

static int decode_write_chunk(struct xdr_stream *xdr, u32 *length)
{
	u32 segcount, seglength;
	__be32 *p;

	p = xdr_inline_decode(xdr, sizeof(*p));
	if (unlikely(!p))
		return -EIO;

	*length = 0;
	segcount = be32_to_cpup(p);
	while (segcount--) {
		if (decode_rdma_segment(xdr, &seglength))
			return -EIO;
		*length += seglength;
	}

	return 0;
}
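
/* In RPC-over-RDMA Version One replies, a Read list is never
 * expected. This decoder checks only that the list is empty.
 */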
static int decode_read_list(struct xdr_stream *xdr)
{
	__be32 *p;

	p = xdr_inline_decode(xdr, sizeof(*p));
	if (unlikely(!p))
		return -EIO;
	if (unlikely(*p != xdr_zero))
		return -EIO;
	return 0;
}
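
/* Supports only one Write chunk in the Write list
 */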
static int decode_write_list(struct xdr_stream *xdr, u32 *length)
{
	u32 chunklen;
	bool first;
	__be32 *p;

	*length = 0;
	first = true;
	do {
		p = xdr_inline_decode(xdr, sizeof(*p));
		if (unlikely(!p))
			return -EIO;
		if (*p == xdr_zero)
			break;
		if (!first)
			return -EIO;

		if (decode_write_chunk(xdr, &chunklen))
			return -EIO;
		*length += chunklen;
		first = false;
	} while (true);
	return 0;
}
static int decode_reply_chunk(struct xdr_stream *xdr, u32 *length)
{
	__be32 *p;

	p = xdr_inline_decode(xdr, sizeof(*p));
	if (unlikely(!p))
		return -EIO;

	*length = 0;
	if (*p != xdr_zero)
		if (decode_write_chunk(xdr, length))
			return -EIO;
	return 0;
}

static int
rpcrdma_decode_msg(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
		   struct rpc_rqst *rqst)
{
	struct xdr_stream *xdr = &rep->rr_stream;
	u32 writelist, replychunk, rpclen;
	char *base;

	/* Decode the chunk lists */
	if (decode_read_list(xdr))
		return -EIO;
	if (decode_write_list(xdr, &writelist))
		return -EIO;
	if (decode_reply_chunk(xdr, &replychunk))
		return -EIO;

	/* RDMA_MSG sanity checks */
	if (unlikely(replychunk))
		return -EIO;

	/* Build the RPC reply's Payload stream in rqst->rq_rcv_buf */
	base = (char *)xdr_inline_decode(xdr, 0);
	rpclen = xdr_stream_remaining(xdr);
	r_xprt->rx_stats.fixup_copy_count +=
		rpcrdma_inline_fixup(rqst, base, rpclen, writelist & 3);

	r_xprt->rx_stats.total_rdma_reply += writelist;
	return rpclen + xdr_align_size(writelist);
}
static noinline int
rpcrdma_decode_nomsg(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
{
	struct xdr_stream *xdr = &rep->rr_stream;
	u32 writelist, replychunk;

	/* Decode the chunk lists */
	if (decode_read_list(xdr))
		return -EIO;
	if (decode_write_list(xdr, &writelist))
		return -EIO;
	if (decode_reply_chunk(xdr, &replychunk))
		return -EIO;

	/* RDMA_NOMSG sanity checks */
	if (unlikely(writelist))
		return -EIO;
	if (unlikely(!replychunk))
		return -EIO;

	/* Reply chunk buffer already is the reply vector */
	r_xprt->rx_stats.total_rdma_reply += replychunk;
	return replychunk;
}
static noinline int
rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
		     struct rpc_rqst *rqst)
{
	struct xdr_stream *xdr = &rep->rr_stream;
	__be32 *p;

	p = xdr_inline_decode(xdr, sizeof(*p));
	if (unlikely(!p))
		return -EIO;

	switch (*p) {
	case err_vers:
		p = xdr_inline_decode(xdr, 2 * sizeof(*p));
		if (!p)
			break;
		dprintk("RPC: %s: server reports "
			"version error (%u-%u), xid %08x\n", __func__,
			be32_to_cpup(p), be32_to_cpu(*(p + 1)),
			be32_to_cpu(rep->rr_xid));
		break;
	case err_chunk:
		dprintk("RPC: %s: server reports "
			"header decoding error, xid %08x\n", __func__,
			be32_to_cpu(rep->rr_xid));
		break;
	default:
		dprintk("RPC: %s: server reports "
			"unrecognized error %d, xid %08x\n", __func__,
			be32_to_cpup(p), be32_to_cpu(rep->rr_xid));
	}

	r_xprt->rx_stats.bad_reply_count++;
	return -EREMOTEIO;
}
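
/* Decode the remaining transport header content, rebuild the RPC
 * reply in rqst->rq_rcv_buf, and complete the RPC under the
 * transport's queue_lock.
 */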
void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
{
	struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
	struct rpc_rqst *rqst = rep->rr_rqst;
	int status;

	switch (rep->rr_proc) {
	case rdma_msg:
		status = rpcrdma_decode_msg(r_xprt, rep, rqst);
		break;
	case rdma_nomsg:
		status = rpcrdma_decode_nomsg(r_xprt, rep);
		break;
	case rdma_error:
		status = rpcrdma_decode_error(r_xprt, rep, rqst);
		break;
	default:
		status = -EIO;
	}
	if (status < 0)
		goto out_badheader;

out:
	spin_lock(&xprt->queue_lock);
	xprt_complete_rqst(rqst->rq_task, status);
	xprt_unpin_rqst(rqst);
	spin_unlock(&xprt->queue_lock);
	return;

	/* If the incoming reply terminated a pending RPC, the next
	 * RPC call will post a replacement receive buffer as it is
	 * being marshaled.
	 */
out_badheader:
	trace_xprtrdma_reply_hdr(rep);
	r_xprt->rx_stats.bad_reply_count++;
	goto out;
}
static void rpcrdma_reply_done(struct kref *kref)
{
	struct rpcrdma_req *req =
		container_of(kref, struct rpcrdma_req, rl_kref);

	rpcrdma_complete_rqst(req->rl_reply);
}
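
/**
 * rpcrdma_reply_handler - Process received RPC/RDMA messages
 * @rep: Incoming rpcrdma_rep object to process
 *
 * Errors must result in the RPC task either being awakened, or
 * allowed to timeout, to discover the errors at that time.
 */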
void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
{
	struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_req *req;
	struct rpc_rqst *rqst;
	u32 credits;
	__be32 *p;

	/* Any data means we had a useful conversation, so
	 * then we don't need to delay the next reconnect.
	 */
	if (xprt->reestablish_timeout)
		xprt->reestablish_timeout = 0;

	/* Fixed transport header fields */
	xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
			rep->rr_hdrbuf.head[0].iov_base, NULL);
	p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
	if (unlikely(!p))
		goto out_shortreply;
	rep->rr_xid = *p++;
	rep->rr_vers = *p++;
	credits = be32_to_cpu(*p++);
	rep->rr_proc = *p++;

	if (rep->rr_vers != rpcrdma_version)
		goto out_badversion;

	if (rpcrdma_is_bcall(r_xprt, rep))
		return;

	/* Match incoming rpcrdma_rep to the corresponding
	 * RPC request.
	 */
	spin_lock(&xprt->queue_lock);
	rqst = xprt_lookup_rqst(xprt, rep->rr_xid);
	if (!rqst)
		goto out_norqst;
	xprt_pin_rqst(rqst);
	spin_unlock(&xprt->queue_lock);

	if (credits == 0)
		credits = 1;	/* don't deadlock */
	else if (credits > r_xprt->rx_ep.rep_max_requests)
		credits = r_xprt->rx_ep.rep_max_requests;
	if (buf->rb_credits != credits)
		rpcrdma_update_cwnd(r_xprt, credits);
	rpcrdma_post_recvs(r_xprt, false);

	req = rpcr_to_rdmar(rqst);
	if (req->rl_reply) {
		trace_xprtrdma_leaked_rep(rqst, req->rl_reply);
		rpcrdma_recv_buffer_put(req->rl_reply);
	}
	req->rl_reply = rep;
	rep->rr_rqst = rqst;

	trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);

	if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
		frwr_reminv(rep, &req->rl_registered);
	if (!list_empty(&req->rl_registered))
		frwr_unmap_async(r_xprt, req);
		/* LocalInv completion will complete the RPC */
	else
		kref_put(&req->rl_kref, rpcrdma_reply_done);
	return;

out_badversion:
	trace_xprtrdma_reply_vers(rep);
	goto out;

out_norqst:
	spin_unlock(&xprt->queue_lock);
	trace_xprtrdma_reply_rqst(rep);
	goto out;

out_shortreply:
	trace_xprtrdma_reply_short(rep);

out:
	rpcrdma_recv_buffer_put(rep);
}