1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95#include <asm/unaligned.h>
96#include <rdma/ib_verbs.h>
97#include <rdma/rdma_cm.h>
98
99#include <linux/spinlock.h>
100
101#include <linux/sunrpc/xdr.h>
102#include <linux/sunrpc/debug.h>
103#include <linux/sunrpc/rpc_rdma.h>
104#include <linux/sunrpc/svc_rdma.h>
105
106#define RPCDBG_FACILITY RPCDBG_SVCXPRT
107
108
109
110
111
112
/* Build rqstp->rq_arg from the pages that carry a just-received
 * RPC-over-RDMA message.
 *
 * Ownership of each receive page in ctxt->pages[] is transferred into
 * rqstp->rq_pages[]; the page previously occupying that slot is
 * released. Receive pages beyond the received byte count are returned
 * immediately and ctxt->count is trimmed to match.
 *
 * @rqstp: svc_rqst being filled in
 * @ctxt: receive context holding the posted receive pages and SGEs
 * @byte_count: number of bytes of message actually received
 */
static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
			       struct svc_rdma_op_ctxt *ctxt,
			       u32 byte_count)
{
	struct page *page;
	u32 bc;
	int sge_no;

	/* The first receive page carries the transport header and the
	 * start of the payload; it becomes rq_pages[0].
	 */
	page = ctxt->pages[0];
	put_page(rqstp->rq_pages[0]);
	rqstp->rq_pages[0] = page;

	/* Set up the XDR head, capped by the first SGE's length */
	rqstp->rq_arg.head[0].iov_base = page_address(page);
	rqstp->rq_arg.head[0].iov_len =
		min_t(size_t, byte_count, ctxt->sge[0].length);
	rqstp->rq_arg.len = byte_count;
	rqstp->rq_arg.buflen = byte_count;

	/* Bytes that did not fit in the head land in the page list */
	bc = byte_count - rqstp->rq_arg.head[0].iov_len;

	rqstp->rq_arg.page_len = bc;
	rqstp->rq_arg.page_base = 0;

	/* Transfer one receive page per SGE until the remaining byte
	 * count is exhausted or the context runs out of pages.
	 */
	sge_no = 1;
	while (bc && sge_no < ctxt->count) {
		page = ctxt->pages[sge_no];
		put_page(rqstp->rq_pages[sge_no]);
		rqstp->rq_pages[sge_no] = page;
		bc -= min_t(u32, bc, ctxt->sge[sge_no].length);
		sge_no++;
	}
	rqstp->rq_respages = &rqstp->rq_pages[sge_no];
	rqstp->rq_next_page = rqstp->rq_respages + 1;

	/* Release receive pages past the end of the message; they were
	 * posted for the receive but carry no data. bc is reused here
	 * to remember the new page count.
	 */
	bc = sge_no;
	while (sge_no < ctxt->count) {
		page = ctxt->pages[sge_no++];
		put_page(page);
	}
	ctxt->count = bc;

	/* No tail: the whole message is covered by head + page list */
	rqstp->rq_arg.tail[0].iov_base = NULL;
	rqstp->rq_arg.tail[0].iov_len = 0;
}
163
164
165
166
/* Upper bound on the byte count of a single Write chunk segment that
 * this server will accept, derived from its page vector size.
 */
#define MAX_BYTES_WRITE_SEG ((u32)(RPCSVC_MAXPAGES << PAGE_SHIFT))

/* Larger ceiling applied to Read segments and Reply chunk segments;
 * two extra pages are allowed (presumably to cover XDR head/tail
 * round-off -- TODO confirm against the sendto path).
 */
#define MAX_BYTES_SPECIAL_SEG ((u32)((RPCSVC_MAXPAGES + 2) << PAGE_SHIFT))
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190static __be32 *xdr_check_read_list(__be32 *p, const __be32 *end)
191{
192 u32 position;
193 bool first;
194
195 first = true;
196 while (*p++ != xdr_zero) {
197 if (first) {
198 position = be32_to_cpup(p++);
199 first = false;
200 } else if (be32_to_cpup(p++) != position) {
201 return NULL;
202 }
203 p++;
204 if (be32_to_cpup(p++) > MAX_BYTES_SPECIAL_SEG)
205 return NULL;
206 p += 2;
207
208 if (p > end)
209 return NULL;
210 }
211 return p;
212}
213
214
215
216
217
218
219static __be32 *xdr_check_write_chunk(__be32 *p, const __be32 *end,
220 u32 maxlen)
221{
222 u32 i, segcount;
223
224 segcount = be32_to_cpup(p++);
225 for (i = 0; i < segcount; i++) {
226 p++;
227 if (be32_to_cpup(p++) > maxlen)
228 return NULL;
229 p += 2;
230
231 if (p > end)
232 return NULL;
233 }
234
235 return p;
236}
237
238
239
240
241
242
243
244
245
246
247
248
249static __be32 *xdr_check_write_list(__be32 *p, const __be32 *end)
250{
251 u32 chcount;
252
253 chcount = 0;
254 while (*p++ != xdr_zero) {
255 p = xdr_check_write_chunk(p, end, MAX_BYTES_WRITE_SEG);
256 if (!p)
257 return NULL;
258 if (chcount++ > 1)
259 return NULL;
260 }
261 return p;
262}
263
264
265
266
267
268
269
270
271
272static __be32 *xdr_check_reply_chunk(__be32 *p, const __be32 *end)
273{
274 if (*p++ != xdr_zero) {
275 p = xdr_check_write_chunk(p, end, MAX_BYTES_SPECIAL_SEG);
276 if (!p)
277 return NULL;
278 }
279 return p;
280}
281
282
283
284
285
286
287
288
289
290
291
/* Decode and sanity-check the RPC-over-RDMA transport header at the
 * start of @rq_arg, then advance head[0] past it.
 *
 * Returns:
 *   > 0: header length in bytes; head[0] and len have been adjusted
 *        to point at the RPC message proper
 *     0: message was RDMA_DONE or RDMA_ERROR; caller should drop it
 *   < 0: negative errno for a short, unsupported, or corrupt header
 */
static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
{
	__be32 *p, *end, *rdma_argp;
	unsigned int hdr_len;
	char *proc;

	/* The message must at least carry the fixed transport header */
	if (rq_arg->len <= RPCRDMA_HDRLEN_ERR)
		goto out_short;

	rdma_argp = rq_arg->head[0].iov_base;
	/* Word 1: transport protocol version */
	if (*(rdma_argp + 1) != rpcrdma_version)
		goto out_version;

	/* Word 3: message type */
	switch (*(rdma_argp + 3)) {
	case rdma_msg:
		proc = "RDMA_MSG";
		break;
	case rdma_nomsg:
		proc = "RDMA_NOMSG";
		break;

	case rdma_done:
		goto out_drop;

	case rdma_error:
		goto out_drop;

	default:
		goto out_proc;
	}

	/* Walk the three chunk lists that follow the four fixed header
	 * words; each helper returns NULL on a malformed list.
	 */
	end = (__be32 *)((unsigned long)rdma_argp + rq_arg->len);
	p = xdr_check_read_list(rdma_argp + 4, end);
	if (!p)
		goto out_inval;
	p = xdr_check_write_list(p, end);
	if (!p)
		goto out_inval;
	p = xdr_check_reply_chunk(p, end);
	if (!p)
		goto out_inval;
	if (p > end)
		goto out_inval;

	/* Header is sane: point head[0] at the RPC message itself */
	rq_arg->head[0].iov_base = p;
	hdr_len = (unsigned long)p - (unsigned long)rdma_argp;
	rq_arg->head[0].iov_len -= hdr_len;
	rq_arg->len -= hdr_len;
	dprintk("svcrdma: received %s request for XID 0x%08x, hdr_len=%u\n",
		proc, be32_to_cpup(rdma_argp), hdr_len);
	return hdr_len;

out_short:
	dprintk("svcrdma: header too short = %d\n", rq_arg->len);
	return -EINVAL;

out_version:
	dprintk("svcrdma: bad xprt version: %u\n",
		be32_to_cpup(rdma_argp + 1));
	return -EPROTONOSUPPORT;

out_drop:
	dprintk("svcrdma: dropping RDMA_DONE/ERROR message\n");
	return 0;

out_proc:
	dprintk("svcrdma: bad rdma procedure (%u)\n",
		be32_to_cpup(rdma_argp + 3));
	return -EINVAL;

out_inval:
	dprintk("svcrdma: failed to parse transport header\n");
	return -EINVAL;
}
367
/* Finish constructing rq_arg once the RDMA Reads for this request
 * have completed: transfer the pages saved in @head into
 * rqstp->rq_pages and restore the xdr_buf fields saved in head->arg.
 */
static void rdma_read_complete(struct svc_rqst *rqstp,
			       struct svc_rdma_op_ctxt *head)
{
	int page_no;

	/* Move the pages carrying the request into rq_pages, releasing
	 * the pages that previously occupied those slots.
	 */
	for (page_no = 0; page_no < head->count; page_no++) {
		put_page(rqstp->rq_pages[page_no]);
		rqstp->rq_pages[page_no] = head->pages[page_no];
	}

	/* Page list starts after the pages consumed by the header */
	rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count];
	rqstp->rq_arg.page_len = head->arg.page_len;

	/* Response pages follow the last request page */
	rqstp->rq_respages = &rqstp->rq_pages[page_no];
	rqstp->rq_next_page = rqstp->rq_respages + 1;

	/* Rebuild head, tail, and lengths from the saved copy */
	rqstp->rq_arg.head[0] = head->arg.head[0];
	rqstp->rq_arg.tail[0] = head->arg.tail[0];
	rqstp->rq_arg.len = head->arg.len;
	rqstp->rq_arg.buflen = head->arg.buflen;
}
393
394static void svc_rdma_send_error(struct svcxprt_rdma *xprt,
395 __be32 *rdma_argp, int status)
396{
397 struct svc_rdma_op_ctxt *ctxt;
398 __be32 *p, *err_msgp;
399 unsigned int length;
400 struct page *page;
401 int ret;
402
403 page = alloc_page(GFP_KERNEL);
404 if (!page)
405 return;
406 err_msgp = page_address(page);
407
408 p = err_msgp;
409 *p++ = *rdma_argp;
410 *p++ = *(rdma_argp + 1);
411 *p++ = xprt->sc_fc_credits;
412 *p++ = rdma_error;
413 if (status == -EPROTONOSUPPORT) {
414 *p++ = err_vers;
415 *p++ = rpcrdma_version;
416 *p++ = rpcrdma_version;
417 } else {
418 *p++ = err_chunk;
419 }
420 length = (unsigned long)p - (unsigned long)err_msgp;
421
422
423 ctxt = svc_rdma_get_context(xprt);
424 ret = svc_rdma_map_reply_hdr(xprt, ctxt, err_msgp, length);
425 if (ret) {
426 dprintk("svcrdma: Error %d mapping send for protocol error\n",
427 ret);
428 return;
429 }
430
431 ret = svc_rdma_post_send_wr(xprt, ctxt, 1, 0);
432 if (ret) {
433 dprintk("svcrdma: Error %d posting send for protocol error\n",
434 ret);
435 svc_rdma_unmap_dma(ctxt);
436 svc_rdma_put_context(ctxt, 1);
437 }
438}
439
440
441
442
443
444
445static bool svc_rdma_is_backchannel_reply(struct svc_xprt *xprt,
446 __be32 *rdma_resp)
447{
448 __be32 *p;
449
450 if (!xprt->xpt_bc_xprt)
451 return false;
452
453 p = rdma_resp + 3;
454 if (*p++ != rdma_msg)
455 return false;
456
457 if (*p++ != xdr_zero)
458 return false;
459 if (*p++ != xdr_zero)
460 return false;
461 if (*p++ != xdr_zero)
462 return false;
463
464
465 if (*p++ != *rdma_resp)
466 return false;
467
468 if (*p == cpu_to_be32(RPC_CALL))
469 return false;
470
471 return true;
472}
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
/* svc_rdma_recvfrom - Receive the next message on the transport
 * @rqstp: request structure into which to build an RPC Call
 *
 * Returns:
 *   the number of bytes in rq_arg when a Call is ready for the
 *   RPC layer; zero when no message is ready, the message was
 *   consumed here (backchannel, drop, error reply, or pending
 *   RDMA Read); or a negative errno when posting the RDMA Read
 *   failed.
 */
int svc_rdma_recvfrom(struct svc_rqst *rqstp)
{
	struct svc_xprt *xprt = rqstp->rq_xprt;
	struct svcxprt_rdma *rdma_xprt =
		container_of(xprt, struct svcxprt_rdma, sc_xprt);
	struct svc_rdma_op_ctxt *ctxt;
	__be32 *p;
	int ret;

	/* Prefer requests whose RDMA Reads have completed; otherwise
	 * take the next freshly received message off the dto queue.
	 */
	spin_lock(&rdma_xprt->sc_rq_dto_lock);
	if (!list_empty(&rdma_xprt->sc_read_complete_q)) {
		ctxt = list_first_entry(&rdma_xprt->sc_read_complete_q,
					struct svc_rdma_op_ctxt, list);
		list_del(&ctxt->list);
		spin_unlock(&rdma_xprt->sc_rq_dto_lock);
		rdma_read_complete(rqstp, ctxt);
		goto complete;
	} else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
		ctxt = list_first_entry(&rdma_xprt->sc_rq_dto_q,
					struct svc_rdma_op_ctxt, list);
		list_del(&ctxt->list);
	} else {
		/* Nothing queued: clear XPT_DATA so svc stops polling */
		clear_bit(XPT_DATA, &xprt->xpt_flags);
		spin_unlock(&rdma_xprt->sc_rq_dto_lock);
		return 0;
	}
	spin_unlock(&rdma_xprt->sc_rq_dto_lock);

	dprintk("svcrdma: recvfrom: ctxt=%p on xprt=%p, rqstp=%p\n",
		ctxt, rdma_xprt, rqstp);
	atomic_inc(&rdma_stat_recv);

	/* Build rq_arg from the receive pages held by the context */
	rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len);

	/* Decode and validate the transport header; p keeps pointing
	 * at its first word for the error and backchannel paths.
	 */
	p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
	ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg);
	if (ret < 0)
		goto out_err;
	if (ret == 0)
		goto out_drop;
	rqstp->rq_xprt_hlen = ret;

	/* Backchannel replies are consumed here, not handed to svc */
	if (svc_rdma_is_backchannel_reply(xprt, p)) {
		ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, p,
					       &rqstp->rq_arg);
		svc_rdma_put_context(ctxt, 0);
		return ret;
	}

	/* A non-empty Read list means the payload must be pulled over
	 * with RDMA Read before the Call can be processed.
	 */
	p += rpcrdma_fixed_maxsz;
	if (*p != xdr_zero)
		goto out_readchunk;

complete:
	svc_rdma_put_context(ctxt, 0);
	dprintk("svcrdma: recvfrom: xprt=%p, rqstp=%p, rq_arg.len=%u\n",
		rdma_xprt, rqstp, rqstp->rq_arg.len);
	rqstp->rq_prot = IPPROTO_MAX;
	svc_xprt_copy_addrs(rqstp, xprt);
	return rqstp->rq_arg.len;

out_readchunk:
	/* Reads posted; request resumes via sc_read_complete_q later */
	ret = svc_rdma_recv_read_chunk(rdma_xprt, rqstp, ctxt, p);
	if (ret < 0)
		goto out_postfail;
	return 0;

out_err:
	/* Tell the client its header was unusable, then drop */
	svc_rdma_send_error(rdma_xprt, p, ret);
	svc_rdma_put_context(ctxt, 0);
	return 0;

out_postfail:
	if (ret == -EINVAL)
		svc_rdma_send_error(rdma_xprt, p, ret);
	svc_rdma_put_context(ctxt, 1);
	return ret;

out_drop:
	svc_rdma_put_context(ctxt, 1);
	return 0;
}
589