// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2015, 2017 Oracle.  All rights reserved.
 * Copyright (c) 2003-2014 Network Appliance, Inc. All rights reserved.
 */

/* Lightweight memory registration using Fast Registration Work
 * Requests (FRWR).
 *
 * FRWR features ordered asynchronous registration and invalidation
 * of arbitrarily-sized memory regions. This is the fastest and safest
 * but most complex memory registration mode.
 */

/* Normal operation:
 *
 * A Memory Region is prepared for RDMA Read or Write using a FAST_REG
 * Work Request (frwr_map). When the RDMA operation is finished, this
 * Memory Region is invalidated using a LOCAL_INV Work Request
 * (frwr_unmap_async and frwr_unmap_sync).
 *
 * Typically FAST_REG Work Requests are not signaled, and neither are
 * RDMA Send Work Requests (with the exception of signaling occasionally
 * to prevent provider work queue overflows). This greatly reduces HCA
 * interrupt workload.
 */

/* Transport recovery:
 *
 * frwr_map and frwr_unmap_* cannot run at the same time the transport
 * connect worker is running. The connect worker holds the transport
 * send lock, just as ->send_request does. This prevents frwr_map and
 * the connect worker from running concurrently. When a connection is
 * closed, the Receive completion queue is drained before allowing the
 * connect worker to get control. This prevents frwr_unmap and the
 * connect worker from running concurrently.
 *
 * When the underlying transport disconnects, MRs that are in flight
 * are flushed and are likely unusable. Thus all flushed MRs are
 * destroyed. New MRs are created on demand.
 */

#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_rdma.h>

#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

/**
 * frwr_is_supported - Check if device supports FRWR
 * @device: interface adapter to check
 *
 * Returns true if device supports FRWR, otherwise false
 */
bool frwr_is_supported(struct ib_device *device)
{
	struct ib_device_attr *attrs = &device->attrs;

	if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
		goto out_not_supported;
	if (attrs->max_fast_reg_page_list_len == 0)
		goto out_not_supported;
	return true;

out_not_supported:
	pr_info("rpcrdma: 'frwr' mode is not supported by device %s\n",
		device->name);
	return false;
}

/**
 * frwr_release_mr - Destroy one MR
 * @mr: MR allocated by frwr_init_mr
 *
 */
void frwr_release_mr(struct rpcrdma_mr *mr)
{
	int rc;

	rc = ib_dereg_mr(mr->frwr.fr_mr);
	if (rc)
		trace_xprtrdma_frwr_dereg(mr, rc);
	kfree(mr->mr_sg);
	kfree(mr);
}

/* MRs are dynamically allocated, so simply clean up and release the MR.
 * A replacement MR will subsequently be allocated on demand.
 */
static void
frwr_mr_recycle_worker(struct work_struct *work)
{
	struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle);
	struct rpcrdma_xprt *r_xprt = mr->mr_xprt;

	trace_xprtrdma_mr_recycle(mr);

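	/* If the MR is still DMA-mapped, unmap it before it is released. */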
	if (mr->mr_dir != DMA_NONE) {
		trace_xprtrdma_mr_unmap(mr);
		ib_dma_unmap_sg(r_xprt->rx_ia.ri_id->device,
				mr->mr_sg, mr->mr_nents, mr->mr_dir);
		mr->mr_dir = DMA_NONE;
	}

	spin_lock(&r_xprt->rx_buf.rb_mrlock);
	list_del(&mr->mr_all);
	r_xprt->rx_stats.mrs_recycled++;
	spin_unlock(&r_xprt->rx_buf.rb_mrlock);

	frwr_release_mr(mr);
}

/**
 * frwr_reset - Place MRs back on the free list
 * @req: request to reset
 *
 * Used after a failed marshal. For FRWR, this means the MRs
 * don't have to be fully released and recreated.
 *
 * NB: This is safe only as long as none of @req's MRs are
 * involved with an ongoing asynchronous FAST_REG or LOCAL_INV
 * Work Request.
 */
void frwr_reset(struct rpcrdma_req *req)
{
	while (!list_empty(&req->rl_registered)) {
		struct rpcrdma_mr *mr;

		mr = rpcrdma_mr_pop(&req->rl_registered);
		rpcrdma_mr_unmap_and_put(mr);
	}
}

/**
 * frwr_init_mr - Initialize one MR
 * @ia: interface adapter
 * @mr: generic MR to prepare for FRWR
 *
 * Returns zero if successful. Otherwise a negative errno
 * is returned.
 */
int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
	unsigned int depth = ia->ri_max_frwr_depth;
	struct scatterlist *sg;
	struct ib_mr *frmr;
	int rc;

	frmr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
	if (IS_ERR(frmr))
		goto out_mr_err;

	sg = kcalloc(depth, sizeof(*sg), GFP_KERNEL);
	if (!sg)
		goto out_list_err;

	mr->frwr.fr_mr = frmr;
	mr->mr_dir = DMA_NONE;
	INIT_LIST_HEAD(&mr->mr_list);
	INIT_WORK(&mr->mr_recycle, frwr_mr_recycle_worker);
	init_completion(&mr->frwr.fr_linv_done);

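	/* The scatterlist is sized to the device's maximum FRWR depth,
	 * so a single MR can cover the largest region this device can
	 * register in one FAST_REG operation.
	 */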
	sg_init_table(sg, depth);
	mr->mr_sg = sg;
	return 0;

out_mr_err:
	rc = PTR_ERR(frmr);
	trace_xprtrdma_frwr_alloc(mr, rc);
	return rc;

out_list_err:
	dprintk("RPC:       %s: sg allocation failure\n",
		__func__);
	ib_dereg_mr(frmr);
	return -ENOMEM;
}

/**
 * frwr_open - Prepare an endpoint for use with FRWR
 * @ia: interface adapter this endpoint will use
 * @ep: endpoint to prepare
 *
 * On success, sets:
 *	ep->rep_attr.cap.max_send_wr
 *	ep->rep_attr.cap.max_recv_wr
 *	ep->rep_max_requests
 *	ia->ri_max_segs
 *
 * And these FRWR-related fields:
 *	ia->ri_max_frwr_depth
 *	ia->ri_mrtype
 *
 * On failure, a negative errno is returned.
 */
int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep)
{
	struct ib_device_attr *attrs = &ia->ri_id->device->attrs;
	int max_qp_wr, depth, delta;

	ia->ri_mrtype = IB_MR_TYPE_MEM_REG;
	if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
		ia->ri_mrtype = IB_MR_TYPE_SG_GAPS;

	/* Quirk: Some devices advertise a large max_fast_reg_page_list_len
	 * capability, but perform optimally when the MRs are not larger
	 * than a page.
	 */
	if (attrs->max_sge_rd > 1)
		ia->ri_max_frwr_depth = attrs->max_sge_rd;
	else
		ia->ri_max_frwr_depth = attrs->max_fast_reg_page_list_len;
	if (ia->ri_max_frwr_depth > RPCRDMA_MAX_DATA_SEGS)
		ia->ri_max_frwr_depth = RPCRDMA_MAX_DATA_SEGS;
	dprintk("RPC:       %s: max FR page list depth = %u\n",
		__func__, ia->ri_max_frwr_depth);

	/* Add room for frwr register and invalidate WRs.
	 * 1. FRWR reg WR for head
	 * 2. FRWR invalidate WR for head
	 * 3. N FRWR reg WRs for pagelist
	 * 4. N FRWR invalidate WRs for pagelist
	 * 5. FRWR reg WR for tail
	 * 6. FRWR invalidate WR for tail
	 * 7. The RDMA_SEND WR
	 */
	depth = 7;

	/* Calculate N if the device max FRWR depth is smaller than
	 * RPCRDMA_MAX_DATA_SEGS.
	 */
	if (ia->ri_max_frwr_depth < RPCRDMA_MAX_DATA_SEGS) {
		delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frwr_depth;
		do {
			depth += 2;	/* FRWR reg + invalidate */
			delta -= ia->ri_max_frwr_depth;
		} while (delta > 0);
	}
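
	/* Worked example with illustrative values: if RPCRDMA_MAX_DATA_SEGS
	 * were 64 and the device's max FRWR depth were 24, delta would
	 * start at 40 and the loop would run twice, adding a reg/invalidate
	 * WR pair each iteration: depth = 7 + 2 * 2 = 11.
	 */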

	max_qp_wr = ia->ri_id->device->attrs.max_qp_wr;
	max_qp_wr -= RPCRDMA_BACKWARD_WRS;
	max_qp_wr -= 1;
	if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
		return -ENOMEM;
	if (ep->rep_max_requests > max_qp_wr)
		ep->rep_max_requests = max_qp_wr;
	ep->rep_attr.cap.max_send_wr = ep->rep_max_requests * depth;
	if (ep->rep_attr.cap.max_send_wr > max_qp_wr) {
		ep->rep_max_requests = max_qp_wr / depth;
		if (!ep->rep_max_requests)
			return -EINVAL;
		ep->rep_attr.cap.max_send_wr = ep->rep_max_requests * depth;
	}
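
	/* Reserve extra send and receive queue slots for backchannel
	 * WRs, plus one slot each for draining the queues at disconnect
	 * (ib_drain_sq and ib_drain_rq consume one WR apiece).
	 */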
	ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
	ep->rep_attr.cap.max_send_wr += 1;
	ep->rep_attr.cap.max_recv_wr = ep->rep_max_requests;
	ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
	ep->rep_attr.cap.max_recv_wr += 1;

	ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
				ia->ri_max_frwr_depth);
	/* Reply chunks require segments for head and tail buffers */
	ia->ri_max_segs += 2;
	if (ia->ri_max_segs > RPCRDMA_MAX_HDR_SEGS)
		ia->ri_max_segs = RPCRDMA_MAX_HDR_SEGS;
	return 0;
}

/**
 * frwr_maxpages - Compute size of largest payload
 * @r_xprt: transport
 *
 * Returns maximum size of an RPC message, in pages.
 *
 * FRWR mode conveys a list of pages per chunk segment. The
 * maximum length of that list is the FRWR page list depth.
 */
size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;

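	/* ri_max_segs includes the two extra segments that frwr_open
	 * reserves for reply chunk head and tail buffers; exclude
	 * them here.
	 */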
	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
		     (ia->ri_max_segs - 2) * ia->ri_max_frwr_depth);
}

/**
 * frwr_map - Register a memory region
 * @r_xprt: controlling transport
 * @seg: memory region co-ordinates
 * @nsegs: number of segments remaining
 * @writing: true when RDMA Write will be used
 * @xid: XID of RPC using the registered memory
 * @out: initialized MR
 *
 * Prepare a REG_MR Work Request to register a memory region
 * for remote access via RDMA READ or RDMA WRITE.
 *
 * Returns the next segment or a negative errno pointer.
 * On success, @out is filled in with the registered MR.
 */
struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
				struct rpcrdma_mr_seg *seg,
				int nsegs, bool writing, __be32 xid,
				struct rpcrdma_mr **out)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
	struct rpcrdma_mr *mr;
	struct ib_mr *ibmr;
	struct ib_reg_wr *reg_wr;
	int i, n;
	u8 key;

	mr = rpcrdma_mr_get(r_xprt);
	if (!mr)
		goto out_getmr_err;

	if (nsegs > ia->ri_max_frwr_depth)
		nsegs = ia->ri_max_frwr_depth;
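
	/* Build the MR's scatterlist. Unless the device supports SG_GAPS
	 * registration, stop before any segment that would leave a gap
	 * (a page-interior boundary) in the middle of the region.
	 */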
	for (i = 0; i < nsegs;) {
		if (seg->mr_page)
			sg_set_page(&mr->mr_sg[i],
				    seg->mr_page,
				    seg->mr_len,
				    offset_in_page(seg->mr_offset));
		else
			sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
				   seg->mr_len);

		++seg;
		++i;
		if (holes_ok)
			continue;
		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	mr->mr_dir = rpcrdma_data_dir(writing);

	mr->mr_nents =
		ib_dma_map_sg(ia->ri_id->device, mr->mr_sg, i, mr->mr_dir);
	if (!mr->mr_nents)
		goto out_dmamap_err;

	ibmr = mr->frwr.fr_mr;
	n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
	if (unlikely(n != mr->mr_nents))
		goto out_mapmr_err;

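	/* Tag the upper 32 bits of the MR's iova with this RPC's XID,
	 * and advance the 8-bit key portion of the R_key. Each
	 * registration of this MR is thereby distinct, which helps
	 * catch stale or misdirected references and eases debugging.
	 */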
	ibmr->iova &= 0x00000000ffffffff;
	ibmr->iova |= ((u64)be32_to_cpu(xid)) << 32;
	key = (u8)(ibmr->rkey & 0x000000FF);
	ib_update_fast_reg_key(ibmr, ++key);

	reg_wr = &mr->frwr.fr_regwr;
	reg_wr->mr = ibmr;
	reg_wr->key = ibmr->rkey;
	reg_wr->access = writing ?
			 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
			 IB_ACCESS_REMOTE_READ;

	mr->mr_handle = ibmr->rkey;
	mr->mr_length = ibmr->length;
	mr->mr_offset = ibmr->iova;
	trace_xprtrdma_mr_map(mr);

	*out = mr;
	return seg;

out_getmr_err:
	xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
	return ERR_PTR(-EAGAIN);

out_dmamap_err:
	mr->mr_dir = DMA_NONE;
	trace_xprtrdma_frwr_sgerr(mr, i);
	rpcrdma_mr_put(mr);
	return ERR_PTR(-EIO);

out_mapmr_err:
	trace_xprtrdma_frwr_maperr(mr, n);
	rpcrdma_mr_recycle(mr);
	return ERR_PTR(-EIO);
}

/**
 * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC
 * @cq:	completion queue (ignored)
 * @wc:	completed WR
 *
 */
static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_frwr *frwr =
		container_of(cqe, struct rpcrdma_frwr, fr_cqe);

	/* WARNING: Only wr_cqe and status are reliable at this point */
	trace_xprtrdma_wc_fastreg(wc, frwr);
	/* The MR will get recycled when the associated req is retransmitted */
}

/**
 * frwr_send - post Send WR containing the RPC Call message
 * @ia: interface adapter
 * @req: Prepared RPC Call
 *
 * For FRWR, chain any FastReg WRs to the Send WR. Only a
 * single ib_post_send call is needed to register memory
 * and then post the Send WR.
 *
 * Returns the result of ib_post_send.
 */
int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
{
	struct ib_send_wr *post_wr;
	struct rpcrdma_mr *mr;

	post_wr = &req->rl_sendctx->sc_wr;
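
	/* Chain each FastReg WR in front of the Send WR so that the
	 * single ib_post_send() below registers all of @req's memory
	 * and then posts the Send itself.
	 */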
	list_for_each_entry(mr, &req->rl_registered, mr_list) {
		struct rpcrdma_frwr *frwr;

		frwr = &mr->frwr;

		frwr->fr_cqe.done = frwr_wc_fastreg;
		frwr->fr_regwr.wr.next = post_wr;
		frwr->fr_regwr.wr.wr_cqe = &frwr->fr_cqe;
		frwr->fr_regwr.wr.num_sge = 0;
		frwr->fr_regwr.wr.opcode = IB_WR_REG_MR;
		frwr->fr_regwr.wr.send_flags = 0;

		post_wr = &frwr->fr_regwr.wr;
	}

	/* If ib_post_send fails, the next ->send_request for
	 * @req will queue these MRs for recovery.
	 */
	return ib_post_send(ia->ri_id->qp, post_wr, NULL);
}

/**
 * frwr_reminv - handle a remotely invalidated mr on the @mrs list
 * @rep: Received reply
 * @mrs: list of MRs to check
 *
 */
void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
{
	struct rpcrdma_mr *mr;

	list_for_each_entry(mr, mrs, mr_list)
		if (mr->mr_handle == rep->rr_inv_rkey) {
			list_del_init(&mr->mr_list);
			trace_xprtrdma_mr_remoteinv(mr);
			rpcrdma_mr_unmap_and_put(mr);
			break;
		}
}

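/* If a LOCAL_INV completion is flushed, the MR's state is unknown and
 * it cannot safely be reused, so recycle it. Otherwise, DMA-unmap the
 * MR and return it to the free list.
 */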
static void __frwr_release_mr(struct ib_wc *wc, struct rpcrdma_mr *mr)
{
	if (wc->status != IB_WC_SUCCESS)
		rpcrdma_mr_recycle(mr);
	else
		rpcrdma_mr_unmap_and_put(mr);
}

/**
 * frwr_wc_localinv - Invoked by RDMA provider for a LOCAL_INV WC
 * @cq:	completion queue (ignored)
 * @wc:	completed WR
 *
 */
static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_frwr *frwr =
		container_of(cqe, struct rpcrdma_frwr, fr_cqe);
	struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);

	/* WARNING: Only wr_cqe and status are reliable at this point */
	trace_xprtrdma_wc_li(wc, frwr);
	__frwr_release_mr(wc, mr);
}

/**
 * frwr_wc_localinv_wake - Invoked by RDMA provider for a LOCAL_INV WC
 * @cq:	completion queue (ignored)
 * @wc:	completed WR
 *
 * Awaken anyone waiting for an MR to finish being fenced.
 */
static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_frwr *frwr =
		container_of(cqe, struct rpcrdma_frwr, fr_cqe);
	struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);

	/* WARNING: Only wr_cqe and status are reliable at this point */
	trace_xprtrdma_wc_li_wake(wc, frwr);
	complete(&frwr->fr_linv_done);
	__frwr_release_mr(wc, mr);
}

/**
 * frwr_unmap_sync - invalidate memory regions that were registered
 * @r_xprt: controlling transport instance
 * @req: rpcrdma_req with a non-empty list of MRs to process
 *
 * Sleeps until it is safe for the host CPU to access the previously
 * mapped memory regions. This guarantees that registered MRs are
 * properly fenced from the server before the RPC consumer accesses
 * the data in them. It also ensures proper Send flow control: waking
 * the next RPC waits until this RPC has relinquished all its Send
 * Queue entries.
 */
void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
	struct ib_send_wr *first, **prev, *last;
	const struct ib_send_wr *bad_wr;
	struct rpcrdma_frwr *frwr;
	struct rpcrdma_mr *mr;
	int rc;

	/* ORDER: Invalidate all of the MRs first
	 *
	 * Chain the LOCAL_INV Work Requests and post them with
	 * a single ib_post_send() call.
	 */
	frwr = NULL;
	prev = &first;
	while (!list_empty(&req->rl_registered)) {
		mr = rpcrdma_mr_pop(&req->rl_registered);

		trace_xprtrdma_mr_localinv(mr);
		r_xprt->rx_stats.local_inv_needed++;

		frwr = &mr->frwr;
		frwr->fr_cqe.done = frwr_wc_localinv;
		last = &frwr->fr_invwr;
		last->next = NULL;
		last->wr_cqe = &frwr->fr_cqe;
		last->sg_list = NULL;
		last->num_sge = 0;
		last->opcode = IB_WR_LOCAL_INV;
		last->send_flags = IB_SEND_SIGNALED;
		last->ex.invalidate_rkey = mr->mr_handle;

		*prev = last;
		prev = &last->next;
	}

	/* Strong send queue ordering guarantees that when the
	 * last WR in the chain completes, all WRs in the chain
	 * are complete.
	 */
	frwr->fr_cqe.done = frwr_wc_localinv_wake;
	reinit_completion(&frwr->fr_linv_done);

	/* Transport disconnect drains the receive CQ before it
	 * replaces the QP. The RPC reply handler won't call us
	 * unless ri_id->qp is a valid pointer.
	 */
	bad_wr = NULL;
	rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr);
	trace_xprtrdma_post_send(req, rc);

	/* The final LOCAL_INV WR in the chain is supposed to
	 * do the wake. If it was never posted, the wake will
	 * not happen, so don't wait in that case.
	 */
	if (bad_wr != first)
		wait_for_completion(&frwr->fr_linv_done);
	if (!rc)
		return;

	/* Recycle MRs in the LOCAL_INV chain that did not get posted.
	 */
	while (bad_wr) {
		frwr = container_of(bad_wr, struct rpcrdma_frwr,
				    fr_invwr);
		mr = container_of(frwr, struct rpcrdma_mr, frwr);
		bad_wr = bad_wr->next;

		list_del_init(&mr->mr_list);
		rpcrdma_mr_recycle(mr);
	}
}

/**
 * frwr_wc_localinv_done - Invoked by RDMA provider for a signaled LOCAL_INV WC
 * @cq:	completion queue (ignored)
 * @wc:	completed WR
 *
 */
static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_frwr *frwr =
		container_of(cqe, struct rpcrdma_frwr, fr_cqe);
	struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);

	/* WARNING: Only wr_cqe and status are reliable at this point */
	trace_xprtrdma_wc_li_done(wc, frwr);
	rpcrdma_complete_rqst(frwr->fr_req->rl_reply);
	__frwr_release_mr(wc, mr);
}

/**
 * frwr_unmap_async - invalidate memory regions that were registered
 * @r_xprt: controlling transport instance
 * @req: rpcrdma_req with a non-empty list of MRs to process
 *
 * This guarantees that registered MRs are properly fenced from the
 * server before the RPC consumer accesses the data in them. It also
 * ensures proper Send flow control: waking the next RPC waits until
 * this RPC has relinquished all its Send Queue entries.
 */
void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
	struct ib_send_wr *first, *last, **prev;
	const struct ib_send_wr *bad_wr;
	struct rpcrdma_frwr *frwr;
	struct rpcrdma_mr *mr;
	int rc;

	/* Chain the LOCAL_INV Work Requests and post them with
	 * a single ib_post_send() call.
	 */
	frwr = NULL;
	prev = &first;
	while (!list_empty(&req->rl_registered)) {
		mr = rpcrdma_mr_pop(&req->rl_registered);

		trace_xprtrdma_mr_localinv(mr);
		r_xprt->rx_stats.local_inv_needed++;

		frwr = &mr->frwr;
		frwr->fr_cqe.done = frwr_wc_localinv;
		frwr->fr_req = req;
		last = &frwr->fr_invwr;
		last->next = NULL;
		last->wr_cqe = &frwr->fr_cqe;
		last->sg_list = NULL;
		last->num_sge = 0;
		last->opcode = IB_WR_LOCAL_INV;
		last->send_flags = IB_SEND_SIGNALED;
		last->ex.invalidate_rkey = mr->mr_handle;

		*prev = last;
		prev = &last->next;
	}

	/* Strong send queue ordering guarantees that when the
	 * last WR in the chain completes, all WRs in the chain
	 * are complete. The last completion will wake up the
	 * RPC waiter.
	 */
	frwr->fr_cqe.done = frwr_wc_localinv_done;

	/* Transport disconnect drains the receive CQ before it
	 * replaces the QP. The RPC reply handler won't call us
	 * unless ri_id->qp is a valid pointer.
	 */
	bad_wr = NULL;
	rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr);
	trace_xprtrdma_post_send(req, rc);
	if (!rc)
		return;

	/* Recycle MRs in the LOCAL_INV chain that did not get posted.
	 */
	while (bad_wr) {
		frwr = container_of(bad_wr, struct rpcrdma_frwr, fr_invwr);
		mr = container_of(frwr, struct rpcrdma_mr, frwr);
		bad_wr = bad_wr->next;

		rpcrdma_mr_recycle(mr);
	}

	/* The final LOCAL_INV WR in the chain is supposed to
	 * do the wake. If it was never posted, the wake will
	 * not happen, so wake here in that case.
	 */
	rpcrdma_complete_rqst(req->rl_reply);
}