/* Lightweight memory registration using Fast Registration Work
 * Requests (FRWR).
 *
 * FRWR features ordered asynchronous registration and invalidation
 * of arbitrarily-sized memory regions. This is the fastest and
 * safest but most complex memory registration mode.
 */

/* Normal operation
 *
 * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
 * Work Request (frwr_op_map). When the RDMA operation is finished, this
 * Memory Region is invalidated using a LOCAL_INV Work Request
 * (frwr_op_unmap_sync).
 *
 * Typically these Work Requests are not signaled, and neither are RDMA
 * SEND Work Requests (with the exception of signaling occasionally to
 * prevent provider work queue overflows). This greatly reduces HCA
 * interrupt workload.
 */

/* Transport recovery
 *
 * When the underlying transport disconnects, MRs are left in one of
 * four states:
 *
 * FRWR_IS_INVALID:	The MR was not in use before the QP entered
 *			ERROR state.
 * FRWR_IS_VALID:	The MR was registered before the QP entered
 *			ERROR state.
 * FRWR_FLUSHED_FR:	The MR was being registered when the QP entered
 *			ERROR state, and the pending WR was flushed.
 * FRWR_FLUSHED_LI:	The MR was being invalidated when the QP entered
 *			ERROR state, and the pending WR was flushed.
 *
 * VALID and FLUSHED MRs are recovered by destroying the underlying
 * ib_mr and allocating a fresh one (frwr_op_recover_mr). Because that
 * allocates fresh hardware resources, recovery is deferred to a
 * workqueue, and recovered MRs are placed back on the free MR list
 * only when recovery is complete.
 */
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_rdma.h>

#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

bool
frwr_is_supported(struct rpcrdma_ia *ia)
{
	struct ib_device_attr *attrs = &ia->ri_device->attrs;

	if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
		goto out_not_supported;
	if (attrs->max_fast_reg_page_list_len == 0)
		goto out_not_supported;
	return true;

out_not_supported:
	pr_info("rpcrdma: 'frwr' mode is not supported by device %s\n",
		ia->ri_device->name);
	return false;
}

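/* Allocate the resources that back one MR: an ib_mr sized to the
 * transport's maximum FRWR depth, and a scatterlist of the same depth.
 */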
static int
frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
	unsigned int depth = ia->ri_max_frwr_depth;
	struct rpcrdma_frwr *frwr = &mr->frwr;
	int rc;

	frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
	if (IS_ERR(frwr->fr_mr))
		goto out_mr_err;

	mr->mr_sg = kcalloc(depth, sizeof(*mr->mr_sg), GFP_KERNEL);
	if (!mr->mr_sg)
		goto out_list_err;

	INIT_LIST_HEAD(&mr->mr_list);
	sg_init_table(mr->mr_sg, depth);
	init_completion(&frwr->fr_linv_done);
	return 0;

out_mr_err:
	rc = PTR_ERR(frwr->fr_mr);
	dprintk("RPC:       %s: ib_alloc_mr status %i\n",
		__func__, rc);
	return rc;

out_list_err:
	rc = -ENOMEM;
	dprintk("RPC:       %s: sg allocation failure\n",
		__func__);
	ib_dereg_mr(frwr->fr_mr);
	return rc;
}

static void
frwr_op_release_mr(struct rpcrdma_mr *mr)
{
	int rc;

	rc = ib_dereg_mr(mr->frwr.fr_mr);
	if (rc)
		pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
		       mr, rc);
	kfree(mr->mr_sg);
	kfree(mr);
}

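/* Destroy this MR's ib_mr and allocate a replacement, leaving the MR
 * in the INVALID state with a fresh rkey.
 */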
static int
__frwr_mr_reset(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
	struct rpcrdma_frwr *frwr = &mr->frwr;
	int rc;

	rc = ib_dereg_mr(frwr->fr_mr);
	if (rc) {
		pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",
			rc, mr);
		return rc;
	}

	frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
				  ia->ri_max_frwr_depth);
	if (IS_ERR(frwr->fr_mr)) {
		pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
			PTR_ERR(frwr->fr_mr), mr);
		return PTR_ERR(frwr->fr_mr);
	}

	dprintk("RPC:       %s: recovered FRWR %p\n", __func__, frwr);
	frwr->fr_state = FRWR_IS_INVALID;
	return 0;
}

/* Reset of a single FRWR. Generate a fresh rkey by replacing the MR.
 */
static void
frwr_op_recover_mr(struct rpcrdma_mr *mr)
{
	enum rpcrdma_frwr_state state = mr->frwr.fr_state;
	struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	int rc;

	rc = __frwr_mr_reset(ia, mr);
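	/* An MR flushed while being invalidated (FLUSHED_LI) was
	 * already DMA-unmapped by frwr_op_unmap_sync's clean-up path;
	 * skip it here to avoid a double DMA unmap.
	 */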
	if (state != FRWR_FLUSHED_LI) {
		trace_xprtrdma_dma_unmap(mr);
		ib_dma_unmap_sg(ia->ri_device,
				mr->mr_sg, mr->mr_nents, mr->mr_dir);
	}
	if (rc)
		goto out_release;

	rpcrdma_mr_put(mr);
	r_xprt->rx_stats.mrs_recovered++;
	return;

out_release:
	pr_err("rpcrdma: FRWR reset failed %d, %p released\n", rc, mr);
	r_xprt->rx_stats.mrs_orphaned++;

	spin_lock(&r_xprt->rx_buf.rb_mrlock);
	list_del(&mr->mr_all);
	spin_unlock(&r_xprt->rx_buf.rb_mrlock);

	frwr_op_release_mr(mr);
}

/* On success, sets:
 *	ep->rep_attr.cap.max_send_wr
 *	ep->rep_attr.cap.max_recv_wr
 *	cdata->max_requests
 *	ia->ri_max_segs
 *
 * And these FRWR-related fields:
 *	ia->ri_max_frwr_depth
 *	ia->ri_mrtype
 */
static int
frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
	     struct rpcrdma_create_data_internal *cdata)
{
	struct ib_device_attr *attrs = &ia->ri_device->attrs;
	int max_qp_wr, depth, delta;

	ia->ri_mrtype = IB_MR_TYPE_MEM_REG;
	if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
		ia->ri_mrtype = IB_MR_TYPE_SG_GAPS;

	ia->ri_max_frwr_depth =
			min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
			      attrs->max_fast_reg_page_list_len);
	dprintk("RPC:       %s: device's max FR page list len = %u\n",
		__func__, ia->ri_max_frwr_depth);

	/* Add room for FRWR register and invalidate WRs.
	 * 1. FRWR reg WR for head
	 * 2. FRWR invalidate WR for head
	 * 3. N FRWR reg WRs for pagelist
	 * 4. N FRWR invalidate WRs for pagelist
	 * 5. FRWR reg WR for tail
	 * 6. FRWR invalidate WR for tail
	 * 7. The RDMA_SEND WR
	 */
	depth = 7;

	/* Calculate N if the device max FRWR depth is smaller than
	 * RPCRDMA_MAX_DATA_SEGS.
	 */
	if (ia->ri_max_frwr_depth < RPCRDMA_MAX_DATA_SEGS) {
		delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frwr_depth;
		do {
			depth += 2; /* FRWR reg + invalidate */
			delta -= ia->ri_max_frwr_depth;
		} while (delta > 0);
	}

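	/* Fit as many RPC slots as possible into the device's send
	 * queue limit, reserving room for backchannel WRs plus one
	 * extra WR used when draining the QP.
	 */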
	max_qp_wr = ia->ri_device->attrs.max_qp_wr;
	max_qp_wr -= RPCRDMA_BACKWARD_WRS;
	max_qp_wr -= 1;
	if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
		return -ENOMEM;
	if (cdata->max_requests > max_qp_wr)
		cdata->max_requests = max_qp_wr;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests * depth;
	if (ep->rep_attr.cap.max_send_wr > max_qp_wr) {
		cdata->max_requests = max_qp_wr / depth;
		if (!cdata->max_requests)
			return -EINVAL;
		ep->rep_attr.cap.max_send_wr = cdata->max_requests *
					       depth;
	}
	ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
	ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
	ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */

	ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
				ia->ri_max_frwr_depth);
	return 0;
}

/* FRWR mode conveys a list of pages per chunk segment. The
 * maximum length of that list is the FRWR page list depth.
 */
static size_t
frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;

	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
		     RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frwr_depth);
}

static void
__frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
{
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("rpcrdma: %s: %s (%u/0x%x)\n",
		       wr, ib_wc_status_msg(wc->status),
		       wc->status, wc->vendor_err);
}

/**
 * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC
 * @cq:	completion queue (ignored)
 * @wc:	completed WR
 *
 */
static void
frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_frwr *frwr =
			container_of(cqe, struct rpcrdma_frwr, fr_cqe);

	/* WARNING: Only wr_cqe and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS) {
		frwr->fr_state = FRWR_FLUSHED_FR;
		__frwr_sendcompletion_flush(wc, "fastreg");
	}
	trace_xprtrdma_wc_fastreg(wc, frwr);
}

/**
 * frwr_wc_localinv - Invoked by RDMA provider for a flushed LocalInv WC
 * @cq:	completion queue (ignored)
 * @wc:	completed WR
 *
 */
static void
frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
						 fr_cqe);

	/* WARNING: Only wr_cqe and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS) {
		frwr->fr_state = FRWR_FLUSHED_LI;
		__frwr_sendcompletion_flush(wc, "localinv");
	}
	trace_xprtrdma_wc_li(wc, frwr);
}

/**
 * frwr_wc_localinv_wake - Invoked by RDMA provider for a signaled LocalInv WC
 * @cq:	completion queue (ignored)
 * @wc:	completed WR
 *
 * Awaken anyone waiting for an MR to finish being fenced.
 */
static void
frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
						 fr_cqe);

	/* WARNING: Only wr_cqe and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS) {
		frwr->fr_state = FRWR_FLUSHED_LI;
		__frwr_sendcompletion_flush(wc, "localinv");
	}
	complete(&frwr->fr_linv_done);
	trace_xprtrdma_wc_li_wake(wc, frwr);
}

/* Post a REG_MR Work Request to register a memory region
 * for remote access via RDMA READ or WRITE.
 */
static struct rpcrdma_mr_seg *
frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
	    int nsegs, bool writing, struct rpcrdma_mr **out)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
	struct rpcrdma_frwr *frwr;
	struct rpcrdma_mr *mr;
	struct ib_mr *ibmr;
	struct ib_reg_wr *reg_wr;
	int i, n;
	u8 key;

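	/* A free MR that is not INVALID was flushed by a transport
	 * disconnect; hand it to the recovery workqueue and keep
	 * looking until an INVALID MR is found.
	 */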
	mr = NULL;
	do {
		if (mr)
			rpcrdma_mr_defer_recovery(mr);
		mr = rpcrdma_mr_get(r_xprt);
		if (!mr)
			return ERR_PTR(-EAGAIN);
	} while (mr->frwr.fr_state != FRWR_IS_INVALID);
	frwr = &mr->frwr;
	frwr->fr_state = FRWR_IS_VALID;

	if (nsegs > ia->ri_max_frwr_depth)
		nsegs = ia->ri_max_frwr_depth;
	for (i = 0; i < nsegs;) {
		if (seg->mr_page)
			sg_set_page(&mr->mr_sg[i],
				    seg->mr_page,
				    seg->mr_len,
				    offset_in_page(seg->mr_offset));
		else
			sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
				   seg->mr_len);

		++seg;
		++i;
		if (holes_ok)
			continue;
		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	mr->mr_dir = rpcrdma_data_dir(writing);

	mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
	if (!mr->mr_nents)
		goto out_dmamap_err;
	trace_xprtrdma_dma_map(mr);

	ibmr = frwr->fr_mr;
	n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
	if (unlikely(n != mr->mr_nents))
		goto out_mapmr_err;

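	/* Bump the key portion of the rkey so that each registration
	 * of this MR presents a fresh rkey to the remote peer.
	 */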
	key = (u8)(ibmr->rkey & 0x000000FF);
	ib_update_fast_reg_key(ibmr, ++key);

	reg_wr = &frwr->fr_regwr;
	reg_wr->mr = ibmr;
	reg_wr->key = ibmr->rkey;
	reg_wr->access = writing ?
			 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
			 IB_ACCESS_REMOTE_READ;

	mr->mr_handle = ibmr->rkey;
	mr->mr_length = ibmr->length;
	mr->mr_offset = ibmr->iova;

	*out = mr;
	return seg;

out_dmamap_err:
	pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
	       mr->mr_sg, i);
	frwr->fr_state = FRWR_IS_INVALID;
	rpcrdma_mr_put(mr);
	return ERR_PTR(-EIO);

out_mapmr_err:
	pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
	       frwr->fr_mr, n, mr->mr_nents);
	rpcrdma_mr_defer_recovery(mr);
	return ERR_PTR(-EIO);
}

/* Post Send WR containing the RPC Call message.
 *
 * For FRWR, chain any FastReg WRs to the Send WR. Only a
 * single ib_post_send call is needed to register memory
 * and then post the Send WR.
 */
static int
frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
{
	struct ib_send_wr *post_wr;
	struct rpcrdma_mr *mr;

	post_wr = &req->rl_sendctx->sc_wr;
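	/* Prepend each FastReg WR to the chain, so that every MR is
	 * registered before the Send WR carrying the RPC Call is
	 * executed.
	 */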
	list_for_each_entry(mr, &req->rl_registered, mr_list) {
		struct rpcrdma_frwr *frwr;

		frwr = &mr->frwr;

		frwr->fr_cqe.done = frwr_wc_fastreg;
		frwr->fr_regwr.wr.next = post_wr;
		frwr->fr_regwr.wr.wr_cqe = &frwr->fr_cqe;
		frwr->fr_regwr.wr.num_sge = 0;
		frwr->fr_regwr.wr.opcode = IB_WR_REG_MR;
		frwr->fr_regwr.wr.send_flags = 0;

		post_wr = &frwr->fr_regwr.wr;
	}

	/* If ib_post_send fails, the next ->send_request for
	 * @req will queue these MRs for recovery.
	 */
	return ib_post_send(ia->ri_id->qp, post_wr, NULL);
}

/* Handle a remotely invalidated MR on the @mrs list
 */
static void
frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
{
	struct rpcrdma_mr *mr;

	list_for_each_entry(mr, mrs, mr_list)
		if (mr->mr_handle == rep->rr_inv_rkey) {
			list_del_init(&mr->mr_list);
			trace_xprtrdma_remoteinv(mr);
			mr->frwr.fr_state = FRWR_IS_INVALID;
			rpcrdma_mr_unmap_and_put(mr);
			break;	/* only one invalidated MR per RPC */
		}
}

/* Invalidate, then unmap, all memory regions that were registered
 * for @req. Also unmaps the corresponding chunks if they have not
 * been invalidated already.
 *
 * Sleeps until it is safe for the host CPU to access the
 * previously mapped memory regions.
 *
 * Caller ensures that @mrs is not empty before the call. This
 * function empties the list.
 */
static void
frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
{
	struct ib_send_wr *first, **prev, *last;
	const struct ib_send_wr *bad_wr;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_frwr *frwr;
	struct rpcrdma_mr *mr;
	int count, rc;

	/* ORDER: Invalidate all of the MRs first
	 *
	 * Chain the LOCAL_INV Work Requests and post them with
	 * a single ib_post_send() call.
	 */
	frwr = NULL;
	count = 0;
	prev = &first;
	list_for_each_entry(mr, mrs, mr_list) {
		mr->frwr.fr_state = FRWR_IS_INVALID;

		frwr = &mr->frwr;
		trace_xprtrdma_localinv(mr);

		frwr->fr_cqe.done = frwr_wc_localinv;
		last = &frwr->fr_invwr;
		memset(last, 0, sizeof(*last));
		last->wr_cqe = &frwr->fr_cqe;
		last->opcode = IB_WR_LOCAL_INV;
		last->ex.invalidate_rkey = mr->mr_handle;
		count++;

		*prev = last;
		prev = &last->next;
	}
	if (!frwr)
		goto unmap;

	/* Strong send queue ordering guarantees that when the
	 * last WR in the chain completes, all WRs in the chain
	 * are complete.
	 */
	last->send_flags = IB_SEND_SIGNALED;
	frwr->fr_cqe.done = frwr_wc_localinv_wake;
	reinit_completion(&frwr->fr_linv_done);

	/* Transport disconnect drains the receive CQ before it
	 * replaces the QP. The RPC reply handler won't call us
	 * unless ri_id->qp is a valid pointer.
	 */
	r_xprt->rx_stats.local_inv_needed++;
	bad_wr = NULL;
	rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);
	if (bad_wr != first)
		wait_for_completion(&frwr->fr_linv_done);
	if (rc)
		goto reset_mrs;

	/* ORDER: Now DMA unmap all of the MRs, and return
	 * them to the free MR list.
	 */
unmap:
	while (!list_empty(mrs)) {
		mr = rpcrdma_mr_pop(mrs);
		rpcrdma_mr_unmap_and_put(mr);
	}
	return;

reset_mrs:
	pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc);

	/* Find and reset the MRs in the LOCAL_INV WRs that did not
	 * get posted.
	 */
	while (bad_wr) {
		frwr = container_of(bad_wr, struct rpcrdma_frwr,
				    fr_invwr);
		mr = container_of(frwr, struct rpcrdma_mr, frwr);

		__frwr_mr_reset(ia, mr);

		bad_wr = bad_wr->next;
	}
	goto unmap;
}

const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
	.ro_map				= frwr_op_map,
	.ro_send			= frwr_op_send,
	.ro_reminv			= frwr_op_reminv,
	.ro_unmap_sync			= frwr_op_unmap_sync,
	.ro_recover_mr			= frwr_op_recover_mr,
	.ro_open			= frwr_op_open,
	.ro_maxpages			= frwr_op_maxpages,
	.ro_init_mr			= frwr_op_init_mr,
	.ro_release_mr			= frwr_op_release_mr,
	.ro_displayname			= "frwr",
	.ro_send_w_inv_ok		= RPCRDMA_CMP_F_SND_W_INV_OK,
};