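/* Lightweight memory registration using Fast Registration Work
 * Requests (FRWR).
 *
 * FRWR features ordered asynchronous registration and invalidation
 * of arbitrarily-sized memory regions. This is the fastest and safest
 * but most complex memory registration mode.
 */
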
#include <linux/sunrpc/svc_rdma.h>

#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

/* Capture the send CQ and ib_mr resource IDs that the completion
 * tracepoints below use to identify this MR's completions.
 */
static void frwr_cid_init(struct rpcrdma_ep *ep,
			  struct rpcrdma_mr *mr)
{
	struct rpc_rdma_cid *cid = &mr->mr_cid;

	cid->ci_queue_id = ep->re_attr.send_cq->res.id;
	cid->ci_completion_id = mr->mr_ibmr->res.id;
}

static void frwr_mr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
{
	if (mr->mr_device) {
		trace_xprtrdma_mr_unmap(mr);
		ib_dma_unmap_sg(mr->mr_device, mr->mr_sg, mr->mr_nents,
				mr->mr_dir);
		mr->mr_device = NULL;
	}
}
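
/**
 * frwr_mr_release - Destroy one MR
 * @mr: MR allocated by frwr_mr_init
 *
 * Unmaps and deregisters @mr, then frees its scatterlist and the
 * MR itself.
 */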
void frwr_mr_release(struct rpcrdma_mr *mr)
{
	int rc;

	frwr_mr_unmap(mr->mr_xprt, mr);

	rc = ib_dereg_mr(mr->mr_ibmr);
	if (rc)
		trace_xprtrdma_frwr_dereg(mr, rc);
	kfree(mr->mr_sg);
	kfree(mr);
}

static void frwr_mr_put(struct rpcrdma_mr *mr)
{
	frwr_mr_unmap(mr->mr_xprt, mr);

	/* The MR is returned to the req's MR free list instead
	 * of to the xprt's MR free list. No spinlock is needed.
	 */
	rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs);
}
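
/**
 * frwr_reset - Place MRs back on @req's free list
 * @req: request to reset
 *
 * Used after a failed marshal. For FRWR, this means the MRs
 * registered for the RPC are released; the RPC is no longer
 * pinning them.
 *
 * NB: This is safe only as long as none of @req's MRs are
 * mapped to a Send SGE.
 */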
void frwr_reset(struct rpcrdma_req *req)
{
	struct rpcrdma_mr *mr;

	while ((mr = rpcrdma_mr_pop(&req->rl_registered)))
		frwr_mr_put(mr);
}
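
/**
 * frwr_mr_init - Initialize one MR
 * @r_xprt: controlling transport instance
 * @mr: generic MR to prepare for FRWR
 *
 * Returns zero if successful. Otherwise a negative errno
 * is returned.
 */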
int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
{
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	unsigned int depth = ep->re_max_fr_depth;
	struct scatterlist *sg;
	struct ib_mr *frmr;
	int rc;

	frmr = ib_alloc_mr(ep->re_pd, ep->re_mrtype, depth);
	if (IS_ERR(frmr))
		goto out_mr_err;

	sg = kmalloc_array(depth, sizeof(*sg), GFP_NOFS);
	if (!sg)
		goto out_list_err;

	mr->mr_xprt = r_xprt;
	mr->mr_ibmr = frmr;
	mr->mr_device = NULL;
	INIT_LIST_HEAD(&mr->mr_list);
	init_completion(&mr->mr_linv_done);
	frwr_cid_init(ep, mr);

	sg_init_table(sg, depth);
	mr->mr_sg = sg;
	return 0;

out_mr_err:
	rc = PTR_ERR(frmr);
	trace_xprtrdma_frwr_alloc(mr, rc);
	return rc;

out_list_err:
	ib_dereg_mr(frmr);
	return -ENOMEM;
}
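
/**
 * frwr_query_device - Prepare a transport for use with FRWR
 * @ep: endpoint to fill in
 * @device: RDMA device to query
 *
 * On success, sets:
 *	ep->re_attr
 *	ep->re_max_requests
 *	ep->re_max_rdma_segs
 *	ep->re_max_fr_depth
 *	ep->re_mrtype
 *
 * Return values:
 *   On success, returns zero.
 *   %-EINVAL - the device does not support FRWR memory registration
 *   %-ENOMEM - the device is not sufficiently capable for NFS/RDMA
 */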
int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device)
{
	const struct ib_device_attr *attrs = &device->attrs;
	int max_qp_wr, depth, delta;
	unsigned int max_sge;

	if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) ||
	    attrs->max_fast_reg_page_list_len == 0) {
		pr_err("rpcrdma: 'frwr' mode is not supported by device %s\n",
		       device->name);
		return -EINVAL;
	}

	max_sge = min_t(unsigned int, attrs->max_send_sge,
			RPCRDMA_MAX_SEND_SGES);
	if (max_sge < RPCRDMA_MIN_SEND_SGES) {
		pr_err("rpcrdma: HCA provides only %u send SGEs\n", max_sge);
		return -ENOMEM;
	}
	ep->re_attr.cap.max_send_sge = max_sge;
	ep->re_attr.cap.max_recv_sge = 1;

	ep->re_mrtype = IB_MR_TYPE_MEM_REG;
	if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
		ep->re_mrtype = IB_MR_TYPE_SG_GAPS;

	/* Quirk: Some devices advertise a large max_fast_reg_page_list_len
	 * capability, but perform optimally when the MRs are not larger
	 * than a page.
	 */
	if (attrs->max_sge_rd > RPCRDMA_MAX_HDR_SEGS)
		ep->re_max_fr_depth = attrs->max_sge_rd;
	else
		ep->re_max_fr_depth = attrs->max_fast_reg_page_list_len;
	if (ep->re_max_fr_depth > RPCRDMA_MAX_DATA_SEGS)
		ep->re_max_fr_depth = RPCRDMA_MAX_DATA_SEGS;

	/* Add room for frwr register and invalidate WRs.
	 * 1. FRWR reg WR for head
	 * 2. FRWR invalidate WR for head
	 * 3. N FRWR reg WRs for pagelist
	 * 4. N FRWR invalidate WRs for pagelist
	 * 5. FRWR reg WR for tail
	 * 6. FRWR invalidate WR for tail
	 * 7. The RDMA_SEND WR
	 */
	depth = 7;

	/* Calculate N if the device max FRWR depth is smaller than
	 * RPCRDMA_MAX_DATA_SEGS.
	 */
	if (ep->re_max_fr_depth < RPCRDMA_MAX_DATA_SEGS) {
		delta = RPCRDMA_MAX_DATA_SEGS - ep->re_max_fr_depth;
		do {
			depth += 2;	/* FRWR reg + invalidate */
			delta -= ep->re_max_fr_depth;
		} while (delta > 0);
	}

	max_qp_wr = attrs->max_qp_wr;
	max_qp_wr -= RPCRDMA_BACKWARD_WRS;
	max_qp_wr -= 1;
	if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
		return -ENOMEM;
	if (ep->re_max_requests > max_qp_wr)
		ep->re_max_requests = max_qp_wr;
	ep->re_attr.cap.max_send_wr = ep->re_max_requests * depth;
	if (ep->re_attr.cap.max_send_wr > max_qp_wr) {
		ep->re_max_requests = max_qp_wr / depth;
		if (!ep->re_max_requests)
			return -ENOMEM;
		ep->re_attr.cap.max_send_wr = ep->re_max_requests * depth;
	}
	ep->re_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
	ep->re_attr.cap.max_send_wr += 1;	/* for ib_drain_sq */
	ep->re_attr.cap.max_recv_wr = ep->re_max_requests;
	ep->re_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
	ep->re_attr.cap.max_recv_wr += RPCRDMA_MAX_RECV_BATCH;
	ep->re_attr.cap.max_recv_wr += 1;	/* for ib_drain_rq */

	ep->re_max_rdma_segs =
		DIV_ROUND_UP(RPCRDMA_MAX_DATA_SEGS, ep->re_max_fr_depth);
	/* Reply chunks require segments for head and tail buffers */
	ep->re_max_rdma_segs += 2;
	if (ep->re_max_rdma_segs > RPCRDMA_MAX_HDR_SEGS)
		ep->re_max_rdma_segs = RPCRDMA_MAX_HDR_SEGS;

	/* Ensure the underlying device is capable of conveying the
	 * largest r/wsize NFS will ask for. This guarantees that
	 * failing over from one RDMA device to another will not
	 * break NFS I/O.
	 */
	if ((ep->re_max_rdma_segs * ep->re_max_fr_depth) < RPCRDMA_MAX_SEGS)
		return -ENOMEM;

	return 0;
}
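
/**
 * frwr_map - Register a memory region
 * @r_xprt: controlling transport
 * @seg: memory region co-ordinates
 * @nsegs: number of segments remaining
 * @writing: true when RDMA Write will be used
 * @xid: XID of RPC using the registered memory
 * @mr: MR to fill in
 *
 * Prepare a REG_MR Work Request to register a memory region
 * for remote access via RDMA READ or RDMA WRITE.
 *
 * Returns the next segment or a negative errno pointer.
 * On success, @mr is filled in.
 */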
struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
				struct rpcrdma_mr_seg *seg,
				int nsegs, bool writing, __be32 xid,
				struct rpcrdma_mr *mr)
{
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	struct ib_reg_wr *reg_wr;
	int i, n, dma_nents;
	struct ib_mr *ibmr;
	u8 key;

	if (nsegs > ep->re_max_fr_depth)
		nsegs = ep->re_max_fr_depth;
	for (i = 0; i < nsegs;) {
		sg_set_page(&mr->mr_sg[i], seg->mr_page,
			    seg->mr_len, seg->mr_offset);

		++seg;
		++i;
		if (ep->re_mrtype == IB_MR_TYPE_SG_GAPS)
			continue;
		if ((i < nsegs && seg->mr_offset) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	mr->mr_dir = rpcrdma_data_dir(writing);
	mr->mr_nents = i;

	dma_nents = ib_dma_map_sg(ep->re_id->device, mr->mr_sg, mr->mr_nents,
				  mr->mr_dir);
	if (!dma_nents)
		goto out_dmamap_err;
	mr->mr_device = ep->re_id->device;

	ibmr = mr->mr_ibmr;
	n = ib_map_mr_sg(ibmr, mr->mr_sg, dma_nents, NULL, PAGE_SIZE);
	if (n != dma_nents)
		goto out_mapmr_err;

	/* Encode the RPC's XID in the upper 32 bits of the MR's iova,
	 * and advance the low-order "key" octet of the rkey.
	 */
	ibmr->iova &= 0x00000000ffffffff;
	ibmr->iova |= ((u64)be32_to_cpu(xid)) << 32;
	key = (u8)(ibmr->rkey & 0x000000FF);
	ib_update_fast_reg_key(ibmr, ++key);

	reg_wr = &mr->mr_regwr;
	reg_wr->mr = ibmr;
	reg_wr->key = ibmr->rkey;
	reg_wr->access = writing ?
			 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
			 IB_ACCESS_REMOTE_READ;

	mr->mr_handle = ibmr->rkey;
	mr->mr_length = ibmr->length;
	mr->mr_offset = ibmr->iova;
	trace_xprtrdma_mr_map(mr);

	return seg;

out_dmamap_err:
	trace_xprtrdma_frwr_sgerr(mr, i);
	return ERR_PTR(-EIO);

out_mapmr_err:
	trace_xprtrdma_frwr_maperr(mr, n);
	return ERR_PTR(-EIO);
}
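
/**
 * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC
 * @cq: completion queue
 * @wc: WCE for a completed FastReg WR
 *
 */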
static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);

	/* WARNING: Only wr_cqe and status are reliable at this point */
	trace_xprtrdma_wc_fastreg(wc, &mr->mr_cid);

	rpcrdma_flush_disconnect(cq->cq_context, wc);
}
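
/**
 * frwr_send - post Send WRs containing the RPC Call message
 * @r_xprt: controlling transport instance
 * @req: prepared RPC Call
 *
 * For FRWR, chain any FastReg WRs to the Send WR. Only a
 * single ib_post_send call is needed to register memory
 * and then post the Send WR.
 *
 * Returns the return code from ib_post_send.
 *
 * Caller must hold the transport send lock to ensure that the
 * pointers to the transport's rdma_cm_id and QP are stable.
 */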
int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
	struct ib_send_wr *post_wr, *send_wr = &req->rl_wr;
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	struct rpcrdma_mr *mr;
	unsigned int num_wrs;

	num_wrs = 1;
	post_wr = send_wr;
	list_for_each_entry(mr, &req->rl_registered, mr_list) {
		trace_xprtrdma_mr_fastreg(mr);

		mr->mr_cqe.done = frwr_wc_fastreg;
		mr->mr_regwr.wr.next = post_wr;
		mr->mr_regwr.wr.wr_cqe = &mr->mr_cqe;
		mr->mr_regwr.wr.num_sge = 0;
		mr->mr_regwr.wr.opcode = IB_WR_REG_MR;
		mr->mr_regwr.wr.send_flags = 0;
		post_wr = &mr->mr_regwr.wr;
		++num_wrs;
	}

	/* Request a Send completion if another context still holds a
	 * reference to @req, or if the budget of unsignaled Sends is
	 * exhausted; otherwise post the whole chain unsignaled.
	 */
	if ((kref_read(&req->rl_kref) > 1) || num_wrs > ep->re_send_count) {
		send_wr->send_flags |= IB_SEND_SIGNALED;
		ep->re_send_count = min_t(unsigned int, ep->re_send_batch,
					  num_wrs - ep->re_send_count);
	} else {
		send_wr->send_flags &= ~IB_SEND_SIGNALED;
		ep->re_send_count -= num_wrs;
	}

	trace_xprtrdma_post_send(req);
	return ib_post_send(ep->re_id->qp, post_wr, NULL);
}
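
/**
 * frwr_reminv - handle a remotely invalidated mr on the @mrs list
 * @rep: Received reply
 * @mrs: list of MRs to check
 *
 */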
void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
{
	struct rpcrdma_mr *mr;

	list_for_each_entry(mr, mrs, mr_list)
		if (mr->mr_handle == rep->rr_inv_rkey) {
			list_del_init(&mr->mr_list);
			trace_xprtrdma_mr_reminv(mr);
			frwr_mr_put(mr);
			break;	/* only one invalidated MR per RPC */
		}
}

static void frwr_mr_done(struct ib_wc *wc, struct rpcrdma_mr *mr)
{
	if (likely(wc->status == IB_WC_SUCCESS))
		frwr_mr_put(mr);
}
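
/**
 * frwr_wc_localinv - Invoked by RDMA provider for a LOCAL_INV WC
 * @cq: completion queue
 * @wc: WCE for a completed LocalInv WR
 *
 */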
static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);

	/* WARNING: Only wr_cqe and status are reliable at this point */
	trace_xprtrdma_wc_li(wc, &mr->mr_cid);
	frwr_mr_done(wc, mr);

	rpcrdma_flush_disconnect(cq->cq_context, wc);
}
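
/**
 * frwr_wc_localinv_wake - Invoked by RDMA provider for a LOCAL_INV WC
 * @cq: completion queue
 * @wc: WCE for a completed LocalInv WR
 *
 * Awaken anyone waiting for an MR to finish being fenced.
 */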
static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);

	/* WARNING: Only wr_cqe and status are reliable at this point */
	trace_xprtrdma_wc_li_wake(wc, &mr->mr_cid);
	frwr_mr_done(wc, mr);
	complete(&mr->mr_linv_done);

	rpcrdma_flush_disconnect(cq->cq_context, wc);
}
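
/**
 * frwr_unmap_sync - invalidate memory regions that were registered for @req
 * @r_xprt: controlling transport instance
 * @req: rpcrdma_req with a non-empty list of MRs to process
 *
 * Sleeps until it is safe for the host CPU to access the previously
 * mapped memory regions.
 *
 * Caller ensures that @req->rl_registered is not empty.
 */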
void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
	struct ib_send_wr *first, **prev, *last;
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	const struct ib_send_wr *bad_wr;
	struct rpcrdma_mr *mr;
	int rc;

	/* ORDER: Invalidate all of the MRs first
	 *
	 * Chain the LOCAL_INV Work Requests and post them with
	 * a single ib_post_send() call.
	 */
	prev = &first;
	while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
		trace_xprtrdma_mr_localinv(mr);
		r_xprt->rx_stats.local_inv_needed++;

		last = &mr->mr_invwr;
		last->next = NULL;
		last->wr_cqe = &mr->mr_cqe;
		last->sg_list = NULL;
		last->num_sge = 0;
		last->opcode = IB_WR_LOCAL_INV;
		last->send_flags = IB_SEND_SIGNALED;
		last->ex.invalidate_rkey = mr->mr_handle;

		last->wr_cqe->done = frwr_wc_localinv;

		*prev = last;
		prev = &last->next;
	}
	mr = container_of(last, struct rpcrdma_mr, mr_invwr);

	/* Strong send queue ordering guarantees that when the
	 * last WR in the chain completes, all WRs in the chain
	 * are complete.
	 */
	last->wr_cqe->done = frwr_wc_localinv_wake;
	reinit_completion(&mr->mr_linv_done);

	/* Transport disconnect drains the receive CQ before it
	 * replaces the QP. The RPC reply handler won't call us
	 * unless re_id->qp is a valid pointer.
	 */
	bad_wr = NULL;
	rc = ib_post_send(ep->re_id->qp, first, &bad_wr);

	/* The final LOCAL_INV WR in the chain is supposed to
	 * do the wake. If it was never posted, the wake will
	 * not happen, so don't wait in that case.
	 */
	if (bad_wr != first)
		wait_for_completion(&mr->mr_linv_done);
	if (!rc)
		return;

	/* On error, the MRs get destroyed once the QP has drained. */
	trace_xprtrdma_post_linv_err(req, rc);
}
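
/**
 * frwr_wc_localinv_done - Invoked by RDMA provider for a signaled LOCAL_INV WC
 * @cq: completion queue
 * @wc: WCE for a completed LocalInv WR
 *
 */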
static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);
	struct rpcrdma_rep *rep;

	/* WARNING: Only wr_cqe and status are reliable at this point */
	trace_xprtrdma_wc_li_done(wc, &mr->mr_cid);

	/* Ensure that @rep is generated before the MR is released */
	rep = mr->mr_req->rl_reply;
	smp_rmb();

	if (wc->status != IB_WC_SUCCESS) {
		if (rep)
			rpcrdma_unpin_rqst(rep);
		rpcrdma_flush_disconnect(cq->cq_context, wc);
		return;
	}
	frwr_mr_put(mr);
	rpcrdma_complete_rqst(rep);
}
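
/**
 * frwr_unmap_async - invalidate memory regions that were registered for @req
 * @r_xprt: controlling transport instance
 * @req: rpcrdma_req with a non-empty list of MRs to process
 *
 * This guarantees that registered MRs are properly fenced from the
 * server before the RPC consumer accesses the data in them. It also
 * ensures proper Send flow control: waking the next RPC waits until
 * this RPC has relinquished all its Send Queue entries.
 */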
void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
	struct ib_send_wr *first, *last, **prev;
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	struct rpcrdma_mr *mr;
	int rc;

	/* Chain the LOCAL_INV Work Requests and post them with
	 * a single ib_post_send() call.
	 */
	prev = &first;
	while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
		trace_xprtrdma_mr_localinv(mr);
		r_xprt->rx_stats.local_inv_needed++;

		last = &mr->mr_invwr;
		last->next = NULL;
		last->wr_cqe = &mr->mr_cqe;
		last->sg_list = NULL;
		last->num_sge = 0;
		last->opcode = IB_WR_LOCAL_INV;
		last->send_flags = IB_SEND_SIGNALED;
		last->ex.invalidate_rkey = mr->mr_handle;

		last->wr_cqe->done = frwr_wc_localinv;

		*prev = last;
		prev = &last->next;
	}

	/* Strong send queue ordering guarantees that when the
	 * last WR in the chain completes, all WRs in the chain
	 * are complete. The last completion will wake up the
	 * RPC waiter.
	 */
	last->wr_cqe->done = frwr_wc_localinv_done;

	/* Transport disconnect drains the receive CQ before it
	 * replaces the QP. The RPC reply handler won't call us
	 * unless re_id->qp is a valid pointer.
	 */
	rc = ib_post_send(ep->re_id->qp, first, NULL);
	if (!rc)
		return;

	/* On error, the MRs get destroyed once the QP has drained. */
	trace_xprtrdma_post_linv_err(req, rc);

	/* The final LOCAL_INV WR in the chain is supposed to
	 * do the wake. If it was never posted, the wake does
	 * not happen. Unpin the rqst in preparation for its
	 * retransmission.
	 */
	rpcrdma_unpin_rqst(req->rl_reply);
}