// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

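/* Return a random 8-bit key that is different from last_key. Callers pass
 * -1 (which no u8 value can equal) when there is no previous key to avoid.
 */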
u8 rxe_get_next_key(u32 last_key)
{
	u8 key;

	do {
		get_random_bytes(&key, 1);
	} while (key == last_key);

	return key;
}

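/* Check that the range [iova, iova + length) lies within the region
 * registered with mr. DMA memory regions cover all of memory and always
 * pass the check.
 */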
int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
	switch (mr->type) {
	case RXE_MR_TYPE_DMA:
		return 0;

	case RXE_MR_TYPE_MR:
		if (iova < mr->iova || length > mr->length ||
		    iova > mr->iova + mr->length - length)
			return -EFAULT;
		return 0;

	default:
		return -EFAULT;
	}
}

#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
				 | IB_ACCESS_REMOTE_WRITE	\
				 | IB_ACCESS_REMOTE_ATOMIC)

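/* Build a fresh lkey from the pool index (upper 24 bits) and a random 8-bit
 * key, and mirror it into the rkey when remote access was requested.
 */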
static void rxe_mr_init(int access, struct rxe_mr *mr)
{
	u32 lkey = mr->pelem.index << 8 | rxe_get_next_key(-1);
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	mr->ibmr.lkey = lkey;
	mr->ibmr.rkey = rkey;
	mr->state = RXE_MR_STATE_INVALID;
	mr->type = RXE_MR_TYPE_NONE;
	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}

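/* Allocate enough rxe_map blocks, each holding RXE_BUF_PER_MAP buffer
 * descriptors, to cover num_buf pages.
 */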
static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mr->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mr->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mr->map[i])
			goto err2;
	}

	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
	mr->map_mask = RXE_BUF_PER_MAP - 1;

	mr->num_buf = num_buf;
	mr->num_map = num_map;
	mr->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mr->map[i]);

	kfree(mr->map);
err1:
	return -ENOMEM;
}

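/* Initialize an MR that gives DMA access to all of memory; no map tables
 * are needed since iova is used directly as a kernel virtual address.
 */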
void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
{
	rxe_mr_init(access, mr);

	mr->ibmr.pd = &pd->ibpd;
	mr->access = access;
	mr->state = RXE_MR_STATE_VALID;
	mr->type = RXE_MR_TYPE_DMA;
}

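/* Pin the user address range [start, start + length), then record the
 * kernel virtual address and size of each pinned page in the MR's map
 * tables.
 */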
int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
		     int access, struct ib_udata *udata, struct rxe_mr *mr)
{
	struct rxe_map **map;
	struct rxe_phys_buf *buf = NULL;
	struct ib_umem *umem;
	struct sg_page_iter sg_iter;
	int num_buf;
	void *vaddr;
	int err;
	int i;

	umem = ib_umem_get(udata, start, length, access);
	if (IS_ERR(umem)) {
		pr_warn("%s: Unable to pin memory region err = %d\n",
			__func__, (int)PTR_ERR(umem));
		err = PTR_ERR(umem);
		goto err_out;
	}

	mr->umem = umem;
	num_buf = ib_umem_num_pages(umem);

	rxe_mr_init(access, mr);

	err = rxe_mr_alloc(mr, num_buf);
	if (err) {
		pr_warn("%s: Unable to allocate memory for map\n", __func__);
		goto err_release_umem;
	}

	mr->page_shift = PAGE_SHIFT;
	mr->page_mask = PAGE_SIZE - 1;

	num_buf = 0;
	map = mr->map;
	if (length > 0) {
		buf = map[0]->buf;

		for_each_sgtable_page(&umem->sgt_append.sgt, &sg_iter, 0) {
			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}

			vaddr = page_address(sg_page_iter_page(&sg_iter));
			if (!vaddr) {
				pr_warn("%s: Unable to get virtual address\n",
					__func__);
				err = -ENOMEM;
				goto err_cleanup_map;
			}

			buf->addr = (uintptr_t)vaddr;
			buf->size = PAGE_SIZE;
			num_buf++;
			buf++;
		}
	}

	mr->ibmr.pd = &pd->ibpd;
	mr->access = access;
	mr->length = length;
	mr->iova = iova;
	mr->va = start;
	mr->offset = ib_umem_offset(umem);
	mr->state = RXE_MR_STATE_VALID;
	mr->type = RXE_MR_TYPE_MR;

	return 0;

err_cleanup_map:
	for (i = 0; i < mr->num_map; i++)
		kfree(mr->map[i]);
	kfree(mr->map);
err_release_umem:
	ib_umem_release(umem);
err_out:
	return err;
}

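/* Initialize an MR for fast registration work requests. The page list is
 * allocated up front but left empty, so the MR starts in the FREE state.
 */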
int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
{
	int err;

	rxe_mr_init(0, mr);

	/* In fastreg, we also set the rkey */
	mr->ibmr.rkey = mr->ibmr.lkey;

	err = rxe_mr_alloc(mr, max_pages);
	if (err)
		goto err1;

	mr->ibmr.pd = &pd->ibpd;
	mr->max_buf = max_pages;
	mr->state = RXE_MR_STATE_FREE;
	mr->type = RXE_MR_TYPE_MR;

	return 0;

err1:
	return err;
}

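/* Translate iova into a (map index, buffer index, offset) triple.
 * Page-backed MRs are resolved with shifts and masks; otherwise walk the
 * variable-sized buffers one at a time.
 */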
static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
			size_t *offset_out)
{
	size_t offset = iova - mr->iova + mr->offset;
	int map_index;
	int buf_index;
	u64 length;

	if (likely(mr->page_shift)) {
		*offset_out = offset & mr->page_mask;
		offset >>= mr->page_shift;
		*n_out = offset & mr->map_mask;
		*m_out = offset >> mr->map_shift;
	} else {
		map_index = 0;
		buf_index = 0;

		length = mr->map[map_index]->buf[buf_index].size;

		while (offset >= length) {
			offset -= length;
			buf_index++;

			if (buf_index == RXE_BUF_PER_MAP) {
				map_index++;
				buf_index = 0;
			}
			length = mr->map[map_index]->buf[buf_index].size;
		}

		*m_out = map_index;
		*n_out = buf_index;
		*offset_out = offset;
	}
}

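/* Return the kernel virtual address backing iova, or NULL if the MR is not
 * in the VALID state, the range check fails, or the request crosses a
 * buffer boundary.
 */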
void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
{
	size_t offset;
	int m, n;
	void *addr;

	if (mr->state != RXE_MR_STATE_VALID) {
		pr_warn("mr not in valid state\n");
		addr = NULL;
		goto out;
	}

	if (!mr->map) {
		addr = (void *)(uintptr_t)iova;
		goto out;
	}

	if (mr_check_range(mr, iova, length)) {
		pr_warn("range violation\n");
		addr = NULL;
		goto out;
	}

	lookup_iova(mr, iova, &m, &n, &offset);

	if (offset + length > mr->map[m]->buf[n].size) {
		pr_warn("crosses page boundary\n");
		addr = NULL;
		goto out;
	}

	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;

out:
	return addr;
}

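/* Copy length bytes between the buffer at addr and the MR starting at iova,
 * in the direction given by dir. If crcp is non-NULL, fold the copied bytes
 * into the running CRC32 it points to. The caller must hold a reference
 * to mr.
 */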
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
		enum rxe_mr_copy_dir dir, u32 *crcp)
{
	int err;
	int bytes;
	u8 *va;
	struct rxe_map **map;
	struct rxe_phys_buf *buf;
	int m;
	int i;
	size_t offset;
	u32 crc = crcp ? (*crcp) : 0;

	if (length == 0)
		return 0;

	if (mr->type == RXE_MR_TYPE_DMA) {
		u8 *src, *dest;

		src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);
		dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr;

		memcpy(dest, src, length);

		if (crcp)
			*crcp = rxe_crc32(to_rdev(mr->ibmr.device), *crcp,
					  dest, length);

		return 0;
	}

	WARN_ON_ONCE(!mr->map);

	err = mr_check_range(mr, iova, length);
	if (err) {
		err = -EFAULT;
		goto err1;
	}

	lookup_iova(mr, iova, &m, &i, &offset);

	map = mr->map + m;
	buf = map[0]->buf + i;

	while (length > 0) {
		u8 *src, *dest;

		va = (u8 *)(uintptr_t)buf->addr + offset;
		src = (dir == RXE_TO_MR_OBJ) ? addr : va;
		dest = (dir == RXE_TO_MR_OBJ) ? va : addr;

		bytes = buf->size - offset;

		if (bytes > length)
			bytes = length;

		memcpy(dest, src, bytes);

		if (crcp)
			crc = rxe_crc32(to_rdev(mr->ibmr.device), crc, dest,
					bytes);

		length -= bytes;
		addr += bytes;

		offset = 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	if (crcp)
		*crcp = crc;

	return 0;

err1:
	return err;
}

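/* Copy data in or out of a wqe, i.e. an sg list under the control of a dma
 * descriptor, crossing sge boundaries as needed and keeping dma->sge_offset
 * and dma->resid current.
 */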
int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
	      void *addr, int length, enum rxe_mr_copy_dir dir, u32 *crcp)
{
	int bytes;
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;
	struct rxe_mr *mr = NULL;
	u64 iova;
	int err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
		if (!mr) {
			err = -EINVAL;
			goto err1;
		}
	}

	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			if (mr) {
				rxe_drop_ref(mr);
				mr = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mr = lookup_mr(pd, access, sge->lkey,
					       RXE_LOOKUP_LOCAL);
				if (!mr) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mr_copy(mr, iova, addr, bytes, dir, crcp);
			if (err)
				goto err2;

			offset += bytes;
			resid -= bytes;
			length -= bytes;
			addr += bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	if (mr)
		rxe_drop_ref(mr);

	return 0;

err2:
	if (mr)
		rxe_drop_ref(mr);
err1:
	return err;
}

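/* Skip length bytes of a dma descriptor without copying anything, advancing
 * the current sge, offset and residual count just as copy_data() would.
 */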
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset += bytes;
		resid -= bytes;
		length -= bytes;
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	return 0;
}

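/* Look up the MR for an lkey or rkey, depending on the lookup type, and
 * verify that it belongs to pd, supports the requested access and is in
 * the VALID state. Returns a referenced MR, or NULL on failure.
 */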
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
			 enum rxe_mr_lookup_type type)
{
	struct rxe_mr *mr;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	mr = rxe_pool_get_index(&rxe->mr_pool, index);
	if (!mr)
		return NULL;

	if (unlikely((type == RXE_LOOKUP_LOCAL && mr_lkey(mr) != key) ||
		     (type == RXE_LOOKUP_REMOTE && mr_rkey(mr) != key) ||
		     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
		     mr->state != RXE_MR_STATE_VALID)) {
		rxe_drop_ref(mr);
		mr = NULL;
	}

	return mr;
}

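/* Invalidate an MR by rkey, moving it to the FREE state. Fails if the rkey
 * does not match or the MR still has memory windows bound to it.
 */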
int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	int ret;

	mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
	if (!mr) {
		pr_err("%s: No MR for rkey %#x\n", __func__, rkey);
		ret = -EINVAL;
		goto err;
	}

	if (rkey != mr->ibmr.rkey) {
		pr_err("%s: rkey (%#x) doesn't match mr->ibmr.rkey (%#x)\n",
		       __func__, rkey, mr->ibmr.rkey);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
			__func__);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	mr->state = RXE_MR_STATE_FREE;
	ret = 0;

err_drop_ref:
	rxe_drop_ref(mr);
err:
	return ret;
}

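/* Deregister an MR: refuse while memory windows are bound to it, otherwise
 * mark it ZOMBIE and drop the PD, index and MR references.
 */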
int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct rxe_mr *mr = to_rmr(ibmr);

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to deregister an MR while bound to MWs\n",
			__func__);
		return -EINVAL;
	}

	mr->state = RXE_MR_STATE_ZOMBIE;
	rxe_drop_ref(mr_pd(mr));
	rxe_drop_index(mr);
	rxe_drop_ref(mr);

	return 0;
}

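/* Pool cleanup callback: release the pinned umem and free the map tables. */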
void rxe_mr_cleanup(struct rxe_pool_entry *arg)
{
	struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
	int i;

	ib_umem_release(mr->umem);

	if (mr->map) {
		for (i = 0; i < mr->num_map; i++)
			kfree(mr->map[i]);

		kfree(mr->map);
	}
}