#include <linux/kref.h>
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/delay.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include <rdma/ib_verbs.h>
#include "mlx5_ib.h"

enum {
 MAX_PENDING_REG_MR = 8,
};

#define MLX5_UMR_ALIGN 2048
49
50static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
51static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
52static int mr_cache_max_order(struct mlx5_ib_dev *dev);
53static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
54
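/*
 * Indirect (KLM-based) mkeys may only be created or updated via UMR when
 * the device does not advertise umr_indirect_mkey_disabled.
 */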
55static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
56{
57 return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
58}
59
60static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
61{
62 int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
63
64#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 /* Wait until all page fault handlers using the mr complete. */
66 synchronize_srcu(&dev->mr_srcu);
67#endif
68
69 return err;
70}
71
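/*
 * Map an allocation order to an MR cache entry index. Cache entries
 * start at ent[0].order, so any smaller order maps to index 0.
 */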
72static int order2idx(struct mlx5_ib_dev *dev, int order)
73{
74 struct mlx5_mr_cache *cache = &dev->cache;
75
76 if (order < cache->ent[0].order)
77 return 0;
78 else
79 return order - cache->ent[0].order;
80}
81
82static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
83{
84 return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
85 length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
86}
87
88#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
89static void update_odp_mr(struct mlx5_ib_mr *mr)
90{
91 if (mr->umem->is_odp) {
 /*
 * This barrier prevents the compiler from moving the setting of
 * the umem_odp private pointer to point at our MR before the MR
 * initialization has finished, so that invalidation handlers
 * never see a half-initialized MR.
 */
99 smp_wmb();
100 to_ib_umem_odp(mr->umem)->private = mr;
 /*
 * Make sure the invalidation routines see the new private
 * pointer before any page fault can be taken on this MR.
 * Page faults may happen as soon as the MR is put in the tree
 * below this line; without the barrier, a fault handler or an
 * invalidation could run before umem_odp->private == mr is
 * visible to the invalidation handler.
 */
111 smp_wmb();
112 }
113}
114#endif
115
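/*
 * Completion callback for the asynchronous mkey creation issued by
 * add_keys(). On success the new MR is added to its cache entry and the
 * mkey is inserted into the device's mkey radix tree; on failure the MR
 * is freed and further cache filling is delayed briefly via
 * dev->fill_delay.
 */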
116static void reg_mr_callback(int status, void *context)
117{
118 struct mlx5_ib_mr *mr = context;
119 struct mlx5_ib_dev *dev = mr->dev;
120 struct mlx5_mr_cache *cache = &dev->cache;
121 int c = order2idx(dev, mr->order);
122 struct mlx5_cache_ent *ent = &cache->ent[c];
123 u8 key;
124 unsigned long flags;
125 struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
126 int err;
127
128 spin_lock_irqsave(&ent->lock, flags);
129 ent->pending--;
130 spin_unlock_irqrestore(&ent->lock, flags);
131 if (status) {
132 mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
133 kfree(mr);
134 dev->fill_delay = 1;
135 mod_timer(&dev->delay_timer, jiffies + HZ);
136 return;
137 }
138
139 mr->mmkey.type = MLX5_MKEY_MR;
140 spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
141 key = dev->mdev->priv.mkey_key++;
142 spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
143 mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;
144
145 cache->last_add = jiffies;
146
147 spin_lock_irqsave(&ent->lock, flags);
148 list_add_tail(&mr->list, &ent->head);
149 ent->cur++;
150 ent->size++;
151 spin_unlock_irqrestore(&ent->lock, flags);
152
153 write_lock_irqsave(&table->lock, flags);
154 err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmkey.key),
155 &mr->mmkey);
156 if (err)
157 pr_err("Error inserting to mkey tree. 0x%x\n", -err);
158 write_unlock_irqrestore(&table->lock, flags);
159
160 if (!completion_done(&ent->compl))
161 complete(&ent->compl);
162}
163
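/*
 * Asynchronously create up to @num mkeys for cache entry @c. At most
 * MAX_PENDING_REG_MR creation commands may be outstanding per entry;
 * -EAGAIN is returned once that limit is reached.
 */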
164static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
165{
166 struct mlx5_mr_cache *cache = &dev->cache;
167 struct mlx5_cache_ent *ent = &cache->ent[c];
168 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
169 struct mlx5_ib_mr *mr;
170 void *mkc;
171 u32 *in;
172 int err = 0;
173 int i;
174
175 in = kzalloc(inlen, GFP_KERNEL);
176 if (!in)
177 return -ENOMEM;
178
179 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
180 for (i = 0; i < num; i++) {
181 if (ent->pending >= MAX_PENDING_REG_MR) {
182 err = -EAGAIN;
183 break;
184 }
185
186 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
187 if (!mr) {
188 err = -ENOMEM;
189 break;
190 }
191 mr->order = ent->order;
192 mr->allocated_from_cache = 1;
193 mr->dev = dev;
194
195 MLX5_SET(mkc, mkc, free, 1);
196 MLX5_SET(mkc, mkc, umr_en, 1);
197 MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
198 MLX5_SET(mkc, mkc, access_mode_4_2,
199 (ent->access_mode >> 2) & 0x7);
200
201 MLX5_SET(mkc, mkc, qpn, 0xffffff);
202 MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
203 MLX5_SET(mkc, mkc, log_page_size, ent->page);
204
205 spin_lock_irq(&ent->lock);
206 ent->pending++;
207 spin_unlock_irq(&ent->lock);
208 err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
209 in, inlen,
210 mr->out, sizeof(mr->out),
211 reg_mr_callback, mr);
212 if (err) {
213 spin_lock_irq(&ent->lock);
214 ent->pending--;
215 spin_unlock_irq(&ent->lock);
216 mlx5_ib_warn(dev, "create mkey failed %d\n", err);
217 kfree(mr);
218 break;
219 }
220 }
221
222 kfree(in);
223 return err;
224}
225
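/*
 * Remove up to @num MRs from the head of cache entry @c and destroy
 * their mkeys. With ODP enabled, wait for SRCU readers to finish before
 * freeing the MRs.
 */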
226static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
227{
228 struct mlx5_mr_cache *cache = &dev->cache;
229 struct mlx5_cache_ent *ent = &cache->ent[c];
230 struct mlx5_ib_mr *tmp_mr;
231 struct mlx5_ib_mr *mr;
232 LIST_HEAD(del_list);
233 int i;
234
235 for (i = 0; i < num; i++) {
236 spin_lock_irq(&ent->lock);
237 if (list_empty(&ent->head)) {
238 spin_unlock_irq(&ent->lock);
239 break;
240 }
241 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
242 list_move(&mr->list, &del_list);
243 ent->cur--;
244 ent->size--;
245 spin_unlock_irq(&ent->lock);
246 mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
247 }
248
249#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
250 synchronize_srcu(&dev->mr_srcu);
251#endif
252
253 list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
254 list_del(&mr->list);
255 kfree(mr);
256 }
257}
258
259static ssize_t size_write(struct file *filp, const char __user *buf,
260 size_t count, loff_t *pos)
261{
262 struct mlx5_cache_ent *ent = filp->private_data;
263 struct mlx5_ib_dev *dev = ent->dev;
264 char lbuf[20] = {0};
265 u32 var;
266 int err;
267 int c;
268
269 count = min(count, sizeof(lbuf) - 1);
270 if (copy_from_user(lbuf, buf, count))
271 return -EFAULT;
272
273 c = order2idx(dev, ent->order);
274
275 if (sscanf(lbuf, "%u", &var) != 1)
276 return -EINVAL;
277
278 if (var < ent->limit)
279 return -EINVAL;
280
281 if (var > ent->size) {
282 do {
283 err = add_keys(dev, c, var - ent->size);
284 if (err && err != -EAGAIN)
285 return err;
286
287 usleep_range(3000, 5000);
288 } while (err);
289 } else if (var < ent->size) {
290 remove_keys(dev, c, ent->size - var);
291 }
292
293 return count;
294}
295
296static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
297 loff_t *pos)
298{
299 struct mlx5_cache_ent *ent = filp->private_data;
300 char lbuf[20];
301 int err;
302
303 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
304 if (err < 0)
305 return err;
306
307 return simple_read_from_buffer(buf, count, pos, lbuf, err);
308}
309
310static const struct file_operations size_fops = {
311 .owner = THIS_MODULE,
312 .open = simple_open,
313 .write = size_write,
314 .read = size_read,
315};
316
317static ssize_t limit_write(struct file *filp, const char __user *buf,
318 size_t count, loff_t *pos)
319{
320 struct mlx5_cache_ent *ent = filp->private_data;
321 struct mlx5_ib_dev *dev = ent->dev;
322 char lbuf[20] = {0};
323 u32 var;
324 int err;
325 int c;
326
327 count = min(count, sizeof(lbuf) - 1);
328 if (copy_from_user(lbuf, buf, count))
329 return -EFAULT;
330
331 c = order2idx(dev, ent->order);
332
333 if (sscanf(lbuf, "%u", &var) != 1)
334 return -EINVAL;
335
336 if (var > ent->size)
337 return -EINVAL;
338
339 ent->limit = var;
340
341 if (ent->cur < ent->limit) {
342 err = add_keys(dev, c, 2 * ent->limit - ent->cur);
343 if (err)
344 return err;
345 }
346
347 return count;
348}
349
350static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
351 loff_t *pos)
352{
353 struct mlx5_cache_ent *ent = filp->private_data;
354 char lbuf[20];
355 int err;
356
357 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
358 if (err < 0)
359 return err;
360
361 return simple_read_from_buffer(buf, count, pos, lbuf, err);
362}
363
364static const struct file_operations limit_fops = {
365 .owner = THIS_MODULE,
366 .open = simple_open,
367 .write = limit_write,
368 .read = limit_read,
369};
370
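/* Return 1 if any cache entry is still below its fill limit. */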
371static int someone_adding(struct mlx5_mr_cache *cache)
372{
373 int i;
374
375 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
376 if (cache->ent[i].cur < cache->ent[i].limit)
377 return 1;
378 }
379
380 return 0;
381}
382
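/*
 * Per-entry cache worker: fill the entry up to twice its limit, and
 * shrink it (one MR at a time) once it grows beyond that watermark and
 * has been idle long enough.
 */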
383static void __cache_work_func(struct mlx5_cache_ent *ent)
384{
385 struct mlx5_ib_dev *dev = ent->dev;
386 struct mlx5_mr_cache *cache = &dev->cache;
387 int i = order2idx(dev, ent->order);
388 int err;
389
390 if (cache->stopped)
391 return;
392
393 ent = &dev->cache.ent[i];
394 if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
395 err = add_keys(dev, i, 1);
396 if (ent->cur < 2 * ent->limit) {
397 if (err == -EAGAIN) {
398 mlx5_ib_dbg(dev, "returned eagain, order %d\n",
399 i + 2);
400 queue_delayed_work(cache->wq, &ent->dwork,
401 msecs_to_jiffies(3));
402 } else if (err) {
403 mlx5_ib_warn(dev, "command failed order %d, err %d\n",
404 i + 2, err);
405 queue_delayed_work(cache->wq, &ent->dwork,
406 msecs_to_jiffies(1000));
407 } else {
408 queue_work(cache->wq, &ent->work);
409 }
410 }
411 } else if (ent->cur > 2 * ent->limit) {
 /*
 * The remove_keys() logic is performed as garbage-collection
 * work, which is intended to run only when no other active
 * processes are running.
 *
 * need_resched() returns true if there are user tasks to be
 * activated in the near future.
 *
 * In that case we do not execute remove_keys() and postpone
 * the garbage collection to the next cycle in order to free
 * CPU resources for other tasks.
 */
424 if (!need_resched() && !someone_adding(cache) &&
425 time_after(jiffies, cache->last_add + 300 * HZ)) {
426 remove_keys(dev, i, 1);
427 if (ent->cur > ent->limit)
428 queue_work(cache->wq, &ent->work);
429 } else {
430 queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
431 }
432 }
433}
434
435static void delayed_cache_work_func(struct work_struct *work)
436{
437 struct mlx5_cache_ent *ent;
438
439 ent = container_of(work, struct mlx5_cache_ent, dwork.work);
440 __cache_work_func(ent);
441}
442
443static void cache_work_func(struct work_struct *work)
444{
445 struct mlx5_cache_ent *ent;
446
447 ent = container_of(work, struct mlx5_cache_ent, work);
448 __cache_work_func(ent);
449}
450
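/*
 * Allocate an MR from a specific cache entry, creating a new mkey and
 * waiting for it if the entry is currently empty.
 */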
451struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
452{
453 struct mlx5_mr_cache *cache = &dev->cache;
454 struct mlx5_cache_ent *ent;
455 struct mlx5_ib_mr *mr;
456 int err;
457
458 if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
459 mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
460 return NULL;
461 }
462
463 ent = &cache->ent[entry];
464 while (1) {
465 spin_lock_irq(&ent->lock);
466 if (list_empty(&ent->head)) {
467 spin_unlock_irq(&ent->lock);
468
469 err = add_keys(dev, entry, 1);
470 if (err && err != -EAGAIN)
471 return ERR_PTR(err);
472
473 wait_for_completion(&ent->compl);
474 } else {
475 mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
476 list);
477 list_del(&mr->list);
478 ent->cur--;
479 spin_unlock_irq(&ent->lock);
480 if (ent->cur < ent->limit)
481 queue_work(cache->wq, &ent->work);
482 return mr;
483 }
484 }
485}
486
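/*
 * Take an MR of at least the requested order from the cache, falling
 * back to larger entries on a miss and kicking the workers to refill.
 */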
487static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
488{
489 struct mlx5_mr_cache *cache = &dev->cache;
490 struct mlx5_ib_mr *mr = NULL;
491 struct mlx5_cache_ent *ent;
492 int last_umr_cache_entry;
493 int c;
494 int i;
495
496 c = order2idx(dev, order);
497 last_umr_cache_entry = order2idx(dev, mr_cache_max_order(dev));
498 if (c < 0 || c > last_umr_cache_entry) {
499 mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
500 return NULL;
501 }
502
503 for (i = c; i <= last_umr_cache_entry; i++) {
504 ent = &cache->ent[i];
505
506 mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
507
508 spin_lock_irq(&ent->lock);
509 if (!list_empty(&ent->head)) {
510 mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
511 list);
512 list_del(&mr->list);
513 ent->cur--;
514 spin_unlock_irq(&ent->lock);
515 if (ent->cur < ent->limit)
516 queue_work(cache->wq, &ent->work);
517 break;
518 }
519 spin_unlock_irq(&ent->lock);
520
521 queue_work(cache->wq, &ent->work);
522 }
523
524 if (!mr)
525 cache->ent[c].miss++;
526
527 return mr;
528}
529
530void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
531{
532 struct mlx5_mr_cache *cache = &dev->cache;
533 struct mlx5_cache_ent *ent;
534 int shrink = 0;
535 int c;
536
537 if (!mr->allocated_from_cache)
538 return;
539
540 c = order2idx(dev, mr->order);
541 WARN_ON(c < 0 || c >= MAX_MR_CACHE_ENTRIES);
542
543 if (unreg_umr(dev, mr)) {
544 mr->allocated_from_cache = false;
545 destroy_mkey(dev, mr);
546 ent = &cache->ent[c];
547 if (ent->cur < ent->limit)
548 queue_work(cache->wq, &ent->work);
549 return;
550 }
551
552 ent = &cache->ent[c];
553 spin_lock_irq(&ent->lock);
554 list_add_tail(&mr->list, &ent->head);
555 ent->cur++;
556 if (ent->cur > 2 * ent->limit)
557 shrink = 1;
558 spin_unlock_irq(&ent->lock);
559
560 if (shrink)
561 queue_work(cache->wq, &ent->work);
562}
563
564static void clean_keys(struct mlx5_ib_dev *dev, int c)
565{
566 struct mlx5_mr_cache *cache = &dev->cache;
567 struct mlx5_cache_ent *ent = &cache->ent[c];
568 struct mlx5_ib_mr *tmp_mr;
569 struct mlx5_ib_mr *mr;
570 LIST_HEAD(del_list);
571
572 cancel_delayed_work(&ent->dwork);
573 while (1) {
574 spin_lock_irq(&ent->lock);
575 if (list_empty(&ent->head)) {
576 spin_unlock_irq(&ent->lock);
577 break;
578 }
579 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
580 list_move(&mr->list, &del_list);
581 ent->cur--;
582 ent->size--;
583 spin_unlock_irq(&ent->lock);
584 mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
585 }
586
587#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
588 synchronize_srcu(&dev->mr_srcu);
589#endif
590
591 list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
592 list_del(&mr->list);
593 kfree(mr);
594 }
595}
596
597static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
598{
599 if (!mlx5_debugfs_root || dev->rep)
600 return;
601
602 debugfs_remove_recursive(dev->cache.root);
603 dev->cache.root = NULL;
604}
605
606static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
607{
608 struct mlx5_mr_cache *cache = &dev->cache;
609 struct mlx5_cache_ent *ent;
610 int i;
611
612 if (!mlx5_debugfs_root || dev->rep)
613 return 0;
614
615 cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
616 if (!cache->root)
617 return -ENOMEM;
618
619 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
620 ent = &cache->ent[i];
621 sprintf(ent->name, "%d", ent->order);
622 ent->dir = debugfs_create_dir(ent->name, cache->root);
623 if (!ent->dir)
624 goto err;
625
626 ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
627 &size_fops);
628 if (!ent->fsize)
629 goto err;
630
631 ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
632 &limit_fops);
633 if (!ent->flimit)
634 goto err;
635
636 ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
637 &ent->cur);
638 if (!ent->fcur)
639 goto err;
640
641 ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
642 &ent->miss);
643 if (!ent->fmiss)
644 goto err;
645 }
646
647 return 0;
648err:
649 mlx5_mr_cache_debugfs_cleanup(dev);
650
651 return -ENOMEM;
652}
653
654static void delay_time_func(unsigned long ctx)
655{
656 struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;
657
658 dev->fill_delay = 0;
659}
660
661int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
662{
663 struct mlx5_mr_cache *cache = &dev->cache;
664 struct mlx5_cache_ent *ent;
665 int err;
666 int i;
667
668 mutex_init(&dev->slow_path_mutex);
669 cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
670 if (!cache->wq) {
671 mlx5_ib_warn(dev, "failed to create work queue\n");
672 return -ENOMEM;
673 }
674
675 setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
676 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
677 ent = &cache->ent[i];
678 INIT_LIST_HEAD(&ent->head);
679 spin_lock_init(&ent->lock);
680 ent->order = i + 2;
681 ent->dev = dev;
682 ent->limit = 0;
683
684 init_completion(&ent->compl);
685 INIT_WORK(&ent->work, cache_work_func);
686 INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
687
688 if (i > MR_CACHE_LAST_STD_ENTRY) {
689 mlx5_odp_init_mr_cache_entry(ent);
690 continue;
691 }
692
693 if (ent->order > mr_cache_max_order(dev))
694 continue;
695
696 ent->page = PAGE_SHIFT;
697 ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
698 MLX5_IB_UMR_OCTOWORD;
699 ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
700 if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
701 !dev->rep &&
702 mlx5_core_is_pf(dev->mdev))
703 ent->limit = dev->mdev->profile->mr_cache[i].limit;
704 else
705 ent->limit = 0;
706 queue_work(cache->wq, &ent->work);
707 }
708
709 err = mlx5_mr_cache_debugfs_init(dev);
710 if (err)
711 mlx5_ib_warn(dev, "cache debugfs failure\n");
712
 /*
 * We don't want to fail the driver just because debugfs failed
 * to initialize, so this error is not treated as fatal.
 */
718 return 0;
719}
720
721static void wait_for_async_commands(struct mlx5_ib_dev *dev)
722{
723 struct mlx5_mr_cache *cache = &dev->cache;
724 struct mlx5_cache_ent *ent;
725 int total = 0;
726 int i;
727 int j;
728
729 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
730 ent = &cache->ent[i];
731 for (j = 0 ; j < 1000; j++) {
732 if (!ent->pending)
733 break;
734 msleep(50);
735 }
736 }
737 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
738 ent = &cache->ent[i];
739 total += ent->pending;
740 }
741
742 if (total)
743 mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n", total);
744 else
745 mlx5_ib_warn(dev, "done with all pending requests\n");
746}
747
748int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
749{
750 int i;
751
752 if (!dev->cache.wq)
753 return 0;
754
755 dev->cache.stopped = 1;
756 flush_workqueue(dev->cache.wq);
757
758 mlx5_mr_cache_debugfs_cleanup(dev);
759
760 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
761 clean_keys(dev, i);
762
763 destroy_workqueue(dev->cache.wq);
764 wait_for_async_commands(dev);
765 del_timer_sync(&dev->delay_timer);
766
767 return 0;
768}
769
770struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
771{
772 struct mlx5_ib_dev *dev = to_mdev(pd->device);
773 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
774 struct mlx5_core_dev *mdev = dev->mdev;
775 struct mlx5_ib_mr *mr;
776 void *mkc;
777 u32 *in;
778 int err;
779
780 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
781 if (!mr)
782 return ERR_PTR(-ENOMEM);
783
784 in = kzalloc(inlen, GFP_KERNEL);
785 if (!in) {
786 err = -ENOMEM;
787 goto err_free;
788 }
789
790 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
791
792 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
793 MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
794 MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
795 MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
796 MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
797 MLX5_SET(mkc, mkc, lr, 1);
798
799 MLX5_SET(mkc, mkc, length64, 1);
800 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
801 MLX5_SET(mkc, mkc, qpn, 0xffffff);
802 MLX5_SET64(mkc, mkc, start_addr, 0);
803
804 err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
805 if (err)
806 goto err_in;
807
808 kfree(in);
809 mr->mmkey.type = MLX5_MKEY_MR;
810 mr->ibmr.lkey = mr->mmkey.key;
811 mr->ibmr.rkey = mr->mmkey.key;
812 mr->umem = NULL;
813
814 return &mr->ibmr;
815
816err_in:
817 kfree(in);
818
819err_free:
820 kfree(mr);
821
822 return ERR_PTR(err);
823}
824
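/*
 * Number of octowords needed for the region's translation entries:
 * each MTT entry is 8 bytes, so one octoword (16 bytes) holds two.
 */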
825static int get_octo_len(u64 addr, u64 len, int page_shift)
826{
827 u64 page_size = 1ULL << page_shift;
828 u64 offset;
829 int npages;
830
831 offset = addr & (page_size - 1);
832 npages = ALIGN(len + offset, page_size) >> page_shift;
833 return (npages + 1) / 2;
834}
835
836static int mr_cache_max_order(struct mlx5_ib_dev *dev)
837{
838 if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
839 return MR_CACHE_LAST_STD_ENTRY + 2;
840 return MLX5_MAX_UMR_SHIFT;
841}
842
843static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
844 int access_flags, struct ib_umem **umem,
845 int *npages, int *page_shift, int *ncont,
846 int *order)
847{
848 struct mlx5_ib_dev *dev = to_mdev(pd->device);
849 struct ib_umem *u;
850 int err;
851
852 *umem = NULL;
853
854 u = ib_umem_get(pd->uobject->context, start, length, access_flags, 0);
855 err = PTR_ERR_OR_ZERO(u);
856 if (err) {
857 mlx5_ib_dbg(dev, "umem get failed (%d)\n", err);
858 return err;
859 }
860
861 mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
862 page_shift, ncont, order);
863 if (!*npages) {
864 mlx5_ib_warn(dev, "avoid zero region\n");
865 ib_umem_release(u);
866 return -EINVAL;
867 }
868
869 *umem = u;
870
871 mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
872 *npages, *ncont, *order, *page_shift);
873
874 return 0;
875}
876
877static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
878{
879 struct mlx5_ib_umr_context *context =
880 container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
881
882 context->status = wc->status;
883 complete(&context->done);
884}
885
886static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
887{
888 context->cqe.done = mlx5_ib_umr_done;
889 context->status = -1;
890 init_completion(&context->done);
891}
892
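/*
 * Post a UMR work request on the dedicated UMR QP and wait for its
 * completion. umrc->sem bounds the number of outstanding UMR operations.
 */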
893static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
894 struct mlx5_umr_wr *umrwr)
895{
896 struct umr_common *umrc = &dev->umrc;
897 const struct ib_send_wr *bad;
898 int err;
899 struct mlx5_ib_umr_context umr_context;
900
901 mlx5_ib_init_umr_context(&umr_context);
902 umrwr->wr.wr_cqe = &umr_context.cqe;
903
904 down(&umrc->sem);
905 err = ib_post_send(umrc->qp, &umrwr->wr, &bad);
906 if (err) {
907 mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
908 } else {
909 wait_for_completion(&umr_context.done);
910 if (umr_context.status != IB_WC_SUCCESS) {
911 mlx5_ib_warn(dev, "reg umr failed (%u)\n",
912 umr_context.status);
913 err = -EFAULT;
914 }
915 }
916 up(&umrc->sem);
917 return err;
918}
919
920static struct mlx5_ib_mr *alloc_mr_from_cache(
921 struct ib_pd *pd, struct ib_umem *umem,
922 u64 virt_addr, u64 len, int npages,
923 int page_shift, int order, int access_flags)
924{
925 struct mlx5_ib_dev *dev = to_mdev(pd->device);
926 struct mlx5_ib_mr *mr;
927 int err = 0;
928 int i;
929
930 for (i = 0; i < 1; i++) {
931 mr = alloc_cached_mr(dev, order);
932 if (mr)
933 break;
934
935 err = add_keys(dev, order2idx(dev, order), 1);
936 if (err && err != -EAGAIN) {
937 mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
938 break;
939 }
940 }
941
942 if (!mr)
943 return ERR_PTR(-EAGAIN);
944
945 mr->ibmr.pd = pd;
946 mr->umem = umem;
947 mr->access_flags = access_flags;
948 mr->desc_size = sizeof(struct mlx5_mtt);
949 mr->mmkey.iova = virt_addr;
950 mr->mmkey.size = len;
951 mr->mmkey.pd = to_mpd(pd)->pdn;
952
953 return mr;
954}
955
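/*
 * Fill one chunk of the XLT buffer: either KLMs for an indirect (ODP)
 * mkey or MTT entries taken from the umem page list. Returns the number
 * of entries written.
 */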
956static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages,
957 void *xlt, int page_shift, size_t size,
958 int flags)
959{
960 struct mlx5_ib_dev *dev = mr->dev;
961 struct ib_umem *umem = mr->umem;
962
963 if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
964 if (!umr_can_use_indirect_mkey(dev))
965 return -EPERM;
966 mlx5_odp_populate_klm(xlt, idx, npages, mr, flags);
967 return npages;
968 }
969
970 npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx);
971
972 if (!(flags & MLX5_IB_UPD_XLT_ZAP)) {
973 __mlx5_ib_populate_pas(dev, umem, page_shift,
974 idx, npages, xlt,
975 MLX5_IB_MTT_PRESENT);
 /*
 * Clear the padding that follows the pages brought in from
 * the umem.
 */
979 memset(xlt + (npages * sizeof(struct mlx5_mtt)), 0,
980 size - npages * sizeof(struct mlx5_mtt));
981 }
982
983 return npages;
984}
985
986#define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
987 MLX5_UMR_MTT_ALIGNMENT)
988#define MLX5_SPARE_UMR_CHUNK 0x10000
989
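/*
 * Push a window of the MR's translation table (MTTs or KLMs) to the
 * device with UMR work requests, chunking the update to fit the XLT
 * buffer.
 */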
990int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
991 int page_shift, int flags)
992{
993 struct mlx5_ib_dev *dev = mr->dev;
994 struct device *ddev = dev->ib_dev.dev.parent;
995 int size;
996 void *xlt;
997 dma_addr_t dma;
998 struct mlx5_umr_wr wr;
999 struct ib_sge sg;
1000 int err = 0;
1001 int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
1002 ? sizeof(struct mlx5_klm)
1003 : sizeof(struct mlx5_mtt);
1004 const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
1005 const int page_mask = page_align - 1;
1006 size_t pages_mapped = 0;
1007 size_t pages_to_map = 0;
1008 size_t pages_iter = 0;
1009 gfp_t gfp;
1010 bool use_emergency_page = false;
1011
1012 if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
1013 !umr_can_use_indirect_mkey(dev))
1014 return -EPERM;
1015
 /*
 * UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
 * so we need to align the offset and length accordingly.
 */
1019 if (idx & page_mask) {
1020 npages += idx & page_mask;
1021 idx &= ~page_mask;
1022 }
1023
1024 gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL;
1025 gfp |= __GFP_ZERO | __GFP_NOWARN;
1026
1027 pages_to_map = ALIGN(npages, page_align);
1028 size = desc_size * pages_to_map;
1029 size = min_t(int, size, MLX5_MAX_UMR_CHUNK);
1030
1031 xlt = (void *)__get_free_pages(gfp, get_order(size));
1032 if (!xlt && size > MLX5_SPARE_UMR_CHUNK) {
 mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d, falling back to spare UMR allocation of %d bytes\n",
 size, get_order(size), MLX5_SPARE_UMR_CHUNK);
1035
1036 size = MLX5_SPARE_UMR_CHUNK;
1037 xlt = (void *)__get_free_pages(gfp, get_order(size));
1038 }
1039
1040 if (!xlt) {
1041 mlx5_ib_warn(dev, "Using XLT emergency buffer\n");
1042 xlt = (void *)mlx5_ib_get_xlt_emergency_page();
1043 size = PAGE_SIZE;
1044 memset(xlt, 0, size);
1045 use_emergency_page = true;
1046 }
1047 pages_iter = size / desc_size;
1048 dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE);
1049 if (dma_mapping_error(ddev, dma)) {
1050 mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
1051 err = -ENOMEM;
1052 goto free_xlt;
1053 }
1054
1055 sg.addr = dma;
1056 sg.lkey = dev->umrc.pd->local_dma_lkey;
1057
1058 memset(&wr, 0, sizeof(wr));
1059 wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
1060 if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
1061 wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
1062 wr.wr.sg_list = &sg;
1063 wr.wr.num_sge = 1;
1064 wr.wr.opcode = MLX5_IB_WR_UMR;
1065
1066 wr.pd = mr->ibmr.pd;
1067 wr.mkey = mr->mmkey.key;
1068 wr.length = mr->mmkey.size;
1069 wr.virt_addr = mr->mmkey.iova;
1070 wr.access_flags = mr->access_flags;
1071 wr.page_shift = page_shift;
1072
1073 for (pages_mapped = 0;
1074 pages_mapped < pages_to_map && !err;
1075 pages_mapped += pages_iter, idx += pages_iter) {
1076 npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
1077 dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
1078 npages = populate_xlt(mr, idx, npages, xlt,
1079 page_shift, size, flags);
1080
1081 dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
1082
1083 sg.length = ALIGN(npages * desc_size,
1084 MLX5_UMR_MTT_ALIGNMENT);
1085
1086 if (pages_mapped + pages_iter >= pages_to_map) {
1087 if (flags & MLX5_IB_UPD_XLT_ENABLE)
1088 wr.wr.send_flags |=
1089 MLX5_IB_SEND_UMR_ENABLE_MR |
1090 MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
1091 MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
1092 if (flags & MLX5_IB_UPD_XLT_PD ||
1093 flags & MLX5_IB_UPD_XLT_ACCESS)
1094 wr.wr.send_flags |=
1095 MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
1096 if (flags & MLX5_IB_UPD_XLT_ADDR)
1097 wr.wr.send_flags |=
1098 MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
1099 }
1100
1101 wr.offset = idx * desc_size;
1102 wr.xlt_size = sg.length;
1103
1104 err = mlx5_ib_post_send_wait(dev, &wr);
1105 }
1106 dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
1107
1108free_xlt:
1109 if (use_emergency_page)
1110 mlx5_ib_put_xlt_emergency_page();
1111 else
1112 free_pages((unsigned long)xlt, get_order(size));
1113
1114 return err;
1115}
1116
/*
 * If ibmr is NULL it will be allocated by reg_create.
 * Else, the given ibmr will be used.
 */
1121static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
1122 u64 virt_addr, u64 length,
1123 struct ib_umem *umem, int npages,
1124 int page_shift, int access_flags,
1125 bool populate)
1126{
1127 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1128 struct mlx5_ib_mr *mr;
1129 __be64 *pas;
1130 void *mkc;
1131 int inlen;
1132 u32 *in;
1133 int err;
1134 bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
1135
1136 mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
1137 if (!mr)
1138 return ERR_PTR(-ENOMEM);
1139
1140 mr->ibmr.pd = pd;
1141 mr->access_flags = access_flags;
1142
1143 inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1144 if (populate)
1145 inlen += sizeof(*pas) * roundup(npages, 2);
1146 in = kvzalloc(inlen, GFP_KERNEL);
1147 if (!in) {
1148 err = -ENOMEM;
1149 goto err_1;
1150 }
1151 pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
1152 if (populate && !(access_flags & IB_ACCESS_ON_DEMAND))
1153 mlx5_ib_populate_pas(dev, umem, page_shift, pas,
1154 pg_cap ? MLX5_IB_MTT_PRESENT : 0);
1155
 /* The pg_access bit allows setting the access flags
 * in the page list submitted with the command.
 */
1158 MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
1159
1160 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1161 MLX5_SET(mkc, mkc, free, !populate);
1162 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
1163 MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
1164 MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
1165 MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
1166 MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
1167 MLX5_SET(mkc, mkc, lr, 1);
1168 MLX5_SET(mkc, mkc, umr_en, 1);
1169
1170 MLX5_SET64(mkc, mkc, start_addr, virt_addr);
1171 MLX5_SET64(mkc, mkc, len, length);
1172 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
1173 MLX5_SET(mkc, mkc, bsf_octword_size, 0);
1174 MLX5_SET(mkc, mkc, translations_octword_size,
1175 get_octo_len(virt_addr, length, page_shift));
1176 MLX5_SET(mkc, mkc, log_page_size, page_shift);
1177 MLX5_SET(mkc, mkc, qpn, 0xffffff);
1178 if (populate) {
1179 MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
1180 get_octo_len(virt_addr, length, page_shift));
1181 }
1182
1183 err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
1184 if (err) {
1185 mlx5_ib_warn(dev, "create mkey failed\n");
1186 goto err_2;
1187 }
1188 mr->mmkey.type = MLX5_MKEY_MR;
1189 mr->desc_size = sizeof(struct mlx5_mtt);
1190 mr->dev = dev;
1191 kvfree(in);
1192
1193 mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
1194
1195 return mr;
1196
1197err_2:
1198 kvfree(in);
1199
1200err_1:
1201 if (!ibmr)
1202 kfree(mr);
1203
1204 return ERR_PTR(err);
1205}
1206
1207static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
1208 int npages, u64 length, int access_flags)
1209{
1210 mr->npages = npages;
1211 atomic_add(npages, &dev->mdev->priv.reg_pages);
1212 mr->ibmr.lkey = mr->mmkey.key;
1213 mr->ibmr.rkey = mr->mmkey.key;
1214 mr->ibmr.length = length;
1215 mr->access_flags = access_flags;
1216}
1217
1218static struct ib_mr *mlx5_ib_get_memic_mr(struct ib_pd *pd, u64 memic_addr,
1219 u64 length, int acc)
1220{
1221 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1222 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1223 struct mlx5_core_dev *mdev = dev->mdev;
1224 struct mlx5_ib_mr *mr;
1225 void *mkc;
1226 u32 *in;
1227 int err;
1228
1229 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1230 if (!mr)
1231 return ERR_PTR(-ENOMEM);
1232
1233 in = kzalloc(inlen, GFP_KERNEL);
1234 if (!in) {
1235 err = -ENOMEM;
1236 goto err_free;
1237 }
1238
1239 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1240
1241 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MEMIC & 0x3);
1242 MLX5_SET(mkc, mkc, access_mode_4_2,
1243 (MLX5_MKC_ACCESS_MODE_MEMIC >> 2) & 0x7);
1244 MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
1245 MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
1246 MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
1247 MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
1248 MLX5_SET(mkc, mkc, lr, 1);
1249
1250 MLX5_SET64(mkc, mkc, len, length);
1251 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
1252 MLX5_SET(mkc, mkc, qpn, 0xffffff);
1253 MLX5_SET64(mkc, mkc, start_addr,
1254 memic_addr - pci_resource_start(dev->mdev->pdev, 0));
1255
1256 err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
1257 if (err)
1258 goto err_in;
1259
1260 kfree(in);
1261
1262 mr->umem = NULL;
1263 set_mr_fields(dev, mr, 0, length, acc);
1264
1265 return &mr->ibmr;
1266
1267err_in:
1268 kfree(in);
1269
1270err_free:
1271 kfree(mr);
1272
1273 return ERR_PTR(err);
1274}
1275
1276struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
1277 struct ib_dm_mr_attr *attr,
1278 struct uverbs_attr_bundle *attrs)
1279{
1280 struct mlx5_ib_dm *mdm = to_mdm(dm);
1281 u64 memic_addr;
1282
1283 if (attr->access_flags & ~MLX5_IB_DM_ALLOWED_ACCESS)
1284 return ERR_PTR(-EINVAL);
1285
1286 memic_addr = mdm->dev_addr + attr->offset;
1287
1288 return mlx5_ib_get_memic_mr(pd, memic_addr, attr->length,
1289 attr->access_flags);
1290}
1291
1292struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1293 u64 virt_addr, int access_flags,
1294 struct ib_udata *udata)
1295{
1296 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1297 struct mlx5_ib_mr *mr = NULL;
1298 bool use_umr;
1299 struct ib_umem *umem;
1300 int page_shift;
1301 int npages;
1302 int ncont;
1303 int order;
1304 int err;
1305
1306 if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
1307 return ERR_PTR(-EOPNOTSUPP);
1308
1309 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
1310 start, virt_addr, length, access_flags);
1311
1312#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1313 if (!start && length == U64_MAX) {
1314 if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
1315 !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
1316 return ERR_PTR(-EINVAL);
1317
1318 mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
1319 if (IS_ERR(mr))
1320 return ERR_CAST(mr);
1321 return &mr->ibmr;
1322 }
1323#endif
1324
1325 err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
1326 &page_shift, &ncont, &order);
1327
1328 if (err < 0)
1329 return ERR_PTR(err);
1330
1331 use_umr = !MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled) &&
1332 (!MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled) ||
1333 !MLX5_CAP_GEN(dev->mdev, atomic));
1334
1335 if (order <= mr_cache_max_order(dev) && use_umr) {
1336 mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont,
1337 page_shift, order, access_flags);
1338 if (PTR_ERR(mr) == -EAGAIN) {
1339 mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
1340 mr = NULL;
1341 }
1342 } else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) {
1343 if (access_flags & IB_ACCESS_ON_DEMAND) {
1344 err = -EINVAL;
1345 pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n");
1346 goto error;
1347 }
1348 use_umr = false;
1349 }
1350
1351 if (!mr) {
1352 mutex_lock(&dev->slow_path_mutex);
1353 mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
1354 page_shift, access_flags, !use_umr);
1355 mutex_unlock(&dev->slow_path_mutex);
1356 }
1357
1358 if (IS_ERR(mr)) {
1359 err = PTR_ERR(mr);
1360 goto error;
1361 }
1362
1363 mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
1364
1365 mr->umem = umem;
1366 set_mr_fields(dev, mr, npages, length, access_flags);
1367
1368#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1369 update_odp_mr(mr);
1370#endif
1371
1372 if (use_umr) {
1373 int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
1374
1375 if (access_flags & IB_ACCESS_ON_DEMAND)
1376 update_xlt_flags |= MLX5_IB_UPD_XLT_ZAP;
1377
1378 err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift,
1379 update_xlt_flags);
1380
1381 if (err) {
1382 dereg_mr(dev, mr);
1383 return ERR_PTR(err);
1384 }
1385 }
1386
1387#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1388 mr->live = 1;
1389#endif
1390 return &mr->ibmr;
1391error:
1392 ib_umem_release(umem);
1393 return ERR_PTR(err);
1394}
1395
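/*
 * Return a cached MR's mkey to the free state with a UMR so that it can
 * be reused; skipped when the device is in internal error state.
 */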
1396static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1397{
1398 struct mlx5_core_dev *mdev = dev->mdev;
1399 struct mlx5_umr_wr umrwr = {};
1400
1401 if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
1402 return 0;
1403
1404 umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR;
1405 umrwr.wr.opcode = MLX5_IB_WR_UMR;
1406 umrwr.mkey = mr->mmkey.key;
1407 umrwr.ignore_free_state = 1;
1408
1409 return mlx5_ib_post_send_wait(dev, &umrwr);
1410}
1411
1412static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr,
1413 int access_flags, int flags)
1414{
1415 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1416 struct mlx5_umr_wr umrwr = {};
1417 int err;
1418
1419 umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;
1420
1421 umrwr.wr.opcode = MLX5_IB_WR_UMR;
1422 umrwr.mkey = mr->mmkey.key;
1423
1424 if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) {
1425 umrwr.pd = pd;
1426 umrwr.access_flags = access_flags;
1427 umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
1428 }
1429
1430 err = mlx5_ib_post_send_wait(dev, &umrwr);
1431
1432 return err;
1433}
1434
1435int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
1436 u64 length, u64 virt_addr, int new_access_flags,
1437 struct ib_pd *new_pd, struct ib_udata *udata)
1438{
1439 struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
1440 struct mlx5_ib_mr *mr = to_mmr(ib_mr);
1441 struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd;
1442 int access_flags = flags & IB_MR_REREG_ACCESS ?
1443 new_access_flags :
1444 mr->access_flags;
1445 int page_shift = 0;
1446 int upd_flags = 0;
1447 int npages = 0;
1448 int ncont = 0;
1449 int order = 0;
1450 u64 addr, len;
1451 int err;
1452
1453 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
1454 start, virt_addr, length, access_flags);
1455
1456 atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
1457
1458 if (!mr->umem)
1459 return -EINVAL;
1460
1461 if (flags & IB_MR_REREG_TRANS) {
1462 addr = virt_addr;
1463 len = length;
1464 } else {
1465 addr = mr->umem->address;
1466 len = mr->umem->length;
1467 }
1468
1469 if (flags != IB_MR_REREG_PD) {
 /*
 * Replace the umem. This needs to be done whether or not
 * UMR is used.
 */
1474 flags |= IB_MR_REREG_TRANS;
1475 ib_umem_release(mr->umem);
1476 mr->umem = NULL;
1477 err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
1478 &npages, &page_shift, &ncont, &order);
1479 if (err)
1480 goto err;
1481 }
1482
1483 if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) {
 /*
 * UMR can't be used - the mkey needs to be replaced.
 */
1487 if (mr->allocated_from_cache)
1488 err = unreg_umr(dev, mr);
1489 else
1490 err = destroy_mkey(dev, mr);
1491 if (err)
1492 goto err;
1493
1494 mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,
1495 page_shift, access_flags, true);
1496
1497 if (IS_ERR(mr)) {
1498 err = PTR_ERR(mr);
1499 mr = to_mmr(ib_mr);
1500 goto err;
1501 }
1502
1503 mr->allocated_from_cache = 0;
1504#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1505 mr->live = 1;
1506#endif
1507 } else {
 /*
 * Update the existing mkey in place with a UMR WQE.
 */
1511 mr->ibmr.pd = pd;
1512 mr->access_flags = access_flags;
1513 mr->mmkey.iova = addr;
1514 mr->mmkey.size = len;
1515 mr->mmkey.pd = to_mpd(pd)->pdn;
1516
1517 if (flags & IB_MR_REREG_TRANS) {
1518 upd_flags = MLX5_IB_UPD_XLT_ADDR;
1519 if (flags & IB_MR_REREG_PD)
1520 upd_flags |= MLX5_IB_UPD_XLT_PD;
1521 if (flags & IB_MR_REREG_ACCESS)
1522 upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
1523 err = mlx5_ib_update_xlt(mr, 0, npages, page_shift,
1524 upd_flags);
1525 } else {
1526 err = rereg_umr(pd, mr, access_flags, flags);
1527 }
1528
1529 if (err)
1530 goto err;
1531 }
1532
1533 set_mr_fields(dev, mr, npages, len, access_flags);
1534
1535#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1536 update_odp_mr(mr);
1537#endif
1538 return 0;
1539
1540err:
1541 if (mr->umem) {
1542 ib_umem_release(mr->umem);
1543 mr->umem = NULL;
1544 }
1545 clean_mr(dev, mr);
1546 return err;
1547}
1548
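/*
 * Allocate and DMA-map the private descriptor buffer used by
 * mlx5_ib_map_mr_sg(). The buffer is over-allocated so the descriptors
 * can be aligned to MLX5_UMR_ALIGN.
 */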
1549static int
1550mlx5_alloc_priv_descs(struct ib_device *device,
1551 struct mlx5_ib_mr *mr,
1552 int ndescs,
1553 int desc_size)
1554{
1555 int size = ndescs * desc_size;
1556 int add_size;
1557 int ret;
1558
1559 add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
1560
1561 mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
1562 if (!mr->descs_alloc)
1563 return -ENOMEM;
1564
1565 mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
1566
1567 mr->desc_map = dma_map_single(device->dev.parent, mr->descs,
1568 size, DMA_TO_DEVICE);
1569 if (dma_mapping_error(device->dev.parent, mr->desc_map)) {
1570 ret = -ENOMEM;
1571 goto err;
1572 }
1573
1574 return 0;
1575err:
1576 kfree(mr->descs_alloc);
1577
1578 return ret;
1579}
1580
1581static void
1582mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
1583{
1584 if (mr->descs) {
1585 struct ib_device *device = mr->ibmr.device;
1586 int size = mr->max_descs * mr->desc_size;
1587
1588 dma_unmap_single(device->dev.parent, mr->desc_map,
1589 size, DMA_TO_DEVICE);
1590 kfree(mr->descs_alloc);
1591 mr->descs = NULL;
1592 }
1593}
1594
1595static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1596{
1597 int allocated_from_cache = mr->allocated_from_cache;
1598
1599 if (mr->sig) {
1600 if (mlx5_core_destroy_psv(dev->mdev,
1601 mr->sig->psv_memory.psv_idx))
1602 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1603 mr->sig->psv_memory.psv_idx);
1604 if (mlx5_core_destroy_psv(dev->mdev,
1605 mr->sig->psv_wire.psv_idx))
1606 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1607 mr->sig->psv_wire.psv_idx);
1608 kfree(mr->sig);
1609 mr->sig = NULL;
1610 }
1611
1612 if (!allocated_from_cache) {
1613 destroy_mkey(dev, mr);
1614 mlx5_free_priv_descs(mr);
1615 }
1616}
1617
1618static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1619{
1620 int npages = mr->npages;
1621 struct ib_umem *umem = mr->umem;
1622
1623#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1624 if (umem && umem->is_odp) {
1625 struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem);
1626
 /* Prevent new page faults from succeeding */
1628 mr->live = 0;
 /* Wait for all running page-fault handlers to finish. */
1630 synchronize_srcu(&dev->mr_srcu);
 /* Destroy all page mappings. */
1632 if (umem_odp->page_list)
1633 mlx5_ib_invalidate_range(umem_odp, ib_umem_start(umem),
1634 ib_umem_end(umem));
1635 else
1636 mlx5_ib_free_implicit_mr(mr);
1637
 /*
 * We kill the umem before the MR for ODP, so that there will
 * not be any invalidations in flight that still look at the
 * mr struct.
 */
1642 ib_umem_release(umem);
1643 atomic_sub(npages, &dev->mdev->priv.reg_pages);
1644
 /* Avoid double-freeing the umem below. */
1646 umem = NULL;
1647 }
1648#endif
1649 clean_mr(dev, mr);
1650
 /*
 * We should unregister the DMA address from the HCA before
 * we remove the DMA mapping.
 */
1655 mlx5_mr_cache_free(dev, mr);
1656 if (umem) {
1657 ib_umem_release(umem);
1658 atomic_sub(npages, &dev->mdev->priv.reg_pages);
1659 }
1660 if (!mr->allocated_from_cache)
1661 kfree(mr);
1662}
1663
1664int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
1665{
1666 dereg_mr(to_mdev(ibmr->device), to_mmr(ibmr));
1667 return 0;
1668}
1669
1670struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
1671 enum ib_mr_type mr_type,
1672 u32 max_num_sg)
1673{
1674 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1675 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1676 int ndescs = ALIGN(max_num_sg, 4);
1677 struct mlx5_ib_mr *mr;
1678 void *mkc;
1679 u32 *in;
1680 int err;
1681
1682 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1683 if (!mr)
1684 return ERR_PTR(-ENOMEM);
1685
1686 in = kzalloc(inlen, GFP_KERNEL);
1687 if (!in) {
1688 err = -ENOMEM;
1689 goto err_free;
1690 }
1691
1692 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1693 MLX5_SET(mkc, mkc, free, 1);
1694 MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
1695 MLX5_SET(mkc, mkc, qpn, 0xffffff);
1696 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
1697
1698 if (mr_type == IB_MR_TYPE_MEM_REG) {
1699 mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
1700 MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
1701 err = mlx5_alloc_priv_descs(pd->device, mr,
1702 ndescs, sizeof(struct mlx5_mtt));
1703 if (err)
1704 goto err_free_in;
1705
1706 mr->desc_size = sizeof(struct mlx5_mtt);
1707 mr->max_descs = ndescs;
1708 } else if (mr_type == IB_MR_TYPE_SG_GAPS) {
1709 mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
1710
1711 err = mlx5_alloc_priv_descs(pd->device, mr,
1712 ndescs, sizeof(struct mlx5_klm));
1713 if (err)
1714 goto err_free_in;
1715 mr->desc_size = sizeof(struct mlx5_klm);
1716 mr->max_descs = ndescs;
1717 } else if (mr_type == IB_MR_TYPE_SIGNATURE) {
1718 u32 psv_index[2];
1719
1720 MLX5_SET(mkc, mkc, bsf_en, 1);
1721 MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
1722 mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
1723 if (!mr->sig) {
1724 err = -ENOMEM;
1725 goto err_free_in;
1726 }
1727
 /* create mem & wire PSVs */
1729 err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
1730 2, psv_index);
1731 if (err)
1732 goto err_free_sig;
1733
1734 mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
1735 mr->sig->psv_memory.psv_idx = psv_index[0];
1736 mr->sig->psv_wire.psv_idx = psv_index[1];
1737
1738 mr->sig->sig_status_checked = true;
1739 mr->sig->sig_err_exists = false;
1740
1741 ++mr->sig->sigerr_count;
1742 } else {
1743 mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
1744 err = -EINVAL;
1745 goto err_free_in;
1746 }
1747
1748 MLX5_SET(mkc, mkc, access_mode_1_0, mr->access_mode & 0x3);
1749 MLX5_SET(mkc, mkc, access_mode_4_2, (mr->access_mode >> 2) & 0x7);
1750 MLX5_SET(mkc, mkc, umr_en, 1);
1751
1752 mr->ibmr.device = pd->device;
1753 err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
1754 if (err)
1755 goto err_destroy_psv;
1756
1757 mr->mmkey.type = MLX5_MKEY_MR;
1758 mr->ibmr.lkey = mr->mmkey.key;
1759 mr->ibmr.rkey = mr->mmkey.key;
1760 mr->umem = NULL;
1761 kfree(in);
1762
1763 return &mr->ibmr;
1764
1765err_destroy_psv:
1766 if (mr->sig) {
1767 if (mlx5_core_destroy_psv(dev->mdev,
1768 mr->sig->psv_memory.psv_idx))
1769 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1770 mr->sig->psv_memory.psv_idx);
1771 if (mlx5_core_destroy_psv(dev->mdev,
1772 mr->sig->psv_wire.psv_idx))
1773 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1774 mr->sig->psv_wire.psv_idx);
1775 }
1776 mlx5_free_priv_descs(mr);
1777err_free_sig:
1778 kfree(mr->sig);
1779err_free_in:
1780 kfree(in);
1781err_free:
1782 kfree(mr);
1783 return ERR_PTR(err);
1784}
1785
1786struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
1787 struct ib_udata *udata)
1788{
1789 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1790 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1791 struct mlx5_ib_mw *mw = NULL;
1792 u32 *in = NULL;
1793 void *mkc;
1794 int ndescs;
1795 int err;
1796 struct mlx5_ib_alloc_mw req = {};
1797 struct {
1798 __u32 comp_mask;
1799 __u32 response_length;
1800 } resp = {};
1801
1802 err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
1803 if (err)
1804 return ERR_PTR(err);
1805
1806 if (req.comp_mask || req.reserved1 || req.reserved2)
1807 return ERR_PTR(-EOPNOTSUPP);
1808
1809 if (udata->inlen > sizeof(req) &&
1810 !ib_is_udata_cleared(udata, sizeof(req),
1811 udata->inlen - sizeof(req)))
1812 return ERR_PTR(-EOPNOTSUPP);
1813
1814 ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
1815
1816 mw = kzalloc(sizeof(*mw), GFP_KERNEL);
1817 in = kzalloc(inlen, GFP_KERNEL);
1818 if (!mw || !in) {
1819 err = -ENOMEM;
1820 goto free;
1821 }
1822
1823 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1824
1825 MLX5_SET(mkc, mkc, free, 1);
1826 MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
1827 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
1828 MLX5_SET(mkc, mkc, umr_en, 1);
1829 MLX5_SET(mkc, mkc, lr, 1);
1830 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
1831 MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2)));
1832 MLX5_SET(mkc, mkc, qpn, 0xffffff);
1833
1834 err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen);
1835 if (err)
1836 goto free;
1837
1838 mw->mmkey.type = MLX5_MKEY_MW;
1839 mw->ibmw.rkey = mw->mmkey.key;
1840 mw->ndescs = ndescs;
1841
1842 resp.response_length = min(offsetof(typeof(resp), response_length) +
1843 sizeof(resp.response_length), udata->outlen);
1844 if (resp.response_length) {
1845 err = ib_copy_to_udata(udata, &resp, resp.response_length);
1846 if (err) {
1847 mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
1848 goto free;
1849 }
1850 }
1851
1852 kfree(in);
1853 return &mw->ibmw;
1854
1855free:
1856 kfree(mw);
1857 kfree(in);
1858 return ERR_PTR(err);
1859}
1860
1861int mlx5_ib_dealloc_mw(struct ib_mw *mw)
1862{
1863 struct mlx5_ib_mw *mmw = to_mmw(mw);
1864 int err;
1865
1866 err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev,
1867 &mmw->mmkey);
1868 if (!err)
1869 kfree(mmw);
1870 return err;
1871}
1872
1873int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
1874 struct ib_mr_status *mr_status)
1875{
1876 struct mlx5_ib_mr *mmr = to_mmr(ibmr);
1877 int ret = 0;
1878
1879 if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
1880 pr_err("Invalid status check mask\n");
1881 ret = -EINVAL;
1882 goto done;
1883 }
1884
1885 mr_status->fail_status = 0;
1886 if (check_mask & IB_MR_CHECK_SIG_STATUS) {
1887 if (!mmr->sig) {
1888 ret = -EINVAL;
1889 pr_err("signature status check requested on a non-signature enabled MR\n");
1890 goto done;
1891 }
1892
1893 mmr->sig->sig_status_checked = true;
1894 if (!mmr->sig->sig_err_exists)
1895 goto done;
1896
1897 if (ibmr->lkey == mmr->sig->err_item.key)
1898 memcpy(&mr_status->sig_err, &mmr->sig->err_item,
1899 sizeof(mr_status->sig_err));
1900 else {
1901 mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
1902 mr_status->sig_err.sig_err_offset = 0;
1903 mr_status->sig_err.key = mmr->sig->err_item.key;
1904 }
1905
1906 mmr->sig->sig_err_exists = false;
1907 mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
1908 }
1909
1910done:
1911 return ret;
1912}
1913
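/*
 * Translate a scatterlist into KLM descriptors for KLM-based MRs,
 * honouring an optional byte offset into the first entry.
 */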
1914static int
1915mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
1916 struct scatterlist *sgl,
1917 unsigned short sg_nents,
1918 unsigned int *sg_offset_p)
1919{
1920 struct scatterlist *sg = sgl;
1921 struct mlx5_klm *klms = mr->descs;
1922 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1923 u32 lkey = mr->ibmr.pd->local_dma_lkey;
1924 int i;
1925
1926 mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
1927 mr->ibmr.length = 0;
1928
1929 for_each_sg(sgl, sg, sg_nents, i) {
1930 if (unlikely(i >= mr->max_descs))
1931 break;
1932 klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
1933 klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
1934 klms[i].key = cpu_to_be32(lkey);
1935 mr->ibmr.length += sg_dma_len(sg) - sg_offset;
1936
1937 sg_offset = 0;
1938 }
1939 mr->ndescs = i;
1940
1941 if (sg_offset_p)
1942 *sg_offset_p = sg_offset;
1943
1944 return i;
1945}
1946
1947static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
1948{
1949 struct mlx5_ib_mr *mr = to_mmr(ibmr);
1950 __be64 *descs;
1951
1952 if (unlikely(mr->ndescs == mr->max_descs))
1953 return -ENOMEM;
1954
1955 descs = mr->descs;
1956 descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
1957
1958 return 0;
1959}
1960
1961int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
1962 unsigned int *sg_offset)
1963{
1964 struct mlx5_ib_mr *mr = to_mmr(ibmr);
1965 int n;
1966
1967 mr->ndescs = 0;
1968
1969 ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
1970 mr->desc_size * mr->max_descs,
1971 DMA_TO_DEVICE);
1972
1973 if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
1974 n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset);
1975 else
1976 n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
1977 mlx5_set_page);
1978
1979 ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
1980 mr->desc_size * mr->max_descs,
1981 DMA_TO_DEVICE);
1982
1983 return n;
1984}
1985