// SPDX-License-Identifier: GPL-2.0
/*
 * PCI Peer 2 Peer DMA support.
 */

#define pr_fmt(fmt) "pci-p2pdma: " fmt
#include <linux/ctype.h>
#include <linux/pci-p2pdma.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/genalloc.h>
#include <linux/memremap.h>
#include <linux/percpu-refcount.h>
#include <linux/random.h>
#include <linux/seq_buf.h>
#include <linux/iommu.h>

struct pci_p2pdma {
	struct gen_pool *pool;
	bool p2pmem_published;
};

struct pci_p2pdma_pagemap {
	struct dev_pagemap pgmap;
	u64 bus_offset;
};

static struct pci_p2pdma_pagemap *to_p2p_pgmap(struct dev_pagemap *pgmap)
{
	return container_of(pgmap, struct pci_p2pdma_pagemap, pgmap);
}

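/*
 * sysfs attributes for the p2pmem pool, exposed under
 * /sys/bus/pci/devices/<device>/p2pmem/: the total pool size, the bytes
 * still available and whether the memory has been published for use by
 * other devices.
 */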
static ssize_t size_show(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	size_t size = 0;

	if (pdev->p2pdma->pool)
		size = gen_pool_size(pdev->p2pdma->pool);

	return scnprintf(buf, PAGE_SIZE, "%zd\n", size);
}
static DEVICE_ATTR_RO(size);

static ssize_t available_show(struct device *dev, struct device_attribute *attr,
			      char *buf)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	size_t avail = 0;

	if (pdev->p2pdma->pool)
		avail = gen_pool_avail(pdev->p2pdma->pool);

	return scnprintf(buf, PAGE_SIZE, "%zd\n", avail);
}
static DEVICE_ATTR_RO(available);

static ssize_t published_show(struct device *dev, struct device_attribute *attr,
			      char *buf)
{
	struct pci_dev *pdev = to_pci_dev(dev);

	return scnprintf(buf, PAGE_SIZE, "%d\n",
			 pdev->p2pdma->p2pmem_published);
}
static DEVICE_ATTR_RO(published);

static struct attribute *p2pmem_attrs[] = {
	&dev_attr_size.attr,
	&dev_attr_available.attr,
	&dev_attr_published.attr,
	NULL,
};

static const struct attribute_group p2pmem_group = {
	.attrs = p2pmem_attrs,
	.name = "p2pmem",
};

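/* devm action to tear down the p2pdma state when the provider goes away */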
static void pci_p2pdma_release(void *data)
{
	struct pci_dev *pdev = data;
	struct pci_p2pdma *p2pdma = pdev->p2pdma;

	if (!p2pdma)
		return;

	/* Flush and disable pci_alloc_p2pmem() */
	pdev->p2pdma = NULL;
	synchronize_rcu();

	gen_pool_destroy(p2pdma->pool);
	sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group);
}

static int pci_p2pdma_setup(struct pci_dev *pdev)
{
	int error = -ENOMEM;
	struct pci_p2pdma *p2p;

	p2p = devm_kzalloc(&pdev->dev, sizeof(*p2p), GFP_KERNEL);
	if (!p2p)
		return -ENOMEM;

	p2p->pool = gen_pool_create(PAGE_SHIFT, dev_to_node(&pdev->dev));
	if (!p2p->pool)
		goto out;

	error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev);
	if (error)
		goto out_pool_destroy;

	pdev->p2pdma = p2p;

	error = sysfs_create_group(&pdev->dev.kobj, &p2pmem_group);
	if (error)
		goto out_pool_destroy;

	return 0;

out_pool_destroy:
	pdev->p2pdma = NULL;
	gen_pool_destroy(p2p->pool);
out:
	devm_kfree(&pdev->dev, p2p);
	return error;
}

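/**
 * pci_p2pdma_add_resource - add memory for use as p2p memory
 * @pdev: the device to add the memory to
 * @bar: PCI BAR to add
 * @size: size of the memory to add, may be zero to use the whole BAR
 * @offset: offset into the PCI BAR
 *
 * The memory will be given ZONE_DEVICE struct pages so that it may
 * be used with any DMA request.
 */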
int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
			    u64 offset)
{
	struct pci_p2pdma_pagemap *p2p_pgmap;
	struct dev_pagemap *pgmap;
	void *addr;
	int error;

	if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM))
		return -EINVAL;

	if (offset >= pci_resource_len(pdev, bar))
		return -EINVAL;

	if (!size)
		size = pci_resource_len(pdev, bar) - offset;

	if (size + offset > pci_resource_len(pdev, bar))
		return -EINVAL;

	if (!pdev->p2pdma) {
		error = pci_p2pdma_setup(pdev);
		if (error)
			return error;
	}

	p2p_pgmap = devm_kzalloc(&pdev->dev, sizeof(*p2p_pgmap), GFP_KERNEL);
	if (!p2p_pgmap)
		return -ENOMEM;

	pgmap = &p2p_pgmap->pgmap;
	pgmap->range.start = pci_resource_start(pdev, bar) + offset;
	pgmap->range.end = pgmap->range.start + size - 1;
	pgmap->nr_range = 1;
	pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;

	p2p_pgmap->bus_offset = pci_bus_address(pdev, bar) -
		pci_resource_start(pdev, bar);

	addr = devm_memremap_pages(&pdev->dev, pgmap);
	if (IS_ERR(addr)) {
		error = PTR_ERR(addr);
		goto pgmap_free;
	}

	error = gen_pool_add_owner(pdev->p2pdma->pool, (unsigned long)addr,
			pci_bus_address(pdev, bar) + offset,
			range_len(&pgmap->range), dev_to_node(&pdev->dev),
			pgmap->ref);
	if (error)
		goto pages_free;

	pci_info(pdev, "added peer-to-peer DMA memory %#llx-%#llx\n",
		 pgmap->range.start, pgmap->range.end);

	return 0;

pages_free:
	devm_memunmap_pages(&pdev->dev, pgmap);
pgmap_free:
	devm_kfree(&pdev->dev, pgmap);
	return error;
}
EXPORT_SYMBOL_GPL(pci_p2pdma_add_resource);

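/*
 * Note this function returns the parent PCI device with a
 * reference taken. It is the caller's responsibility to drop
 * the reference.
 */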
static struct pci_dev *find_parent_pci_dev(struct device *dev)
{
	struct device *parent;

	dev = get_device(dev);

	while (dev) {
		if (dev_is_pci(dev))
			return to_pci_dev(dev);

		parent = get_device(dev->parent);
		put_device(dev);
		dev = parent;
	}

	return NULL;
}

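/*
 * Check if a PCI bridge has its ACS redirection bits set to redirect P2P
 * TLPs upstream via ACS. Returns 1 if the packets will be redirected
 * upstream, 0 otherwise.
 */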
static int pci_bridge_has_acs_redir(struct pci_dev *pdev)
{
	int pos;
	u16 ctrl;

	pos = pdev->acs_cap;
	if (!pos)
		return 0;

	pci_read_config_word(pdev, pos + PCI_ACS_CTRL, &ctrl);

	if (ctrl & (PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_EC))
		return 1;

	return 0;
}

static void seq_buf_print_bus_devfn(struct seq_buf *buf, struct pci_dev *pdev)
{
	if (!buf)
		return;

	seq_buf_printf(buf, "%s;", pci_name(pdev));
}

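/*
 * If we can't find a common upstream bridge, take a look at the root
 * complex and compare it to a whitelist of known good hardware.
 */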
static bool root_complex_whitelist(struct pci_dev *dev)
{
	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
	struct pci_dev *root = pci_get_slot(host->bus, PCI_DEVFN(0, 0));
	unsigned short vendor, device;

	if (iommu_present(dev->dev.bus))
		return false;

	if (!root)
		return false;

	vendor = root->vendor;
	device = root->device;
	pci_dev_put(root);

	/* AMD ZEN host bridges can do peer to peer */
	if (vendor == PCI_VENDOR_ID_AMD && device == 0x1450)
		return true;

	return false;
}

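/*
 * Find the distance through the nearest common upstream bridge between
 * two PCI devices.
 *
 * If the two devices are the same device then 0 will be returned.
 *
 * Two devices connected to the same PCIe switch will return 4: two hops
 * up from each device to the common switch upstream port:
 *
 *     -+  Root Port
 *      \+ Switch Upstream Port
 *       +-+ Switch Downstream Port
 *       + \- Device A
 *       \-+ Switch Downstream Port
 *         \- Device B
 *
 * Any two devices that don't have a common upstream bridge will return -1,
 * unless both are below a whitelisted root complex (see
 * root_complex_whitelist()), in which case a large value (0x1000+) is
 * returned so that a true common upstream bridge is always preferred.
 *
 * If a bridge with any ACS redirection bits set is in the path, -2 is
 * returned, since the TLPs would be redirected up into the root complex;
 * each offending bridge is appended to acs_list (if non-NULL) so the
 * caller can print it.
 */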
static int upstream_bridge_distance(struct pci_dev *provider,
				    struct pci_dev *client,
				    struct seq_buf *acs_list)
{
	struct pci_dev *a = provider, *b = client, *bb;
	int dist_a = 0;
	int dist_b = 0;
	int acs_cnt = 0;

	/*
	 * Walk up from the provider; for each ancestor 'a', walk up from
	 * the client looking for a match. The first match is the nearest
	 * common upstream bridge, and dist_a + dist_b is then the number
	 * of ports traversed to reach it from both devices.
	 */
	while (a) {
		dist_b = 0;

		if (pci_bridge_has_acs_redir(a)) {
			seq_buf_print_bus_devfn(acs_list, a);
			acs_cnt++;
		}

		bb = b;

		while (bb) {
			if (a == bb)
				goto check_b_path_acs;

			bb = pci_upstream_bridge(bb);
			dist_b++;
		}

		a = pci_upstream_bridge(a);
		dist_a++;
	}

	/*
	 * Allow the connection if both devices are on a whitelisted root
	 * complex, but add an arbitrarily large value to the distance so
	 * that a true common upstream bridge is always preferred.
	 */
	if (root_complex_whitelist(provider) &&
	    root_complex_whitelist(client))
		return 0x1000 + dist_a + dist_b;

	return -1;

check_b_path_acs:
	bb = b;

	while (bb) {
		if (a == bb)
			break;

		if (pci_bridge_has_acs_redir(bb)) {
			seq_buf_print_bus_devfn(acs_list, bb);
			acs_cnt++;
		}

		bb = pci_upstream_bridge(bb);
	}

	if (acs_cnt)
		return -2;

	return dist_a + dist_b;
}

static int upstream_bridge_distance_warn(struct pci_dev *provider,
					 struct pci_dev *client)
{
	struct seq_buf acs_list;
	int ret;

	seq_buf_init(&acs_list, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE);
	if (!acs_list.buffer)
		return -ENOMEM;

	ret = upstream_bridge_distance(provider, client, &acs_list);
	if (ret == -2) {
		pci_warn(client, "cannot be used for peer-to-peer DMA as ACS redirect is set between the client and provider (%s)\n",
			 pci_name(provider));

		/* Drop the final semicolon from the list */
		acs_list.buffer[acs_list.len-1] = 0;
		pci_warn(client, "to disable ACS redirect for this path, add the kernel parameter: pci=disable_acs_redir=%s\n",
			 acs_list.buffer);

	} else if (ret < 0) {
		pci_warn(client, "cannot be used for peer-to-peer DMA as the client and provider (%s) do not share an upstream bridge\n",
			 pci_name(provider));
	}

	kfree(acs_list.buffer);

	return ret;
}

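/**
 * pci_p2pdma_distance_many - determine the cumulative distance between
 *	a p2pdma provider and the clients in use
 * @provider: p2pdma provider to check against the client list
 * @clients: array of devices to check (NULL-terminated)
 * @num_clients: number of clients in the array
 * @verbose: if true, print warnings for devices when we return -1
 *
 * Returns -1 if any of the clients are not compatible, otherwise returns a
 * positive number where a lower number is the preferable choice. (If there's
 * one client that's the same as the provider it will return 0, which is the
 * best choice).
 *
 * A client is "compatible" if it shares an upstream bridge with the provider
 * (with no ACS redirection in the path) or if both devices sit below a
 * whitelisted root complex.
 */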
int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients,
			     int num_clients, bool verbose)
{
	bool not_supported = false;
	struct pci_dev *pci_client;
	int distance = 0;
	int i, ret;

	if (num_clients == 0)
		return -1;

	for (i = 0; i < num_clients; i++) {
		if (IS_ENABLED(CONFIG_DMA_VIRT_OPS) &&
		    clients[i]->dma_ops == &dma_virt_ops) {
			if (verbose)
				dev_warn(clients[i],
					 "cannot be used for peer-to-peer DMA because the driver makes use of dma_virt_ops\n");
			return -1;
		}

		pci_client = find_parent_pci_dev(clients[i]);
		if (!pci_client) {
			if (verbose)
				dev_warn(clients[i],
					 "cannot be used for peer-to-peer DMA as it is not a PCI device\n");
			return -1;
		}

		if (verbose)
			ret = upstream_bridge_distance_warn(provider,
							    pci_client);
		else
			ret = upstream_bridge_distance(provider, pci_client,
						       NULL);

		pci_dev_put(pci_client);

		if (ret < 0)
			not_supported = true;

		if (not_supported && !verbose)
			break;

		distance += ret;
	}

	if (not_supported)
		return -1;

	return distance;
}
EXPORT_SYMBOL_GPL(pci_p2pdma_distance_many);

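/**
 * pci_has_p2pmem - check if a given PCI device has published any p2pmem
 * @pdev: PCI device to check
 */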
bool pci_has_p2pmem(struct pci_dev *pdev)
{
	return pdev->p2pdma && pdev->p2pdma->p2pmem_published;
}
EXPORT_SYMBOL_GPL(pci_has_p2pmem);

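/**
 * pci_p2pmem_find_many - find a peer-to-peer DMA memory device compatible with
 *	the specified list of clients and shortest distance
 * @clients: array of devices to check (NULL-terminated)
 * @num_clients: number of client devices in the list
 *
 * If multiple devices are behind the same switch, the one "closest" to the
 * client devices in use will be chosen first. If multiple providers are an
 * equal distance away, one will be chosen at random.
 *
 * Returns a pointer to the PCI device with a reference taken (use pci_dev_put
 * to return the reference) or NULL if no compatible device is found.
 */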
struct pci_dev *pci_p2pmem_find_many(struct device **clients, int num_clients)
{
	struct pci_dev *pdev = NULL;
	int distance;
	int closest_distance = INT_MAX;
	struct pci_dev **closest_pdevs;
	int dev_cnt = 0;
	const int max_devs = PAGE_SIZE / sizeof(*closest_pdevs);
	int i;

	closest_pdevs = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!closest_pdevs)
		return NULL;

	while ((pdev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pdev))) {
		if (!pci_has_p2pmem(pdev))
			continue;

		distance = pci_p2pdma_distance_many(pdev, clients,
						    num_clients, false);
		if (distance < 0 || distance > closest_distance)
			continue;

		if (distance == closest_distance && dev_cnt >= max_devs)
			continue;

		if (distance < closest_distance) {
			for (i = 0; i < dev_cnt; i++)
				pci_dev_put(closest_pdevs[i]);

			dev_cnt = 0;
			closest_distance = distance;
		}

		closest_pdevs[dev_cnt++] = pci_dev_get(pdev);
	}

	if (dev_cnt)
		pdev = pci_dev_get(closest_pdevs[prandom_u32_max(dev_cnt)]);

	for (i = 0; i < dev_cnt; i++)
		pci_dev_put(closest_pdevs[i]);

	kfree(closest_pdevs);
	return pdev;
}
EXPORT_SYMBOL_GPL(pci_p2pmem_find_many);

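/**
 * pci_alloc_p2pmem - allocate peer-to-peer DMA memory
 * @pdev: the device to allocate memory from
 * @size: number of bytes to allocate
 *
 * Returns the allocated memory or NULL on error.
 */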
void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size)
{
	void *ret = NULL;
	struct percpu_ref *ref;

	/*
	 * Pairs with synchronize_rcu() in pci_p2pdma_release() to
	 * ensure pdev->p2pdma is non-NULL for the duration of the
	 * read-lock.
	 */
	rcu_read_lock();
	if (unlikely(!pdev->p2pdma))
		goto out;

	ret = (void *)gen_pool_alloc_owner(pdev->p2pdma->pool, size,
					   (void **) &ref);
	if (!ret)
		goto out;

	if (unlikely(!percpu_ref_tryget_live(ref))) {
		gen_pool_free(pdev->p2pdma->pool, (unsigned long) ret, size);
		ret = NULL;
		goto out;
	}
out:
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(pci_alloc_p2pmem);

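/**
 * pci_free_p2pmem - free peer-to-peer DMA memory
 * @pdev: the device the memory was allocated from
 * @addr: address of the memory that was allocated
 * @size: number of bytes that were allocated
 */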
void pci_free_p2pmem(struct pci_dev *pdev, void *addr, size_t size)
{
	struct percpu_ref *ref;

	gen_pool_free_owner(pdev->p2pdma->pool, (uintptr_t)addr, size,
			    (void **) &ref);
	percpu_ref_put(ref);
}
EXPORT_SYMBOL_GPL(pci_free_p2pmem);

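/**
 * pci_p2pmem_virt_to_bus - return the PCI bus address for a given virtual
 *	address obtained with pci_alloc_p2pmem()
 * @pdev: the device the memory was allocated from
 * @addr: address of the memory that was allocated
 */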
pci_bus_addr_t pci_p2pmem_virt_to_bus(struct pci_dev *pdev, void *addr)
{
	if (!addr)
		return 0;
	if (!pdev->p2pdma)
		return 0;

	/*
	 * Note: when we added the memory to the pool we used the PCI
	 * bus address as the "physical" address, so gen_pool_virt_to_phys()
	 * actually returns the bus address despite the misleading name.
	 */
	return gen_pool_virt_to_phys(pdev->p2pdma->pool, (unsigned long)addr);
}
EXPORT_SYMBOL_GPL(pci_p2pmem_virt_to_bus);

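/**
 * pci_p2pmem_alloc_sgl - allocate peer-to-peer DMA memory in a scatterlist
 * @pdev: the device to allocate memory from
 * @nents: the number of SG entries in the list
 * @length: number of bytes to allocate
 *
 * Return: %NULL on error or &struct scatterlist pointer and @nents on success
 */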
struct scatterlist *pci_p2pmem_alloc_sgl(struct pci_dev *pdev,
					 unsigned int *nents, u32 length)
{
	struct scatterlist *sg;
	void *addr;

	sg = kmalloc(sizeof(*sg), GFP_KERNEL);
	if (!sg)
		return NULL;

	sg_init_table(sg, 1);

	addr = pci_alloc_p2pmem(pdev, length);
	if (!addr)
		goto out_free_sg;

	sg_set_buf(sg, addr, length);
	*nents = 1;
	return sg;

out_free_sg:
	kfree(sg);
	return NULL;
}
EXPORT_SYMBOL_GPL(pci_p2pmem_alloc_sgl);

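/**
 * pci_p2pmem_free_sgl - free a scatterlist allocated by pci_p2pmem_alloc_sgl()
 * @pdev: the device the memory was allocated from
 * @sgl: the allocated scatterlist
 */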
void pci_p2pmem_free_sgl(struct pci_dev *pdev, struct scatterlist *sgl)
{
	struct scatterlist *sg;
	int count;

	for_each_sg(sgl, sg, INT_MAX, count) {
		if (!sg)
			break;

		pci_free_p2pmem(pdev, sg_virt(sg), sg->length);
	}
	kfree(sgl);
}
EXPORT_SYMBOL_GPL(pci_p2pmem_free_sgl);

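/**
 * pci_p2pmem_publish - publish the peer-to-peer DMA memory for use by
 *	other devices with pci_p2pmem_find_many()
 * @pdev: the device with peer-to-peer DMA memory to publish
 * @publish: set to true to publish the memory, false to unpublish it
 *
 * Published memory can be used by other PCI device drivers for
 * peer-2-peer DMA operations. Non-published memory is reserved for
 * exclusive use of the device driver that registers the peer-to-peer
 * memory.
 */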
void pci_p2pmem_publish(struct pci_dev *pdev, bool publish)
{
	if (pdev->p2pdma)
		pdev->p2pdma->p2pmem_published = publish;
}
EXPORT_SYMBOL_GPL(pci_p2pmem_publish);

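/*
 * Typical provider/client flow, as a sketch only (hypothetical drivers,
 * error handling omitted). A provider exposes a BAR (here all of BAR 4)
 * and publishes it; a client then finds a compatible provider via the
 * pci_p2pmem_find() helper from <linux/pci-p2pdma.h> and allocates from it:
 *
 *	pci_p2pdma_add_resource(provider_pdev, 4, 0, 0);
 *	pci_p2pmem_publish(provider_pdev, true);
 *
 *	p2p_dev = pci_p2pmem_find(&client_pdev->dev);
 *	buf = pci_alloc_p2pmem(p2p_dev, SZ_4K);
 *	...
 *	pci_free_p2pmem(p2p_dev, buf, SZ_4K);
 *	pci_dev_put(p2p_dev);
 */
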
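/**
 * pci_p2pdma_map_sg_attrs - map a PCI peer-to-peer scatterlist for DMA
 * @dev: device doing the DMA request
 * @sg: scatter list to map
 * @nents: elements in the scatterlist
 * @dir: DMA direction
 * @attrs: DMA attributes
 *
 * Scatterlists mapped with this function should be unmapped using
 * pci_p2pdma_unmap_sg_attrs().
 *
 * Returns the number of SG entries mapped or 0 on error.
 */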
int pci_p2pdma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
	struct pci_p2pdma_pagemap *p2p_pgmap;
	struct scatterlist *s;
	phys_addr_t paddr;
	int i;

	/*
	 * p2pdma mappings are not compatible with devices that use
	 * dma_virt_ops. If the upper layers do the right thing
	 * this should never happen because it will be prevented
	 * by the check in pci_p2pdma_distance_many().
	 */
	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_DMA_VIRT_OPS) &&
			 dev->dma_ops == &dma_virt_ops))
		return 0;

	for_each_sg(sg, s, nents, i) {
		p2p_pgmap = to_p2p_pgmap(sg_page(s)->pgmap);
		paddr = sg_phys(s);

		s->dma_address = paddr - p2p_pgmap->bus_offset;
		sg_dma_len(s) = s->length;
	}

	return nents;
}
EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg_attrs);

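/**
 * pci_p2pdma_unmap_sg_attrs - unmap a PCI peer-to-peer scatterlist that was
 *	mapped with pci_p2pdma_map_sg_attrs()
 * @dev: device doing the DMA request
 * @sg: scatter list to unmap
 * @nents: number of elements returned by pci_p2pdma_map_sg_attrs()
 * @dir: DMA direction
 * @attrs: DMA attributes
 *
 * Bus-address mappings set up by pci_p2pdma_map_sg_attrs() carry no
 * state, so there is nothing to undo here.
 */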
void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
}
EXPORT_SYMBOL_GPL(pci_p2pdma_unmap_sg_attrs);

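/**
 * pci_p2pdma_enable_store - parse a configfs/sysfs attribute store
 *	to enable p2pdma
 * @page: contents of the value to be stored
 * @p2p_dev: returns the PCI device that was selected to be used
 *	(if one was specified in the stored value)
 * @use_p2pdma: returns whether to enable p2pdma or not
 *
 * Parses an attribute value to decide whether to enable p2pdma.
 * The value can select a PCI device (using its full BDF device
 * name) or a boolean (in any format strtobool() accepts). A false
 * value disables p2pdma, a true value expects the caller
 * to automatically find a compatible device and specifying a PCI device
 * expects the caller to use the specific provider.
 *
 * pci_p2pdma_enable_show() should be used as the show operation for
 * the attribute.
 *
 * Returns 0 on success
 */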
int pci_p2pdma_enable_store(const char *page, struct pci_dev **p2p_dev,
			    bool *use_p2pdma)
{
	struct device *dev;

	dev = bus_find_device_by_name(&pci_bus_type, NULL, page);
	if (dev) {
		*use_p2pdma = true;
		*p2p_dev = to_pci_dev(dev);

		if (!pci_has_p2pmem(*p2p_dev)) {
			pci_err(*p2p_dev,
				"PCI device has no peer-to-peer memory: %s\n",
				page);
			pci_dev_put(*p2p_dev);
			return -ENODEV;
		}

		return 0;
	} else if ((page[0] == '0' || page[0] == '1') && !iscntrl(page[1])) {
		/*
		 * If the user enters a PCI device that doesn't exist
		 * like "0000:01:00.1", we don't want strtobool to think
		 * it's a '0' when it's clearly not. Fall through to the
		 * "no such device" error below.
		 */
	} else if (!strtobool(page, use_p2pdma)) {
		return 0;
	}

	pr_err("No such PCI device: %.*s\n", (int)strcspn(page, "\n"), page);
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(pci_p2pdma_enable_store);

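/**
 * pci_p2pdma_enable_show - show a configfs/sysfs attribute indicating
 *	whether p2pdma is enabled
 * @page: contents of the stored value
 * @p2p_dev: the selected p2p device (NULL if no device is selected)
 * @use_p2pdma: whether p2pdma has been enabled
 *
 * Attributes that use pci_p2pdma_enable_store() should use this function
 * to show the value of the attribute.
 *
 * Returns 0 on success
 */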
ssize_t pci_p2pdma_enable_show(char *page, struct pci_dev *p2p_dev,
			       bool use_p2pdma)
{
	if (!use_p2pdma)
		return sprintf(page, "0\n");

	if (!p2p_dev)
		return sprintf(page, "1\n");

	return sprintf(page, "%s\n", pci_name(p2p_dev));
}
EXPORT_SYMBOL_GPL(pci_p2pdma_enable_show);