1
2
3
4
5#include <string.h>
6#include <dirent.h>
7
8#include <rte_log.h>
9#include <rte_bus.h>
10#include <rte_pci.h>
11#include <rte_bus_pci.h>
12#include <rte_malloc.h>
13#include <rte_devargs.h>
14#include <rte_memcpy.h>
15#include <rte_vfio.h>
16
17#include "eal_filesystem.h"
18
19#include "private.h"
20#include "pci_init.h"
21
22
23
24
25
26
/* PCI bus instance; only declared here, the definition lives in another file. */
extern struct rte_pci_bus rte_pci_bus;
28
29static int
30pci_get_kernel_driver_by_path(const char *filename, char *dri_name,
31 size_t len)
32{
33 int count;
34 char path[PATH_MAX];
35 char *name;
36
37 if (!filename || !dri_name)
38 return -1;
39
40 count = readlink(filename, path, PATH_MAX);
41 if (count >= PATH_MAX)
42 return -1;
43
44
45 if (count < 0)
46 return 1;
47
48 path[count] = '\0';
49
50 name = strrchr(path, '/');
51 if (name) {
52 strlcpy(dri_name, name + 1, len);
53 return 0;
54 }
55
56 return -1;
57}
58
59
60int
61rte_pci_map_device(struct rte_pci_device *dev)
62{
63 int ret = -1;
64
65
66 switch (dev->kdrv) {
67 case RTE_PCI_KDRV_VFIO:
68#ifdef VFIO_PRESENT
69 if (pci_vfio_is_enabled())
70 ret = pci_vfio_map_resource(dev);
71#endif
72 break;
73 case RTE_PCI_KDRV_IGB_UIO:
74 case RTE_PCI_KDRV_UIO_GENERIC:
75 if (rte_eal_using_phys_addrs()) {
76
77 ret = pci_uio_map_resource(dev);
78 }
79 break;
80 default:
81 RTE_LOG(DEBUG, EAL,
82 " Not managed by a supported kernel driver, skipped\n");
83 ret = 1;
84 break;
85 }
86
87 return ret;
88}
89
90
91void
92rte_pci_unmap_device(struct rte_pci_device *dev)
93{
94
95 switch (dev->kdrv) {
96 case RTE_PCI_KDRV_VFIO:
97#ifdef VFIO_PRESENT
98 if (pci_vfio_is_enabled())
99 pci_vfio_unmap_resource(dev);
100#endif
101 break;
102 case RTE_PCI_KDRV_IGB_UIO:
103 case RTE_PCI_KDRV_UIO_GENERIC:
104
105 pci_uio_unmap_resource(dev);
106 break;
107 default:
108 RTE_LOG(DEBUG, EAL,
109 " Not managed by a supported kernel driver, skipped\n");
110 break;
111 }
112}
113
114static int
115find_max_end_va(const struct rte_memseg_list *msl, void *arg)
116{
117 size_t sz = msl->len;
118 void *end_va = RTE_PTR_ADD(msl->base_va, sz);
119 void **max_va = arg;
120
121 if (*max_va < end_va)
122 *max_va = end_va;
123 return 0;
124}
125
126void *
127pci_find_max_end_va(void)
128{
129 void *va = NULL;
130
131 rte_memseg_list_walk(find_max_end_va, &va);
132 return va;
133}
134
135
136
137
138
139int
140pci_parse_one_sysfs_resource(char *line, size_t len, uint64_t *phys_addr,
141 uint64_t *end_addr, uint64_t *flags)
142{
143 union pci_resource_info {
144 struct {
145 char *phys_addr;
146 char *end_addr;
147 char *flags;
148 };
149 char *ptrs[PCI_RESOURCE_FMT_NVAL];
150 } res_info;
151
152 if (rte_strsplit(line, len, res_info.ptrs, 3, ' ') != 3) {
153 RTE_LOG(ERR, EAL,
154 "%s(): bad resource format\n", __func__);
155 return -1;
156 }
157 errno = 0;
158 *phys_addr = strtoull(res_info.phys_addr, NULL, 16);
159 *end_addr = strtoull(res_info.end_addr, NULL, 16);
160 *flags = strtoull(res_info.flags, NULL, 16);
161 if (errno != 0) {
162 RTE_LOG(ERR, EAL,
163 "%s(): bad resource format\n", __func__);
164 return -1;
165 }
166
167 return 0;
168}
169
170
171static int
172pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev)
173{
174 FILE *f;
175 char buf[BUFSIZ];
176 int i;
177 uint64_t phys_addr, end_addr, flags;
178
179 f = fopen(filename, "r");
180 if (f == NULL) {
181 RTE_LOG(ERR, EAL, "Cannot open sysfs resource\n");
182 return -1;
183 }
184
185 for (i = 0; i<PCI_MAX_RESOURCE; i++) {
186
187 if (fgets(buf, sizeof(buf), f) == NULL) {
188 RTE_LOG(ERR, EAL,
189 "%s(): cannot read resource\n", __func__);
190 goto error;
191 }
192 if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr,
193 &end_addr, &flags) < 0)
194 goto error;
195
196 if (flags & IORESOURCE_MEM) {
197 dev->mem_resource[i].phys_addr = phys_addr;
198 dev->mem_resource[i].len = end_addr - phys_addr + 1;
199
200 dev->mem_resource[i].addr = NULL;
201 }
202 }
203 fclose(f);
204 return 0;
205
206error:
207 fclose(f);
208 return -1;
209}
210
211
212static int
213pci_scan_one(const char *dirname, const struct rte_pci_addr *addr)
214{
215 char filename[PATH_MAX];
216 unsigned long tmp;
217 struct rte_pci_device *dev;
218 char driver[PATH_MAX];
219 int ret;
220
221 dev = malloc(sizeof(*dev));
222 if (dev == NULL)
223 return -1;
224
225 memset(dev, 0, sizeof(*dev));
226 dev->device.bus = &rte_pci_bus.bus;
227 dev->addr = *addr;
228
229
230 snprintf(filename, sizeof(filename), "%s/vendor", dirname);
231 if (eal_parse_sysfs_value(filename, &tmp) < 0) {
232 free(dev);
233 return -1;
234 }
235 dev->id.vendor_id = (uint16_t)tmp;
236
237
238 snprintf(filename, sizeof(filename), "%s/device", dirname);
239 if (eal_parse_sysfs_value(filename, &tmp) < 0) {
240 free(dev);
241 return -1;
242 }
243 dev->id.device_id = (uint16_t)tmp;
244
245
246 snprintf(filename, sizeof(filename), "%s/subsystem_vendor",
247 dirname);
248 if (eal_parse_sysfs_value(filename, &tmp) < 0) {
249 free(dev);
250 return -1;
251 }
252 dev->id.subsystem_vendor_id = (uint16_t)tmp;
253
254
255 snprintf(filename, sizeof(filename), "%s/subsystem_device",
256 dirname);
257 if (eal_parse_sysfs_value(filename, &tmp) < 0) {
258 free(dev);
259 return -1;
260 }
261 dev->id.subsystem_device_id = (uint16_t)tmp;
262
263
264 snprintf(filename, sizeof(filename), "%s/class",
265 dirname);
266 if (eal_parse_sysfs_value(filename, &tmp) < 0) {
267 free(dev);
268 return -1;
269 }
270
271 dev->id.class_id = (uint32_t)tmp & RTE_CLASS_ANY_ID;
272
273
274 dev->max_vfs = 0;
275 snprintf(filename, sizeof(filename), "%s/max_vfs", dirname);
276 if (!access(filename, F_OK) &&
277 eal_parse_sysfs_value(filename, &tmp) == 0)
278 dev->max_vfs = (uint16_t)tmp;
279 else {
280
281 snprintf(filename, sizeof(filename),
282 "%s/sriov_numvfs", dirname);
283 if (!access(filename, F_OK) &&
284 eal_parse_sysfs_value(filename, &tmp) == 0)
285 dev->max_vfs = (uint16_t)tmp;
286 }
287
288
289 snprintf(filename, sizeof(filename), "%s/numa_node",
290 dirname);
291
292 if (access(filename, F_OK) != -1) {
293 if (eal_parse_sysfs_value(filename, &tmp) == 0)
294 dev->device.numa_node = tmp;
295 else
296 dev->device.numa_node = -1;
297 } else {
298 dev->device.numa_node = 0;
299 }
300
301 pci_name_set(dev);
302
303
304 snprintf(filename, sizeof(filename), "%s/resource", dirname);
305 if (pci_parse_sysfs_resource(filename, dev) < 0) {
306 RTE_LOG(ERR, EAL, "%s(): cannot parse resource\n", __func__);
307 free(dev);
308 return -1;
309 }
310
311
312 snprintf(filename, sizeof(filename), "%s/driver", dirname);
313 ret = pci_get_kernel_driver_by_path(filename, driver, sizeof(driver));
314 if (ret < 0) {
315 RTE_LOG(ERR, EAL, "Fail to get kernel driver\n");
316 free(dev);
317 return -1;
318 }
319
320 if (!ret) {
321 if (!strcmp(driver, "vfio-pci"))
322 dev->kdrv = RTE_PCI_KDRV_VFIO;
323 else if (!strcmp(driver, "igb_uio"))
324 dev->kdrv = RTE_PCI_KDRV_IGB_UIO;
325 else if (!strcmp(driver, "uio_pci_generic"))
326 dev->kdrv = RTE_PCI_KDRV_UIO_GENERIC;
327 else
328 dev->kdrv = RTE_PCI_KDRV_UNKNOWN;
329 } else {
330 free(dev);
331 return 0;
332 }
333
334 if (TAILQ_EMPTY(&rte_pci_bus.device_list)) {
335 rte_pci_add_device(dev);
336 } else {
337 struct rte_pci_device *dev2;
338 int ret;
339
340 TAILQ_FOREACH(dev2, &rte_pci_bus.device_list, next) {
341 ret = rte_pci_addr_cmp(&dev->addr, &dev2->addr);
342 if (ret > 0)
343 continue;
344
345 if (ret < 0) {
346 rte_pci_insert_device(dev2, dev);
347 } else {
348 if (!rte_dev_is_probed(&dev2->device)) {
349 dev2->kdrv = dev->kdrv;
350 dev2->max_vfs = dev->max_vfs;
351 dev2->id = dev->id;
352 pci_name_set(dev2);
353 memmove(dev2->mem_resource,
354 dev->mem_resource,
355 sizeof(dev->mem_resource));
356 } else {
357
358
359
360
361
362
363
364 if (dev2->kdrv != dev->kdrv ||
365 dev2->max_vfs != dev->max_vfs ||
366 memcmp(&dev2->id, &dev->id, sizeof(dev2->id)))
367
368
369
370
371
372
373
374
375
376
377 RTE_LOG(ERR, EAL, "Unexpected device scan at %s!\n",
378 filename);
379 else if (dev2->device.devargs !=
380 dev->device.devargs) {
381 rte_devargs_remove(dev2->device.devargs);
382 pci_name_set(dev2);
383 }
384 }
385 free(dev);
386 }
387 return 0;
388 }
389
390 rte_pci_add_device(dev);
391 }
392
393 return 0;
394}
395
396
397
398
399static int
400parse_pci_addr_format(const char *buf, int bufsize, struct rte_pci_addr *addr)
401{
402
403 union splitaddr {
404 struct {
405 char *domain;
406 char *bus;
407 char *devid;
408 char *function;
409 };
410 char *str[PCI_FMT_NVAL];
411 } splitaddr;
412
413 char *buf_copy = strndup(buf, bufsize);
414 if (buf_copy == NULL)
415 return -1;
416
417 if (rte_strsplit(buf_copy, bufsize, splitaddr.str, PCI_FMT_NVAL, ':')
418 != PCI_FMT_NVAL - 1)
419 goto error;
420
421 splitaddr.function = strchr(splitaddr.devid,'.');
422 if (splitaddr.function == NULL)
423 goto error;
424 *splitaddr.function++ = '\0';
425
426
427 errno = 0;
428 addr->domain = strtoul(splitaddr.domain, NULL, 16);
429 addr->bus = strtoul(splitaddr.bus, NULL, 16);
430 addr->devid = strtoul(splitaddr.devid, NULL, 16);
431 addr->function = strtoul(splitaddr.function, NULL, 10);
432 if (errno != 0)
433 goto error;
434
435 free(buf_copy);
436 return 0;
437error:
438 free(buf_copy);
439 return -1;
440}
441
442
443
444
445
446int
447rte_pci_scan(void)
448{
449 struct dirent *e;
450 DIR *dir;
451 char dirname[PATH_MAX];
452 struct rte_pci_addr addr;
453
454
455 if (!rte_eal_has_pci())
456 return 0;
457
458#ifdef VFIO_PRESENT
459 if (!pci_vfio_is_enabled())
460 RTE_LOG(DEBUG, EAL, "VFIO PCI modules not loaded\n");
461#endif
462
463 dir = opendir(rte_pci_get_sysfs_path());
464 if (dir == NULL) {
465 RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n",
466 __func__, strerror(errno));
467 return -1;
468 }
469
470 while ((e = readdir(dir)) != NULL) {
471 if (e->d_name[0] == '.')
472 continue;
473
474 if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &addr) != 0)
475 continue;
476
477 if (rte_pci_ignore_device(&addr))
478 continue;
479
480 snprintf(dirname, sizeof(dirname), "%s/%s",
481 rte_pci_get_sysfs_path(), e->d_name);
482
483 if (pci_scan_one(dirname, &addr) < 0)
484 goto error;
485 }
486 closedir(dir);
487 return 0;
488
489error:
490 closedir(dir);
491 return -1;
492}
493
494#if defined(RTE_ARCH_X86)
495bool
496pci_device_iommu_support_va(const struct rte_pci_device *dev)
497{
498#define VTD_CAP_MGAW_SHIFT 16
499#define VTD_CAP_MGAW_MASK (0x3fULL << VTD_CAP_MGAW_SHIFT)
500 const struct rte_pci_addr *addr = &dev->addr;
501 char filename[PATH_MAX];
502 FILE *fp;
503 uint64_t mgaw, vtd_cap_reg = 0;
504
505 snprintf(filename, sizeof(filename),
506 "%s/" PCI_PRI_FMT "/iommu/intel-iommu/cap",
507 rte_pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid,
508 addr->function);
509
510 fp = fopen(filename, "r");
511 if (fp == NULL) {
512
513 if (errno == ENOENT)
514 return true;
515
516 RTE_LOG(ERR, EAL, "%s(): can't open %s: %s\n",
517 __func__, filename, strerror(errno));
518 return false;
519 }
520
521
522 if (fscanf(fp, "%" PRIx64, &vtd_cap_reg) != 1) {
523 RTE_LOG(ERR, EAL, "%s(): can't read %s\n", __func__, filename);
524 fclose(fp);
525 return false;
526 }
527
528 fclose(fp);
529
530 mgaw = ((vtd_cap_reg & VTD_CAP_MGAW_MASK) >> VTD_CAP_MGAW_SHIFT) + 1;
531
532
533
534
535
536
537
538
539
540 rte_mem_set_dma_mask(mgaw);
541 return true;
542}
543#elif defined(RTE_ARCH_PPC_64)
544bool
545pci_device_iommu_support_va(__rte_unused const struct rte_pci_device *dev)
546{
547
548
549
550
551
552
553
554
555
556
557
558
559 char *line = NULL;
560 size_t len = 0;
561 char filename[PATH_MAX] = "/proc/cpuinfo";
562 FILE *fp = fopen(filename, "r");
563 bool pseries = false, powernv = false, qemu = false;
564 bool ret = false;
565
566 if (fp == NULL) {
567 RTE_LOG(ERR, EAL, "%s(): can't open %s: %s\n",
568 __func__, filename, strerror(errno));
569 return ret;
570 }
571
572
573 while (getline(&line, &len, fp) != -1) {
574 if (strstr(line, "platform") != NULL) {
575 if (strstr(line, "PowerNV") != NULL) {
576 RTE_LOG(DEBUG, EAL, "Running on a PowerNV platform\n");
577 powernv = true;
578 } else if (strstr(line, "pSeries") != NULL) {
579 RTE_LOG(DEBUG, EAL, "Running on a pSeries platform\n");
580 pseries = true;
581 }
582 } else if (strstr(line, "model") != NULL) {
583 if (strstr(line, "qemu") != NULL) {
584 RTE_LOG(DEBUG, EAL, "Found qemu emulation\n");
585 qemu = true;
586 }
587 }
588 }
589
590 free(line);
591 fclose(fp);
592
593 if (powernv || (pseries && !qemu))
594 ret = true;
595 return ret;
596}
597#else
598bool
599pci_device_iommu_support_va(__rte_unused const struct rte_pci_device *dev)
600{
601 return true;
602}
603#endif
604
605enum rte_iova_mode
606pci_device_iova_mode(const struct rte_pci_driver *pdrv,
607 const struct rte_pci_device *pdev)
608{
609 enum rte_iova_mode iova_mode = RTE_IOVA_DC;
610
611 switch (pdev->kdrv) {
612 case RTE_PCI_KDRV_VFIO: {
613#ifdef VFIO_PRESENT
614 static int is_vfio_noiommu_enabled = -1;
615
616 if (is_vfio_noiommu_enabled == -1) {
617 if (rte_vfio_noiommu_is_enabled() == 1)
618 is_vfio_noiommu_enabled = 1;
619 else
620 is_vfio_noiommu_enabled = 0;
621 }
622 if (is_vfio_noiommu_enabled != 0)
623 iova_mode = RTE_IOVA_PA;
624 else if ((pdrv->drv_flags & RTE_PCI_DRV_NEED_IOVA_AS_VA) != 0)
625 iova_mode = RTE_IOVA_VA;
626#endif
627 break;
628 }
629
630 case RTE_PCI_KDRV_IGB_UIO:
631 case RTE_PCI_KDRV_UIO_GENERIC:
632 iova_mode = RTE_IOVA_PA;
633 break;
634
635 default:
636 if ((pdrv->drv_flags & RTE_PCI_DRV_NEED_IOVA_AS_VA) != 0)
637 iova_mode = RTE_IOVA_VA;
638 break;
639 }
640 return iova_mode;
641}
642
643
644int rte_pci_read_config(const struct rte_pci_device *device,
645 void *buf, size_t len, off_t offset)
646{
647 char devname[RTE_DEV_NAME_MAX_LEN] = "";
648 const struct rte_intr_handle *intr_handle = device->intr_handle;
649
650 switch (device->kdrv) {
651 case RTE_PCI_KDRV_IGB_UIO:
652 case RTE_PCI_KDRV_UIO_GENERIC:
653 return pci_uio_read_config(intr_handle, buf, len, offset);
654#ifdef VFIO_PRESENT
655 case RTE_PCI_KDRV_VFIO:
656 return pci_vfio_read_config(intr_handle, buf, len, offset);
657#endif
658 default:
659 rte_pci_device_name(&device->addr, devname,
660 RTE_DEV_NAME_MAX_LEN);
661 RTE_LOG(ERR, EAL,
662 "Unknown driver type for %s\n", devname);
663 return -1;
664 }
665}
666
667
668int rte_pci_write_config(const struct rte_pci_device *device,
669 const void *buf, size_t len, off_t offset)
670{
671 char devname[RTE_DEV_NAME_MAX_LEN] = "";
672 const struct rte_intr_handle *intr_handle = device->intr_handle;
673
674 switch (device->kdrv) {
675 case RTE_PCI_KDRV_IGB_UIO:
676 case RTE_PCI_KDRV_UIO_GENERIC:
677 return pci_uio_write_config(intr_handle, buf, len, offset);
678#ifdef VFIO_PRESENT
679 case RTE_PCI_KDRV_VFIO:
680 return pci_vfio_write_config(intr_handle, buf, len, offset);
681#endif
682 default:
683 rte_pci_device_name(&device->addr, devname,
684 RTE_DEV_NAME_MAX_LEN);
685 RTE_LOG(ERR, EAL,
686 "Unknown driver type for %s\n", devname);
687 return -1;
688 }
689}
690
691int
692rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
693 struct rte_pci_ioport *p)
694{
695 int ret = -1;
696
697 switch (dev->kdrv) {
698#ifdef VFIO_PRESENT
699 case RTE_PCI_KDRV_VFIO:
700 if (pci_vfio_is_enabled())
701 ret = pci_vfio_ioport_map(dev, bar, p);
702 break;
703#endif
704 case RTE_PCI_KDRV_IGB_UIO:
705 case RTE_PCI_KDRV_UIO_GENERIC:
706 ret = pci_uio_ioport_map(dev, bar, p);
707 break;
708 default:
709 break;
710 }
711
712 if (!ret)
713 p->dev = dev;
714
715 return ret;
716}
717
718void
719rte_pci_ioport_read(struct rte_pci_ioport *p,
720 void *data, size_t len, off_t offset)
721{
722 switch (p->dev->kdrv) {
723#ifdef VFIO_PRESENT
724 case RTE_PCI_KDRV_VFIO:
725 pci_vfio_ioport_read(p, data, len, offset);
726 break;
727#endif
728 case RTE_PCI_KDRV_IGB_UIO:
729 case RTE_PCI_KDRV_UIO_GENERIC:
730 pci_uio_ioport_read(p, data, len, offset);
731 break;
732 default:
733 break;
734 }
735}
736
737void
738rte_pci_ioport_write(struct rte_pci_ioport *p,
739 const void *data, size_t len, off_t offset)
740{
741 switch (p->dev->kdrv) {
742#ifdef VFIO_PRESENT
743 case RTE_PCI_KDRV_VFIO:
744 pci_vfio_ioport_write(p, data, len, offset);
745 break;
746#endif
747 case RTE_PCI_KDRV_IGB_UIO:
748 case RTE_PCI_KDRV_UIO_GENERIC:
749 pci_uio_ioport_write(p, data, len, offset);
750 break;
751 default:
752 break;
753 }
754}
755
756int
757rte_pci_ioport_unmap(struct rte_pci_ioport *p)
758{
759 int ret = -1;
760
761 switch (p->dev->kdrv) {
762#ifdef VFIO_PRESENT
763 case RTE_PCI_KDRV_VFIO:
764 if (pci_vfio_is_enabled())
765 ret = pci_vfio_ioport_unmap(p);
766 break;
767#endif
768 case RTE_PCI_KDRV_IGB_UIO:
769 case RTE_PCI_KDRV_UIO_GENERIC:
770 ret = pci_uio_ioport_unmap(p);
771 break;
772 default:
773 break;
774 }
775
776 return ret;
777}
778