1
2
3
4
5
6
7
8
9
10
11
12
13#include "qemu/osdep.h"
14#include "exec/memop.h"
15#include "qemu/units.h"
16#include "qemu/error-report.h"
17#include "qemu/main-loop.h"
18#include "qemu/module.h"
19#include "qemu/range.h"
20#include "qapi/error.h"
21#include "qapi/visitor.h"
22#include <sys/ioctl.h>
23#include "hw/hw.h"
24#include "hw/nvram/fw_cfg.h"
25#include "hw/qdev-properties.h"
26#include "pci.h"
27#include "trace.h"
28
29
30static bool vfio_pci_is(VFIOPCIDevice *vdev, uint32_t vendor, uint32_t device)
31{
32 return (vendor == PCI_ANY_ID || vendor == vdev->vendor_id) &&
33 (device == PCI_ANY_ID || device == vdev->device_id);
34}
35
36static bool vfio_is_vga(VFIOPCIDevice *vdev)
37{
38 PCIDevice *pdev = &vdev->pdev;
39 uint16_t class = pci_get_word(pdev->config + PCI_CLASS_DEVICE);
40
41 return class == PCI_CLASS_DISPLAY_VGA;
42}
43
44
45
46
47
48
49
50
51
52
53
54
55
56
/*
 * Vendor/device ID pairs checked by vfio_blacklist_opt_rom().  Entries are
 * compared with vfio_pci_is(), so PCI_ANY_ID may be used as a wildcard.
 */
static const struct {
    uint32_t vendor;
    uint32_t device;
} romblacklist[] = {
    { .vendor = 0x14e4, .device = 0x168e },
};
63
64bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev)
65{
66 int i;
67
68 for (i = 0 ; i < ARRAY_SIZE(romblacklist); i++) {
69 if (vfio_pci_is(vdev, romblacklist[i].vendor, romblacklist[i].device)) {
70 trace_vfio_quirk_rom_blacklisted(vdev->vbasedev.name,
71 romblacklist[i].vendor,
72 romblacklist[i].device);
73 return true;
74 }
75 }
76 return false;
77}
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
/*
 * One address pattern recognised by the generic config window quirk.
 *
 * A value written to the address register matches when
 * (value & ~mask) == match; the config space offset is then (value & mask).
 */
typedef struct VFIOConfigWindowMatch {
    uint32_t match;     /* expected value once the mask bits are cleared */
    uint32_t mask;      /* bits of the written value kept as the offset */
} VFIOConfigWindowMatch;
98
99typedef struct VFIOConfigWindowQuirk {
100 struct VFIOPCIDevice *vdev;
101
102 uint32_t address_val;
103
104 uint32_t address_offset;
105 uint32_t data_offset;
106
107 bool window_enabled;
108 uint8_t bar;
109
110 MemoryRegion *addr_mem;
111 MemoryRegion *data_mem;
112
113 uint32_t nr_matches;
114 VFIOConfigWindowMatch matches[];
115} VFIOConfigWindowQuirk;
116
117static uint64_t vfio_generic_window_quirk_address_read(void *opaque,
118 hwaddr addr,
119 unsigned size)
120{
121 VFIOConfigWindowQuirk *window = opaque;
122 VFIOPCIDevice *vdev = window->vdev;
123
124 return vfio_region_read(&vdev->bars[window->bar].region,
125 addr + window->address_offset, size);
126}
127
128static void vfio_generic_window_quirk_address_write(void *opaque, hwaddr addr,
129 uint64_t data,
130 unsigned size)
131{
132 VFIOConfigWindowQuirk *window = opaque;
133 VFIOPCIDevice *vdev = window->vdev;
134 int i;
135
136 window->window_enabled = false;
137
138 vfio_region_write(&vdev->bars[window->bar].region,
139 addr + window->address_offset, data, size);
140
141 for (i = 0; i < window->nr_matches; i++) {
142 if ((data & ~window->matches[i].mask) == window->matches[i].match) {
143 window->window_enabled = true;
144 window->address_val = data & window->matches[i].mask;
145 trace_vfio_quirk_generic_window_address_write(vdev->vbasedev.name,
146 memory_region_name(window->addr_mem), data);
147 break;
148 }
149 }
150}
151
152static const MemoryRegionOps vfio_generic_window_address_quirk = {
153 .read = vfio_generic_window_quirk_address_read,
154 .write = vfio_generic_window_quirk_address_write,
155 .endianness = DEVICE_LITTLE_ENDIAN,
156};
157
158static uint64_t vfio_generic_window_quirk_data_read(void *opaque,
159 hwaddr addr, unsigned size)
160{
161 VFIOConfigWindowQuirk *window = opaque;
162 VFIOPCIDevice *vdev = window->vdev;
163 uint64_t data;
164
165
166 data = vfio_region_read(&vdev->bars[window->bar].region,
167 addr + window->data_offset, size);
168
169 if (window->window_enabled) {
170 data = vfio_pci_read_config(&vdev->pdev, window->address_val, size);
171 trace_vfio_quirk_generic_window_data_read(vdev->vbasedev.name,
172 memory_region_name(window->data_mem), data);
173 }
174
175 return data;
176}
177
178static void vfio_generic_window_quirk_data_write(void *opaque, hwaddr addr,
179 uint64_t data, unsigned size)
180{
181 VFIOConfigWindowQuirk *window = opaque;
182 VFIOPCIDevice *vdev = window->vdev;
183
184 if (window->window_enabled) {
185 vfio_pci_write_config(&vdev->pdev, window->address_val, data, size);
186 trace_vfio_quirk_generic_window_data_write(vdev->vbasedev.name,
187 memory_region_name(window->data_mem), data);
188 return;
189 }
190
191 vfio_region_write(&vdev->bars[window->bar].region,
192 addr + window->data_offset, data, size);
193}
194
195static const MemoryRegionOps vfio_generic_window_data_quirk = {
196 .read = vfio_generic_window_quirk_data_read,
197 .write = vfio_generic_window_quirk_data_write,
198 .endianness = DEVICE_LITTLE_ENDIAN,
199};
200
201
202
203
204
205
206
207typedef struct VFIOConfigMirrorQuirk {
208 struct VFIOPCIDevice *vdev;
209 uint32_t offset;
210 uint8_t bar;
211 MemoryRegion *mem;
212 uint8_t data[];
213} VFIOConfigMirrorQuirk;
214
215static uint64_t vfio_generic_quirk_mirror_read(void *opaque,
216 hwaddr addr, unsigned size)
217{
218 VFIOConfigMirrorQuirk *mirror = opaque;
219 VFIOPCIDevice *vdev = mirror->vdev;
220 uint64_t data;
221
222
223 (void)vfio_region_read(&vdev->bars[mirror->bar].region,
224 addr + mirror->offset, size);
225
226 data = vfio_pci_read_config(&vdev->pdev, addr, size);
227 trace_vfio_quirk_generic_mirror_read(vdev->vbasedev.name,
228 memory_region_name(mirror->mem),
229 addr, data);
230 return data;
231}
232
233static void vfio_generic_quirk_mirror_write(void *opaque, hwaddr addr,
234 uint64_t data, unsigned size)
235{
236 VFIOConfigMirrorQuirk *mirror = opaque;
237 VFIOPCIDevice *vdev = mirror->vdev;
238
239 vfio_pci_write_config(&vdev->pdev, addr, data, size);
240 trace_vfio_quirk_generic_mirror_write(vdev->vbasedev.name,
241 memory_region_name(mirror->mem),
242 addr, data);
243}
244
245static const MemoryRegionOps vfio_generic_mirror_quirk = {
246 .read = vfio_generic_quirk_mirror_read,
247 .write = vfio_generic_quirk_mirror_write,
248 .endianness = DEVICE_LITTLE_ENDIAN,
249};
250
251
/*
 * Is range1 fully contained within range2?
 *
 * Each range is given as a first address and a length.  The test is
 * written without computing first + len, so it cannot be fooled by
 * uint64_t wrap-around when a range sits near the top of the address space.
 *
 * Returns true when [first1, first1 + len1) lies inside
 * [first2, first2 + len2).
 */
static bool vfio_range_contained(uint64_t first1, uint64_t len1,
                                 uint64_t first2, uint64_t len2)
{
    if (first1 < first2 || len1 > len2) {
        return false;
    }

    /* Both subtractions are safe thanks to the guards above. */
    return first1 - first2 <= len2 - len1;
}
256
/* PCI vendor ID matched by the ATI quirks in this file. */
#define PCI_VENDOR_ID_ATI               0x1002
258
259
260
261
262
263
264
265
266
267
268
269static uint64_t vfio_ati_3c3_quirk_read(void *opaque,
270 hwaddr addr, unsigned size)
271{
272 VFIOPCIDevice *vdev = opaque;
273 uint64_t data = vfio_pci_read_config(&vdev->pdev,
274 PCI_BASE_ADDRESS_4 + 1, size);
275
276 trace_vfio_quirk_ati_3c3_read(vdev->vbasedev.name, data);
277
278 return data;
279}
280
281static const MemoryRegionOps vfio_ati_3c3_quirk = {
282 .read = vfio_ati_3c3_quirk_read,
283 .endianness = DEVICE_LITTLE_ENDIAN,
284};
285
286static VFIOQuirk *vfio_quirk_alloc(int nr_mem)
287{
288 VFIOQuirk *quirk = g_new0(VFIOQuirk, 1);
289 QLIST_INIT(&quirk->ioeventfds);
290 quirk->mem = g_new0(MemoryRegion, nr_mem);
291 quirk->nr_mem = nr_mem;
292
293 return quirk;
294}
295
296static void vfio_ioeventfd_exit(VFIOPCIDevice *vdev, VFIOIOEventFD *ioeventfd)
297{
298 QLIST_REMOVE(ioeventfd, next);
299 memory_region_del_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size,
300 true, ioeventfd->data, &ioeventfd->e);
301
302 if (ioeventfd->vfio) {
303 struct vfio_device_ioeventfd vfio_ioeventfd;
304
305 vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
306 vfio_ioeventfd.flags = ioeventfd->size;
307 vfio_ioeventfd.data = ioeventfd->data;
308 vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
309 ioeventfd->region_addr;
310 vfio_ioeventfd.fd = -1;
311
312 if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd)) {
313 error_report("Failed to remove vfio ioeventfd for %s+0x%"
314 HWADDR_PRIx"[%d]:0x%"PRIx64" (%m)",
315 memory_region_name(ioeventfd->mr), ioeventfd->addr,
316 ioeventfd->size, ioeventfd->data);
317 }
318 } else {
319 qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
320 NULL, NULL, NULL);
321 }
322
323 event_notifier_cleanup(&ioeventfd->e);
324 trace_vfio_ioeventfd_exit(memory_region_name(ioeventfd->mr),
325 (uint64_t)ioeventfd->addr, ioeventfd->size,
326 ioeventfd->data);
327 g_free(ioeventfd);
328}
329
330static void vfio_drop_dynamic_eventfds(VFIOPCIDevice *vdev, VFIOQuirk *quirk)
331{
332 VFIOIOEventFD *ioeventfd, *tmp;
333
334 QLIST_FOREACH_SAFE(ioeventfd, &quirk->ioeventfds, next, tmp) {
335 if (ioeventfd->dynamic) {
336 vfio_ioeventfd_exit(vdev, ioeventfd);
337 }
338 }
339}
340
341static void vfio_ioeventfd_handler(void *opaque)
342{
343 VFIOIOEventFD *ioeventfd = opaque;
344
345 if (event_notifier_test_and_clear(&ioeventfd->e)) {
346 vfio_region_write(ioeventfd->region, ioeventfd->region_addr,
347 ioeventfd->data, ioeventfd->size);
348 trace_vfio_ioeventfd_handler(memory_region_name(ioeventfd->mr),
349 (uint64_t)ioeventfd->addr, ioeventfd->size,
350 ioeventfd->data);
351 }
352}
353
354static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev,
355 MemoryRegion *mr, hwaddr addr,
356 unsigned size, uint64_t data,
357 VFIORegion *region,
358 hwaddr region_addr, bool dynamic)
359{
360 VFIOIOEventFD *ioeventfd;
361
362 if (vdev->no_kvm_ioeventfd) {
363 return NULL;
364 }
365
366 ioeventfd = g_malloc0(sizeof(*ioeventfd));
367
368 if (event_notifier_init(&ioeventfd->e, 0)) {
369 g_free(ioeventfd);
370 return NULL;
371 }
372
373
374
375
376
377 ioeventfd->mr = mr;
378 ioeventfd->addr = addr;
379 ioeventfd->size = size;
380 ioeventfd->data = data;
381 ioeventfd->dynamic = dynamic;
382
383
384
385
386 ioeventfd->region = region;
387 ioeventfd->region_addr = region_addr;
388
389 if (!vdev->no_vfio_ioeventfd) {
390 struct vfio_device_ioeventfd vfio_ioeventfd;
391
392 vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
393 vfio_ioeventfd.flags = ioeventfd->size;
394 vfio_ioeventfd.data = ioeventfd->data;
395 vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
396 ioeventfd->region_addr;
397 vfio_ioeventfd.fd = event_notifier_get_fd(&ioeventfd->e);
398
399 ioeventfd->vfio = !ioctl(vdev->vbasedev.fd,
400 VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd);
401 }
402
403 if (!ioeventfd->vfio) {
404 qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
405 vfio_ioeventfd_handler, NULL, ioeventfd);
406 }
407
408 memory_region_add_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size,
409 true, ioeventfd->data, &ioeventfd->e);
410 trace_vfio_ioeventfd_init(memory_region_name(mr), (uint64_t)addr,
411 size, data, ioeventfd->vfio);
412
413 return ioeventfd;
414}
415
416static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
417{
418 VFIOQuirk *quirk;
419
420
421
422
423
424 if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
425 !vdev->bars[4].ioport || vdev->bars[4].region.size < 256) {
426 return;
427 }
428
429 quirk = vfio_quirk_alloc(1);
430
431 memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, vdev,
432 "vfio-ati-3c3-quirk", 1);
433 memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
434 3 , quirk->mem);
435
436 QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks,
437 quirk, next);
438
439 trace_vfio_quirk_ati_3c3_probe(vdev->vbasedev.name);
440}
441
442
443
444
445
446
447
448
449
450
451static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr)
452{
453 VFIOQuirk *quirk;
454 VFIOConfigWindowQuirk *window;
455
456
457 if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
458 !vdev->vga || nr != 4) {
459 return;
460 }
461
462 quirk = vfio_quirk_alloc(2);
463 window = quirk->data = g_malloc0(sizeof(*window) +
464 sizeof(VFIOConfigWindowMatch));
465 window->vdev = vdev;
466 window->address_offset = 0;
467 window->data_offset = 4;
468 window->nr_matches = 1;
469 window->matches[0].match = 0x4000;
470 window->matches[0].mask = vdev->config_size - 1;
471 window->bar = nr;
472 window->addr_mem = &quirk->mem[0];
473 window->data_mem = &quirk->mem[1];
474
475 memory_region_init_io(window->addr_mem, OBJECT(vdev),
476 &vfio_generic_window_address_quirk, window,
477 "vfio-ati-bar4-window-address-quirk", 4);
478 memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
479 window->address_offset,
480 window->addr_mem, 1);
481
482 memory_region_init_io(window->data_mem, OBJECT(vdev),
483 &vfio_generic_window_data_quirk, window,
484 "vfio-ati-bar4-window-data-quirk", 4);
485 memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
486 window->data_offset,
487 window->data_mem, 1);
488
489 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
490
491 trace_vfio_quirk_ati_bar4_probe(vdev->vbasedev.name);
492}
493
494
495
496
497static void vfio_probe_ati_bar2_quirk(VFIOPCIDevice *vdev, int nr)
498{
499 VFIOQuirk *quirk;
500 VFIOConfigMirrorQuirk *mirror;
501
502
503 if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
504 !vdev->vga || nr != 2 || !vdev->bars[2].mem64) {
505 return;
506 }
507
508 quirk = vfio_quirk_alloc(1);
509 mirror = quirk->data = g_malloc0(sizeof(*mirror));
510 mirror->mem = quirk->mem;
511 mirror->vdev = vdev;
512 mirror->offset = 0x4000;
513 mirror->bar = nr;
514
515 memory_region_init_io(mirror->mem, OBJECT(vdev),
516 &vfio_generic_mirror_quirk, mirror,
517 "vfio-ati-bar2-4000-quirk", PCI_CONFIG_SPACE_SIZE);
518 memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
519 mirror->offset, mirror->mem, 1);
520
521 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
522
523 trace_vfio_quirk_ati_bar2_probe(vdev->vbasedev.name);
524}
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
/* States of the NVIDIA 0x3d0/0x3d4 access sequence tracker. */
typedef enum {
    NONE = 0,
    SELECT,
    WINDOW,
    READ,
    WRITE,
} VFIONvidia3d0State;

/* Printable state names for tracing, indexed by VFIONvidia3d0State. */
static const char *nv3d0_states[] = {
    [NONE] = "NONE",
    [SELECT] = "SELECT",
    [WINDOW] = "WINDOW",
    [READ] = "READ",
    [WRITE] = "WRITE",
};
551
552typedef struct VFIONvidia3d0Quirk {
553 VFIOPCIDevice *vdev;
554 VFIONvidia3d0State state;
555 uint32_t offset;
556} VFIONvidia3d0Quirk;
557
558static uint64_t vfio_nvidia_3d4_quirk_read(void *opaque,
559 hwaddr addr, unsigned size)
560{
561 VFIONvidia3d0Quirk *quirk = opaque;
562 VFIOPCIDevice *vdev = quirk->vdev;
563
564 quirk->state = NONE;
565
566 return vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
567 addr + 0x14, size);
568}
569
570static void vfio_nvidia_3d4_quirk_write(void *opaque, hwaddr addr,
571 uint64_t data, unsigned size)
572{
573 VFIONvidia3d0Quirk *quirk = opaque;
574 VFIOPCIDevice *vdev = quirk->vdev;
575 VFIONvidia3d0State old_state = quirk->state;
576
577 quirk->state = NONE;
578
579 switch (data) {
580 case 0x338:
581 if (old_state == NONE) {
582 quirk->state = SELECT;
583 trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
584 nv3d0_states[quirk->state]);
585 }
586 break;
587 case 0x538:
588 if (old_state == WINDOW) {
589 quirk->state = READ;
590 trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
591 nv3d0_states[quirk->state]);
592 }
593 break;
594 case 0x738:
595 if (old_state == WINDOW) {
596 quirk->state = WRITE;
597 trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
598 nv3d0_states[quirk->state]);
599 }
600 break;
601 }
602
603 vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
604 addr + 0x14, data, size);
605}
606
607static const MemoryRegionOps vfio_nvidia_3d4_quirk = {
608 .read = vfio_nvidia_3d4_quirk_read,
609 .write = vfio_nvidia_3d4_quirk_write,
610 .endianness = DEVICE_LITTLE_ENDIAN,
611};
612
613static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque,
614 hwaddr addr, unsigned size)
615{
616 VFIONvidia3d0Quirk *quirk = opaque;
617 VFIOPCIDevice *vdev = quirk->vdev;
618 VFIONvidia3d0State old_state = quirk->state;
619 uint64_t data = vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
620 addr + 0x10, size);
621
622 quirk->state = NONE;
623
624 if (old_state == READ &&
625 (quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
626 uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);
627
628 data = vfio_pci_read_config(&vdev->pdev, offset, size);
629 trace_vfio_quirk_nvidia_3d0_read(vdev->vbasedev.name,
630 offset, size, data);
631 }
632
633 return data;
634}
635
636static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr,
637 uint64_t data, unsigned size)
638{
639 VFIONvidia3d0Quirk *quirk = opaque;
640 VFIOPCIDevice *vdev = quirk->vdev;
641 VFIONvidia3d0State old_state = quirk->state;
642
643 quirk->state = NONE;
644
645 if (old_state == SELECT) {
646 quirk->offset = (uint32_t)data;
647 quirk->state = WINDOW;
648 trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
649 nv3d0_states[quirk->state]);
650 } else if (old_state == WRITE) {
651 if ((quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
652 uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);
653
654 vfio_pci_write_config(&vdev->pdev, offset, data, size);
655 trace_vfio_quirk_nvidia_3d0_write(vdev->vbasedev.name,
656 offset, data, size);
657 return;
658 }
659 }
660
661 vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
662 addr + 0x10, data, size);
663}
664
665static const MemoryRegionOps vfio_nvidia_3d0_quirk = {
666 .read = vfio_nvidia_3d0_quirk_read,
667 .write = vfio_nvidia_3d0_quirk_write,
668 .endianness = DEVICE_LITTLE_ENDIAN,
669};
670
671static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev)
672{
673 VFIOQuirk *quirk;
674 VFIONvidia3d0Quirk *data;
675
676 if (vdev->no_geforce_quirks ||
677 !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
678 !vdev->bars[1].region.size) {
679 return;
680 }
681
682 quirk = vfio_quirk_alloc(2);
683 quirk->data = data = g_malloc0(sizeof(*data));
684 data->vdev = vdev;
685
686 memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_nvidia_3d4_quirk,
687 data, "vfio-nvidia-3d4-quirk", 2);
688 memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
689 0x14 , &quirk->mem[0]);
690
691 memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_nvidia_3d0_quirk,
692 data, "vfio-nvidia-3d0-quirk", 2);
693 memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
694 0x10 , &quirk->mem[1]);
695
696 QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks,
697 quirk, next);
698
699 trace_vfio_quirk_nvidia_3d0_probe(vdev->vbasedev.name);
700}
701
702
703
704
705
706
707
708
709typedef struct VFIONvidiaBAR5Quirk {
710 uint32_t master;
711 uint32_t enable;
712 MemoryRegion *addr_mem;
713 MemoryRegion *data_mem;
714 bool enabled;
715 VFIOConfigWindowQuirk window;
716} VFIONvidiaBAR5Quirk;
717
718static void vfio_nvidia_bar5_enable(VFIONvidiaBAR5Quirk *bar5)
719{
720 VFIOPCIDevice *vdev = bar5->window.vdev;
721
722 if (((bar5->master & bar5->enable) & 0x1) == bar5->enabled) {
723 return;
724 }
725
726 bar5->enabled = !bar5->enabled;
727 trace_vfio_quirk_nvidia_bar5_state(vdev->vbasedev.name,
728 bar5->enabled ? "Enable" : "Disable");
729 memory_region_set_enabled(bar5->addr_mem, bar5->enabled);
730 memory_region_set_enabled(bar5->data_mem, bar5->enabled);
731}
732
733static uint64_t vfio_nvidia_bar5_quirk_master_read(void *opaque,
734 hwaddr addr, unsigned size)
735{
736 VFIONvidiaBAR5Quirk *bar5 = opaque;
737 VFIOPCIDevice *vdev = bar5->window.vdev;
738
739 return vfio_region_read(&vdev->bars[5].region, addr, size);
740}
741
742static void vfio_nvidia_bar5_quirk_master_write(void *opaque, hwaddr addr,
743 uint64_t data, unsigned size)
744{
745 VFIONvidiaBAR5Quirk *bar5 = opaque;
746 VFIOPCIDevice *vdev = bar5->window.vdev;
747
748 vfio_region_write(&vdev->bars[5].region, addr, data, size);
749
750 bar5->master = data;
751 vfio_nvidia_bar5_enable(bar5);
752}
753
754static const MemoryRegionOps vfio_nvidia_bar5_quirk_master = {
755 .read = vfio_nvidia_bar5_quirk_master_read,
756 .write = vfio_nvidia_bar5_quirk_master_write,
757 .endianness = DEVICE_LITTLE_ENDIAN,
758};
759
760static uint64_t vfio_nvidia_bar5_quirk_enable_read(void *opaque,
761 hwaddr addr, unsigned size)
762{
763 VFIONvidiaBAR5Quirk *bar5 = opaque;
764 VFIOPCIDevice *vdev = bar5->window.vdev;
765
766 return vfio_region_read(&vdev->bars[5].region, addr + 4, size);
767}
768
769static void vfio_nvidia_bar5_quirk_enable_write(void *opaque, hwaddr addr,
770 uint64_t data, unsigned size)
771{
772 VFIONvidiaBAR5Quirk *bar5 = opaque;
773 VFIOPCIDevice *vdev = bar5->window.vdev;
774
775 vfio_region_write(&vdev->bars[5].region, addr + 4, data, size);
776
777 bar5->enable = data;
778 vfio_nvidia_bar5_enable(bar5);
779}
780
781static const MemoryRegionOps vfio_nvidia_bar5_quirk_enable = {
782 .read = vfio_nvidia_bar5_quirk_enable_read,
783 .write = vfio_nvidia_bar5_quirk_enable_write,
784 .endianness = DEVICE_LITTLE_ENDIAN,
785};
786
787static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
788{
789 VFIOQuirk *quirk;
790 VFIONvidiaBAR5Quirk *bar5;
791 VFIOConfigWindowQuirk *window;
792
793 if (vdev->no_geforce_quirks ||
794 !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
795 !vdev->vga || nr != 5 || !vdev->bars[5].ioport) {
796 return;
797 }
798
799 quirk = vfio_quirk_alloc(4);
800 bar5 = quirk->data = g_malloc0(sizeof(*bar5) +
801 (sizeof(VFIOConfigWindowMatch) * 2));
802 window = &bar5->window;
803
804 window->vdev = vdev;
805 window->address_offset = 0x8;
806 window->data_offset = 0xc;
807 window->nr_matches = 2;
808 window->matches[0].match = 0x1800;
809 window->matches[0].mask = PCI_CONFIG_SPACE_SIZE - 1;
810 window->matches[1].match = 0x88000;
811 window->matches[1].mask = vdev->config_size - 1;
812 window->bar = nr;
813 window->addr_mem = bar5->addr_mem = &quirk->mem[0];
814 window->data_mem = bar5->data_mem = &quirk->mem[1];
815
816 memory_region_init_io(window->addr_mem, OBJECT(vdev),
817 &vfio_generic_window_address_quirk, window,
818 "vfio-nvidia-bar5-window-address-quirk", 4);
819 memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
820 window->address_offset,
821 window->addr_mem, 1);
822 memory_region_set_enabled(window->addr_mem, false);
823
824 memory_region_init_io(window->data_mem, OBJECT(vdev),
825 &vfio_generic_window_data_quirk, window,
826 "vfio-nvidia-bar5-window-data-quirk", 4);
827 memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
828 window->data_offset,
829 window->data_mem, 1);
830 memory_region_set_enabled(window->data_mem, false);
831
832 memory_region_init_io(&quirk->mem[2], OBJECT(vdev),
833 &vfio_nvidia_bar5_quirk_master, bar5,
834 "vfio-nvidia-bar5-master-quirk", 4);
835 memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
836 0, &quirk->mem[2], 1);
837
838 memory_region_init_io(&quirk->mem[3], OBJECT(vdev),
839 &vfio_nvidia_bar5_quirk_enable, bar5,
840 "vfio-nvidia-bar5-enable-quirk", 4);
841 memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
842 4, &quirk->mem[3], 1);
843
844 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
845
846 trace_vfio_quirk_nvidia_bar5_probe(vdev->vbasedev.name);
847}
848
849typedef struct LastDataSet {
850 VFIOQuirk *quirk;
851 hwaddr addr;
852 uint64_t data;
853 unsigned size;
854 int hits;
855 int added;
856} LastDataSet;
857
/* Upper bound on dynamically added ioeventfds per NVIDIA BAR0 mirror. */
#define MAX_DYN_IOEVENTFD 10
/* Consecutive identical writes required before an ioeventfd is added. */
#define HITS_FOR_IOEVENTFD 10
860
861
862
863
864
865static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr,
866 uint64_t data, unsigned size)
867{
868 VFIOConfigMirrorQuirk *mirror = opaque;
869 VFIOPCIDevice *vdev = mirror->vdev;
870 PCIDevice *pdev = &vdev->pdev;
871 LastDataSet *last = (LastDataSet *)&mirror->data;
872
873 vfio_generic_quirk_mirror_write(opaque, addr, data, size);
874
875
876
877
878
879
880 if ((pdev->cap_present & QEMU_PCI_CAP_MSI) &&
881 vfio_range_contained(addr, size, pdev->msi_cap, PCI_MSI_FLAGS)) {
882 vfio_region_write(&vdev->bars[mirror->bar].region,
883 addr + mirror->offset, data, size);
884 trace_vfio_quirk_nvidia_bar0_msi_ack(vdev->vbasedev.name);
885 }
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902 if (!vdev->no_kvm_ioeventfd &&
903 addr >= PCI_STD_HEADER_SIZEOF && last->added <= MAX_DYN_IOEVENTFD) {
904 if (addr != last->addr || data != last->data || size != last->size) {
905 last->addr = addr;
906 last->data = data;
907 last->size = size;
908 last->hits = 1;
909 } else if (++last->hits >= HITS_FOR_IOEVENTFD) {
910 if (last->added < MAX_DYN_IOEVENTFD) {
911 VFIOIOEventFD *ioeventfd;
912 ioeventfd = vfio_ioeventfd_init(vdev, mirror->mem, addr, size,
913 data, &vdev->bars[mirror->bar].region,
914 mirror->offset + addr, true);
915 if (ioeventfd) {
916 VFIOQuirk *quirk = last->quirk;
917
918 QLIST_INSERT_HEAD(&quirk->ioeventfds, ioeventfd, next);
919 last->added++;
920 }
921 } else {
922 last->added++;
923 warn_report("NVIDIA ioeventfd queue full for %s, unable to "
924 "accelerate 0x%"HWADDR_PRIx", data 0x%"PRIx64", "
925 "size %u", vdev->vbasedev.name, addr, data, size);
926 }
927 }
928 }
929}
930
931static const MemoryRegionOps vfio_nvidia_mirror_quirk = {
932 .read = vfio_generic_quirk_mirror_read,
933 .write = vfio_nvidia_quirk_mirror_write,
934 .endianness = DEVICE_LITTLE_ENDIAN,
935};
936
937static void vfio_nvidia_bar0_quirk_reset(VFIOPCIDevice *vdev, VFIOQuirk *quirk)
938{
939 VFIOConfigMirrorQuirk *mirror = quirk->data;
940 LastDataSet *last = (LastDataSet *)&mirror->data;
941
942 last->addr = last->data = last->size = last->hits = last->added = 0;
943
944 vfio_drop_dynamic_eventfds(vdev, quirk);
945}
946
947static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
948{
949 VFIOQuirk *quirk;
950 VFIOConfigMirrorQuirk *mirror;
951 LastDataSet *last;
952
953 if (vdev->no_geforce_quirks ||
954 !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
955 !vfio_is_vga(vdev) || nr != 0) {
956 return;
957 }
958
959 quirk = vfio_quirk_alloc(1);
960 quirk->reset = vfio_nvidia_bar0_quirk_reset;
961 mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet));
962 mirror->mem = quirk->mem;
963 mirror->vdev = vdev;
964 mirror->offset = 0x88000;
965 mirror->bar = nr;
966 last = (LastDataSet *)&mirror->data;
967 last->quirk = quirk;
968
969 memory_region_init_io(mirror->mem, OBJECT(vdev),
970 &vfio_nvidia_mirror_quirk, mirror,
971 "vfio-nvidia-bar0-88000-mirror-quirk",
972 vdev->config_size);
973 memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
974 mirror->offset, mirror->mem, 1);
975
976 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
977
978
979 if (vdev->vga) {
980 quirk = vfio_quirk_alloc(1);
981 quirk->reset = vfio_nvidia_bar0_quirk_reset;
982 mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet));
983 mirror->mem = quirk->mem;
984 mirror->vdev = vdev;
985 mirror->offset = 0x1800;
986 mirror->bar = nr;
987 last = (LastDataSet *)&mirror->data;
988 last->quirk = quirk;
989
990 memory_region_init_io(mirror->mem, OBJECT(vdev),
991 &vfio_nvidia_mirror_quirk, mirror,
992 "vfio-nvidia-bar0-1800-mirror-quirk",
993 PCI_CONFIG_SPACE_SIZE);
994 memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
995 mirror->offset, mirror->mem, 1);
996
997 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
998 }
999
1000 trace_vfio_quirk_nvidia_bar0_probe(vdev->vbasedev.name);
1001}
1002
1003
1004
1005
1006
1007
1008
/* PCI vendor ID matched by the RTL8168 quirk in this file. */
#define PCI_VENDOR_ID_REALTEK 0x10ec
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033typedef struct VFIOrtl8168Quirk {
1034 VFIOPCIDevice *vdev;
1035 uint32_t addr;
1036 uint32_t data;
1037 bool enabled;
1038} VFIOrtl8168Quirk;
1039
1040static uint64_t vfio_rtl8168_quirk_address_read(void *opaque,
1041 hwaddr addr, unsigned size)
1042{
1043 VFIOrtl8168Quirk *rtl = opaque;
1044 VFIOPCIDevice *vdev = rtl->vdev;
1045 uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x74, size);
1046
1047 if (rtl->enabled) {
1048 data = rtl->addr ^ 0x80000000U;
1049 trace_vfio_quirk_rtl8168_fake_latch(vdev->vbasedev.name, data);
1050 }
1051
1052 return data;
1053}
1054
1055static void vfio_rtl8168_quirk_address_write(void *opaque, hwaddr addr,
1056 uint64_t data, unsigned size)
1057{
1058 VFIOrtl8168Quirk *rtl = opaque;
1059 VFIOPCIDevice *vdev = rtl->vdev;
1060
1061 rtl->enabled = false;
1062
1063 if ((data & 0x7fff0000) == 0x10000) {
1064 rtl->enabled = true;
1065 rtl->addr = (uint32_t)data;
1066
1067 if (data & 0x80000000U) {
1068 if (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) {
1069 hwaddr offset = data & 0xfff;
1070 uint64_t val = rtl->data;
1071
1072 trace_vfio_quirk_rtl8168_msix_write(vdev->vbasedev.name,
1073 (uint16_t)offset, val);
1074
1075
1076 memory_region_dispatch_write(&vdev->pdev.msix_table_mmio,
1077 offset, val,
1078 size_memop(size) | MO_LE,
1079 MEMTXATTRS_UNSPECIFIED);
1080 }
1081 return;
1082 }
1083 }
1084
1085 vfio_region_write(&vdev->bars[2].region, addr + 0x74, data, size);
1086}
1087
1088static const MemoryRegionOps vfio_rtl_address_quirk = {
1089 .read = vfio_rtl8168_quirk_address_read,
1090 .write = vfio_rtl8168_quirk_address_write,
1091 .valid = {
1092 .min_access_size = 4,
1093 .max_access_size = 4,
1094 .unaligned = false,
1095 },
1096 .endianness = DEVICE_LITTLE_ENDIAN,
1097};
1098
1099static uint64_t vfio_rtl8168_quirk_data_read(void *opaque,
1100 hwaddr addr, unsigned size)
1101{
1102 VFIOrtl8168Quirk *rtl = opaque;
1103 VFIOPCIDevice *vdev = rtl->vdev;
1104 uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x70, size);
1105
1106 if (rtl->enabled && (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) {
1107 hwaddr offset = rtl->addr & 0xfff;
1108 memory_region_dispatch_read(&vdev->pdev.msix_table_mmio, offset,
1109 &data, size_memop(size) | MO_LE,
1110 MEMTXATTRS_UNSPECIFIED);
1111 trace_vfio_quirk_rtl8168_msix_read(vdev->vbasedev.name, offset, data);
1112 }
1113
1114 return data;
1115}
1116
1117static void vfio_rtl8168_quirk_data_write(void *opaque, hwaddr addr,
1118 uint64_t data, unsigned size)
1119{
1120 VFIOrtl8168Quirk *rtl = opaque;
1121 VFIOPCIDevice *vdev = rtl->vdev;
1122
1123 rtl->data = (uint32_t)data;
1124
1125 vfio_region_write(&vdev->bars[2].region, addr + 0x70, data, size);
1126}
1127
1128static const MemoryRegionOps vfio_rtl_data_quirk = {
1129 .read = vfio_rtl8168_quirk_data_read,
1130 .write = vfio_rtl8168_quirk_data_write,
1131 .valid = {
1132 .min_access_size = 4,
1133 .max_access_size = 4,
1134 .unaligned = false,
1135 },
1136 .endianness = DEVICE_LITTLE_ENDIAN,
1137};
1138
1139static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
1140{
1141 VFIOQuirk *quirk;
1142 VFIOrtl8168Quirk *rtl;
1143
1144 if (!vfio_pci_is(vdev, PCI_VENDOR_ID_REALTEK, 0x8168) || nr != 2) {
1145 return;
1146 }
1147
1148 quirk = vfio_quirk_alloc(2);
1149 quirk->data = rtl = g_malloc0(sizeof(*rtl));
1150 rtl->vdev = vdev;
1151
1152 memory_region_init_io(&quirk->mem[0], OBJECT(vdev),
1153 &vfio_rtl_address_quirk, rtl,
1154 "vfio-rtl8168-window-address-quirk", 4);
1155 memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
1156 0x74, &quirk->mem[0], 1);
1157
1158 memory_region_init_io(&quirk->mem[1], OBJECT(vdev),
1159 &vfio_rtl_data_quirk, rtl,
1160 "vfio-rtl8168-window-data-quirk", 4);
1161 memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
1162 0x70, &quirk->mem[1], 1);
1163
1164 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
1165
1166 trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name);
1167}
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207static int igd_gen(VFIOPCIDevice *vdev)
1208{
1209 if ((vdev->device_id & 0xfff) == 0xa84) {
1210 return 8;
1211 }
1212
1213 switch (vdev->device_id & 0xff00) {
1214
1215 case 0x0000:
1216 case 0x2500:
1217 case 0x2700:
1218 case 0x2900:
1219 case 0x2a00:
1220 case 0x2e00:
1221 case 0x3500:
1222 case 0xa000:
1223 return -1;
1224
1225 case 0x0100:
1226 case 0x0400:
1227 case 0x0a00:
1228 case 0x0c00:
1229 case 0x0d00:
1230 case 0x0f00:
1231 return 6;
1232
1233 case 0x1600:
1234 case 0x1900:
1235 case 0x2200:
1236 case 0x5900:
1237 return 8;
1238 }
1239
1240 return 8;
1241}
1242
/*
 * Per-device state for an IGD quirk.  The code using index and bdsm is not
 * part of this section of the file.
 */
typedef struct VFIOIGDQuirk {
    struct VFIOPCIDevice *vdev;
    uint32_t index;
    uint32_t bdsm;
} VFIOIGDQuirk;
1248
/* PCI config space offsets of IGD registers used by the IGD quirks. */
#define IGD_GMCH 0x50
#define IGD_BDSM 0x5c
#define IGD_ASLS 0xfc   /* zeroed and emulated by vfio_pci_igd_opregion_init() */
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
1262 struct vfio_region_info *info, Error **errp)
1263{
1264 int ret;
1265
1266 vdev->igd_opregion = g_malloc0(info->size);
1267 ret = pread(vdev->vbasedev.fd, vdev->igd_opregion,
1268 info->size, info->offset);
1269 if (ret != info->size) {
1270 error_setg(errp, "failed to read IGD OpRegion");
1271 g_free(vdev->igd_opregion);
1272 vdev->igd_opregion = NULL;
1273 return -EINVAL;
1274 }
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289 fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion",
1290 vdev->igd_opregion, info->size);
1291
1292 trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name);
1293
1294 pci_set_long(vdev->pdev.config + IGD_ASLS, 0);
1295 pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0);
1296 pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0);
1297
1298 return 0;
1299}
1300
1301
1302
1303
1304
1305
1306
1307
1308
/* One config-space field to copy from a host device: where and how much. */
typedef struct IGDHostInfo {
    uint8_t offset;     /* byte offset of the field in config space */
    uint8_t len;        /* number of bytes to copy */
} IGDHostInfo;
1313
1314static const IGDHostInfo igd_host_bridge_infos[] = {
1315 {PCI_REVISION_ID, 2},
1316 {PCI_SUBSYSTEM_VENDOR_ID, 2},
1317 {PCI_SUBSYSTEM_ID, 2},
1318};
1319
1320static const IGDHostInfo igd_lpc_bridge_infos[] = {
1321 {PCI_VENDOR_ID, 2},
1322 {PCI_DEVICE_ID, 2},
1323 {PCI_REVISION_ID, 2},
1324 {PCI_SUBSYSTEM_VENDOR_ID, 2},
1325 {PCI_SUBSYSTEM_ID, 2},
1326};
1327
1328static int vfio_pci_igd_copy(VFIOPCIDevice *vdev, PCIDevice *pdev,
1329 struct vfio_region_info *info,
1330 const IGDHostInfo *list, int len)
1331{
1332 int i, ret;
1333
1334 for (i = 0; i < len; i++) {
1335 ret = pread(vdev->vbasedev.fd, pdev->config + list[i].offset,
1336 list[i].len, info->offset + list[i].offset);
1337 if (ret != list[i].len) {
1338 error_report("IGD copy failed: %m");
1339 return -errno;
1340 }
1341 }
1342
1343 return 0;
1344}
1345
1346
1347
1348
1349static int vfio_pci_igd_host_init(VFIOPCIDevice *vdev,
1350 struct vfio_region_info *info)
1351{
1352 PCIBus *bus;
1353 PCIDevice *host_bridge;
1354 int ret;
1355
1356 bus = pci_device_root_bus(&vdev->pdev);
1357 host_bridge = pci_find_device(bus, 0, PCI_DEVFN(0, 0));
1358
1359 if (!host_bridge) {
1360 error_report("Can't find host bridge");
1361 return -ENODEV;
1362 }
1363
1364 ret = vfio_pci_igd_copy(vdev, host_bridge, info, igd_host_bridge_infos,
1365 ARRAY_SIZE(igd_host_bridge_infos));
1366 if (!ret) {
1367 trace_vfio_pci_igd_host_bridge_enabled(vdev->vbasedev.name);
1368 }
1369
1370 return ret;
1371}
1372
1373
1374
1375
1376
1377
1378
1379static void vfio_pci_igd_lpc_bridge_realize(PCIDevice *pdev, Error **errp)
1380{
1381 if (pdev->devfn != PCI_DEVFN(0x1f, 0)) {
1382 error_setg(errp, "VFIO dummy ISA/LPC bridge must have address 1f.0");
1383 }
1384}
1385
1386static void vfio_pci_igd_lpc_bridge_class_init(ObjectClass *klass, void *data)
1387{
1388 DeviceClass *dc = DEVICE_CLASS(klass);
1389 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1390
1391 set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
1392 dc->desc = "VFIO dummy ISA/LPC bridge for IGD assignment";
1393 dc->hotpluggable = false;
1394 k->realize = vfio_pci_igd_lpc_bridge_realize;
1395 k->class_id = PCI_CLASS_BRIDGE_ISA;
1396}
1397
1398static TypeInfo vfio_pci_igd_lpc_bridge_info = {
1399 .name = "vfio-pci-igd-lpc-bridge",
1400 .parent = TYPE_PCI_DEVICE,
1401 .class_init = vfio_pci_igd_lpc_bridge_class_init,
1402 .interfaces = (InterfaceInfo[]) {
1403 { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1404 { },
1405 },
1406};
1407
1408static void vfio_pci_igd_register_types(void)
1409{
1410 type_register_static(&vfio_pci_igd_lpc_bridge_info);
1411}
1412
1413type_init(vfio_pci_igd_register_types)
1414
1415static int vfio_pci_igd_lpc_init(VFIOPCIDevice *vdev,
1416 struct vfio_region_info *info)
1417{
1418 PCIDevice *lpc_bridge;
1419 int ret;
1420
1421 lpc_bridge = pci_find_device(pci_device_root_bus(&vdev->pdev),
1422 0, PCI_DEVFN(0x1f, 0));
1423 if (!lpc_bridge) {
1424 lpc_bridge = pci_create_simple(pci_device_root_bus(&vdev->pdev),
1425 PCI_DEVFN(0x1f, 0), "vfio-pci-igd-lpc-bridge");
1426 }
1427
1428 ret = vfio_pci_igd_copy(vdev, lpc_bridge, info, igd_lpc_bridge_infos,
1429 ARRAY_SIZE(igd_lpc_bridge_infos));
1430 if (!ret) {
1431 trace_vfio_pci_igd_lpc_bridge_enabled(vdev->vbasedev.name);
1432 }
1433
1434 return ret;
1435}
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445static int vfio_igd_gtt_max(VFIOPCIDevice *vdev)
1446{
1447 uint32_t gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, sizeof(gmch));
1448 int ggms, gen = igd_gen(vdev);
1449
1450 gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, sizeof(gmch));
1451 ggms = (gmch >> (gen < 8 ? 8 : 6)) & 0x3;
1452 if (gen > 6) {
1453 ggms = 1 << ggms;
1454 }
1455
1456 ggms *= MiB;
1457
1458 return (ggms / (4 * KiB)) * (gen < 8 ? 4 : 8);
1459}
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472static uint64_t vfio_igd_quirk_data_read(void *opaque,
1473 hwaddr addr, unsigned size)
1474{
1475 VFIOIGDQuirk *igd = opaque;
1476 VFIOPCIDevice *vdev = igd->vdev;
1477
1478 igd->index = ~0;
1479
1480 return vfio_region_read(&vdev->bars[4].region, addr + 4, size);
1481}
1482
1483static void vfio_igd_quirk_data_write(void *opaque, hwaddr addr,
1484 uint64_t data, unsigned size)
1485{
1486 VFIOIGDQuirk *igd = opaque;
1487 VFIOPCIDevice *vdev = igd->vdev;
1488 uint64_t val = data;
1489 int gen = igd_gen(vdev);
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505 if ((igd->index % 4 == 1) && igd->index < vfio_igd_gtt_max(vdev)) {
1506 if (gen < 8 || (igd->index % 8 == 1)) {
1507 uint32_t base;
1508
1509 base = pci_get_long(vdev->pdev.config + IGD_BDSM);
1510 if (!base) {
1511 hw_error("vfio-igd: Guest attempted to program IGD GTT before "
1512 "BIOS reserved stolen memory. Unsupported BIOS?");
1513 }
1514
1515 val = data - igd->bdsm + base;
1516 } else {
1517 val = 0;
1518 }
1519
1520 trace_vfio_pci_igd_bar4_write(vdev->vbasedev.name,
1521 igd->index, data, val);
1522 }
1523
1524 vfio_region_write(&vdev->bars[4].region, addr + 4, val, size);
1525
1526 igd->index = ~0;
1527}
1528
1529static const MemoryRegionOps vfio_igd_data_quirk = {
1530 .read = vfio_igd_quirk_data_read,
1531 .write = vfio_igd_quirk_data_write,
1532 .endianness = DEVICE_LITTLE_ENDIAN,
1533};
1534
1535static uint64_t vfio_igd_quirk_index_read(void *opaque,
1536 hwaddr addr, unsigned size)
1537{
1538 VFIOIGDQuirk *igd = opaque;
1539 VFIOPCIDevice *vdev = igd->vdev;
1540
1541 igd->index = ~0;
1542
1543 return vfio_region_read(&vdev->bars[4].region, addr, size);
1544}
1545
1546static void vfio_igd_quirk_index_write(void *opaque, hwaddr addr,
1547 uint64_t data, unsigned size)
1548{
1549 VFIOIGDQuirk *igd = opaque;
1550 VFIOPCIDevice *vdev = igd->vdev;
1551
1552 igd->index = data;
1553
1554 vfio_region_write(&vdev->bars[4].region, addr, data, size);
1555}
1556
1557static const MemoryRegionOps vfio_igd_index_quirk = {
1558 .read = vfio_igd_quirk_index_read,
1559 .write = vfio_igd_quirk_index_write,
1560 .endianness = DEVICE_LITTLE_ENDIAN,
1561};
1562
1563static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
1564{
1565 struct vfio_region_info *rom = NULL, *opregion = NULL,
1566 *host = NULL, *lpc = NULL;
1567 VFIOQuirk *quirk;
1568 VFIOIGDQuirk *igd;
1569 PCIDevice *lpc_bridge;
1570 int i, ret, ggms_mb, gms_mb = 0, gen;
1571 uint64_t *bdsm_size;
1572 uint32_t gmch;
1573 uint16_t cmd_orig, cmd;
1574 Error *err = NULL;
1575
1576
1577
1578
1579
1580
1581 if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||
1582 !vfio_is_vga(vdev) || nr != 4 ||
1583 &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev),
1584 0, PCI_DEVFN(0x2, 0))) {
1585 return;
1586 }
1587
1588
1589
1590
1591
1592
1593 lpc_bridge = pci_find_device(pci_device_root_bus(&vdev->pdev),
1594 0, PCI_DEVFN(0x1f, 0));
1595 if (lpc_bridge && !object_dynamic_cast(OBJECT(lpc_bridge),
1596 "vfio-pci-igd-lpc-bridge")) {
1597 error_report("IGD device %s cannot support legacy mode due to existing "
1598 "devices at address 1f.0", vdev->vbasedev.name);
1599 return;
1600 }
1601
1602
1603
1604
1605
1606
1607 gen = igd_gen(vdev);
1608 if (gen != 6 && gen != 8) {
1609 error_report("IGD device %s is unsupported in legacy mode, "
1610 "try SandyBridge or newer", vdev->vbasedev.name);
1611 return;
1612 }
1613
1614
1615
1616
1617
1618
1619 ret = vfio_get_region_info(&vdev->vbasedev,
1620 VFIO_PCI_ROM_REGION_INDEX, &rom);
1621 if ((ret || !rom->size) && !vdev->pdev.romfile) {
1622 error_report("IGD device %s has no ROM, legacy mode disabled",
1623 vdev->vbasedev.name);
1624 goto out;
1625 }
1626
1627
1628
1629
1630
1631 if (vdev->pdev.qdev.hotplugged) {
1632 error_report("IGD device %s hotplugged, ROM disabled, "
1633 "legacy mode disabled", vdev->vbasedev.name);
1634 vdev->rom_read_failed = true;
1635 goto out;
1636 }
1637
1638
1639
1640
1641
1642 ret = vfio_get_dev_region_info(&vdev->vbasedev,
1643 VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
1644 VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion);
1645 if (ret) {
1646 error_report("IGD device %s does not support OpRegion access,"
1647 "legacy mode disabled", vdev->vbasedev.name);
1648 goto out;
1649 }
1650
1651 ret = vfio_get_dev_region_info(&vdev->vbasedev,
1652 VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
1653 VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, &host);
1654 if (ret) {
1655 error_report("IGD device %s does not support host bridge access,"
1656 "legacy mode disabled", vdev->vbasedev.name);
1657 goto out;
1658 }
1659
1660 ret = vfio_get_dev_region_info(&vdev->vbasedev,
1661 VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
1662 VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG, &lpc);
1663 if (ret) {
1664 error_report("IGD device %s does not support LPC bridge access,"
1665 "legacy mode disabled", vdev->vbasedev.name);
1666 goto out;
1667 }
1668
1669 gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4);
1670
1671
1672
1673
1674
1675
1676 if (!(gmch & 0x2) && !vdev->vga && vfio_populate_vga(vdev, &err)) {
1677 error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
1678 error_report("IGD device %s failed to enable VGA access, "
1679 "legacy mode disabled", vdev->vbasedev.name);
1680 goto out;
1681 }
1682
1683
1684 ret = vfio_pci_igd_lpc_init(vdev, lpc);
1685 if (ret) {
1686 error_report("IGD device %s failed to create LPC bridge, "
1687 "legacy mode disabled", vdev->vbasedev.name);
1688 goto out;
1689 }
1690
1691
1692 ret = vfio_pci_igd_host_init(vdev, host);
1693 if (ret) {
1694 error_report("IGD device %s failed to modify host bridge, "
1695 "legacy mode disabled", vdev->vbasedev.name);
1696 goto out;
1697 }
1698
1699
1700 ret = vfio_pci_igd_opregion_init(vdev, opregion, &err);
1701 if (ret) {
1702 error_append_hint(&err, "IGD legacy mode disabled\n");
1703 error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
1704 goto out;
1705 }
1706
1707
1708 quirk = vfio_quirk_alloc(2);
1709 igd = quirk->data = g_malloc0(sizeof(*igd));
1710 igd->vdev = vdev;
1711 igd->index = ~0;
1712 igd->bdsm = vfio_pci_read_config(&vdev->pdev, IGD_BDSM, 4);
1713 igd->bdsm &= ~((1 * MiB) - 1);
1714
1715 memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_igd_index_quirk,
1716 igd, "vfio-igd-index-quirk", 4);
1717 memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
1718 0, &quirk->mem[0], 1);
1719
1720 memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_igd_data_quirk,
1721 igd, "vfio-igd-data-quirk", 4);
1722 memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
1723 4, &quirk->mem[1], 1);
1724
1725 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
1726
1727
1728 ggms_mb = (gmch >> (gen < 8 ? 8 : 6)) & 0x3;
1729 if (gen > 6) {
1730 ggms_mb = 1 << ggms_mb;
1731 }
1732
1733
1734
1735
1736
1737
1738
1739 gmch &= ~((gen < 8 ? 0x1f : 0xff) << (gen < 8 ? 3 : 8));
1740
1741 if (vdev->igd_gms) {
1742 if (vdev->igd_gms <= 0x10) {
1743 gms_mb = vdev->igd_gms * 32;
1744 gmch |= vdev->igd_gms << (gen < 8 ? 3 : 8);
1745 } else {
1746 error_report("Unsupported IGD GMS value 0x%x", vdev->igd_gms);
1747 vdev->igd_gms = 0;
1748 }
1749 }
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759 bdsm_size = g_malloc(sizeof(*bdsm_size));
1760 *bdsm_size = cpu_to_le64((ggms_mb + gms_mb) * MiB);
1761 fw_cfg_add_file(fw_cfg_find(), "etc/igd-bdsm-size",
1762 bdsm_size, sizeof(*bdsm_size));
1763
1764
1765 pci_set_long(vdev->pdev.config + IGD_GMCH, gmch);
1766 pci_set_long(vdev->pdev.wmask + IGD_GMCH, 0);
1767 pci_set_long(vdev->emulated_config_bits + IGD_GMCH, ~0);
1768
1769
1770 pci_set_long(vdev->pdev.config + IGD_BDSM, 0);
1771 pci_set_long(vdev->pdev.wmask + IGD_BDSM, ~0);
1772 pci_set_long(vdev->emulated_config_bits + IGD_BDSM, ~0);
1773
1774
1775
1776
1777
1778
1779
1780 if (pread(vdev->vbasedev.fd, &cmd_orig, sizeof(cmd_orig),
1781 vdev->config_offset + PCI_COMMAND) != sizeof(cmd_orig)) {
1782 error_report("IGD device %s - failed to read PCI command register",
1783 vdev->vbasedev.name);
1784 }
1785
1786 cmd = cmd_orig | PCI_COMMAND_IO;
1787
1788 if (pwrite(vdev->vbasedev.fd, &cmd, sizeof(cmd),
1789 vdev->config_offset + PCI_COMMAND) != sizeof(cmd)) {
1790 error_report("IGD device %s - failed to write PCI command register",
1791 vdev->vbasedev.name);
1792 }
1793
1794 for (i = 1; i < vfio_igd_gtt_max(vdev); i += 4) {
1795 vfio_region_write(&vdev->bars[4].region, 0, i, 4);
1796 vfio_region_write(&vdev->bars[4].region, 4, 0, 4);
1797 }
1798
1799 if (pwrite(vdev->vbasedev.fd, &cmd_orig, sizeof(cmd_orig),
1800 vdev->config_offset + PCI_COMMAND) != sizeof(cmd_orig)) {
1801 error_report("IGD device %s - failed to restore PCI command register",
1802 vdev->vbasedev.name);
1803 }
1804
1805 trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, ggms_mb + gms_mb);
1806
1807out:
1808 g_free(rom);
1809 g_free(opregion);
1810 g_free(host);
1811 g_free(lpc);
1812}
1813
1814
1815
1816
1817void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
1818{
1819 vfio_vga_probe_ati_3c3_quirk(vdev);
1820 vfio_vga_probe_nvidia_3d0_quirk(vdev);
1821}
1822
1823void vfio_vga_quirk_exit(VFIOPCIDevice *vdev)
1824{
1825 VFIOQuirk *quirk;
1826 int i, j;
1827
1828 for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
1829 QLIST_FOREACH(quirk, &vdev->vga->region[i].quirks, next) {
1830 for (j = 0; j < quirk->nr_mem; j++) {
1831 memory_region_del_subregion(&vdev->vga->region[i].mem,
1832 &quirk->mem[j]);
1833 }
1834 }
1835 }
1836}
1837
1838void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev)
1839{
1840 int i, j;
1841
1842 for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
1843 while (!QLIST_EMPTY(&vdev->vga->region[i].quirks)) {
1844 VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga->region[i].quirks);
1845 QLIST_REMOVE(quirk, next);
1846 for (j = 0; j < quirk->nr_mem; j++) {
1847 object_unparent(OBJECT(&quirk->mem[j]));
1848 }
1849 g_free(quirk->mem);
1850 g_free(quirk->data);
1851 g_free(quirk);
1852 }
1853 }
1854}
1855
1856void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
1857{
1858 vfio_probe_ati_bar4_quirk(vdev, nr);
1859 vfio_probe_ati_bar2_quirk(vdev, nr);
1860 vfio_probe_nvidia_bar5_quirk(vdev, nr);
1861 vfio_probe_nvidia_bar0_quirk(vdev, nr);
1862 vfio_probe_rtl8168_bar2_quirk(vdev, nr);
1863 vfio_probe_igd_bar4_quirk(vdev, nr);
1864}
1865
1866void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr)
1867{
1868 VFIOBAR *bar = &vdev->bars[nr];
1869 VFIOQuirk *quirk;
1870 int i;
1871
1872 QLIST_FOREACH(quirk, &bar->quirks, next) {
1873 while (!QLIST_EMPTY(&quirk->ioeventfds)) {
1874 vfio_ioeventfd_exit(vdev, QLIST_FIRST(&quirk->ioeventfds));
1875 }
1876
1877 for (i = 0; i < quirk->nr_mem; i++) {
1878 memory_region_del_subregion(bar->region.mem, &quirk->mem[i]);
1879 }
1880 }
1881}
1882
1883void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr)
1884{
1885 VFIOBAR *bar = &vdev->bars[nr];
1886 int i;
1887
1888 while (!QLIST_EMPTY(&bar->quirks)) {
1889 VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks);
1890 QLIST_REMOVE(quirk, next);
1891 for (i = 0; i < quirk->nr_mem; i++) {
1892 object_unparent(OBJECT(&quirk->mem[i]));
1893 }
1894 g_free(quirk->mem);
1895 g_free(quirk->data);
1896 g_free(quirk);
1897 }
1898}
1899
1900
1901
1902
1903void vfio_quirk_reset(VFIOPCIDevice *vdev)
1904{
1905 int i;
1906
1907 for (i = 0; i < PCI_ROM_SLOT; i++) {
1908 VFIOQuirk *quirk;
1909 VFIOBAR *bar = &vdev->bars[i];
1910
1911 QLIST_FOREACH(quirk, &bar->quirks, next) {
1912 if (quirk->reset) {
1913 quirk->reset(vdev, quirk);
1914 }
1915 }
1916 }
1917}
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940static bool vfio_radeon_smc_is_running(VFIOPCIDevice *vdev)
1941{
1942 uint32_t clk, pc_c;
1943
1944
1945
1946
1947
1948 vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
1949 clk = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1950 vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000370, 4);
1951 pc_c = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1952
1953 return (!(clk & 1) && (0x20100 <= pc_c));
1954}
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964static void vfio_radeon_set_gfx_only_reset(VFIOPCIDevice *vdev)
1965{
1966 uint32_t misc, fuse;
1967 bool a, b;
1968
1969 vfio_region_write(&vdev->bars[5].region, 0x200, 0xc00c0000, 4);
1970 fuse = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1971 b = fuse & 64;
1972
1973 vfio_region_write(&vdev->bars[5].region, 0x200, 0xc0000010, 4);
1974 misc = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1975 a = misc & 2;
1976
1977 if (a == b) {
1978 vfio_region_write(&vdev->bars[5].region, 0x204, misc ^ 2, 4);
1979 vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1980 }
1981}
1982
1983static int vfio_radeon_reset(VFIOPCIDevice *vdev)
1984{
1985 PCIDevice *pdev = &vdev->pdev;
1986 int i, ret = 0;
1987 uint32_t data;
1988
1989
1990 if (vdev->vbasedev.reset_works) {
1991 trace_vfio_quirk_ati_bonaire_reset_skipped(vdev->vbasedev.name);
1992 return -ENODEV;
1993 }
1994
1995
1996 vfio_pci_write_config(pdev, PCI_COMMAND, PCI_COMMAND_MEMORY, 2);
1997
1998
1999 if (!vfio_radeon_smc_is_running(vdev)) {
2000 ret = -EINVAL;
2001 trace_vfio_quirk_ati_bonaire_reset_no_smc(vdev->vbasedev.name);
2002 goto out;
2003 }
2004
2005
2006 vfio_radeon_set_gfx_only_reset(vdev);
2007
2008
2009 vfio_pci_write_config(pdev, 0x7c, 0x39d5e86b, 4);
2010 usleep(100);
2011
2012
2013 for (i = 0; i < 100000; i++) {
2014 if (vfio_region_read(&vdev->bars[5].region, 0x5428, 4) != 0xffffffff) {
2015 goto reset_smc;
2016 }
2017 usleep(1);
2018 }
2019
2020 trace_vfio_quirk_ati_bonaire_reset_timeout(vdev->vbasedev.name);
2021
2022reset_smc:
2023
2024 vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000000, 4);
2025 data = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
2026 data |= 1;
2027 vfio_region_write(&vdev->bars[5].region, 0x204, data, 4);
2028
2029
2030 vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
2031 data = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
2032 data |= 1;
2033 vfio_region_write(&vdev->bars[5].region, 0x204, data, 4);
2034
2035 trace_vfio_quirk_ati_bonaire_reset_done(vdev->vbasedev.name);
2036
2037out:
2038
2039 vfio_pci_write_config(pdev, PCI_COMMAND, 0, 2);
2040
2041 return ret;
2042}
2043
2044void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev)
2045{
2046 switch (vdev->vendor_id) {
2047 case 0x1002:
2048 switch (vdev->device_id) {
2049
2050 case 0x6649:
2051 case 0x6650:
2052 case 0x6651:
2053 case 0x6658:
2054 case 0x665c:
2055 case 0x665d:
2056
2057 case 0x67A0:
2058 case 0x67A1:
2059 case 0x67A2:
2060 case 0x67A8:
2061 case 0x67A9:
2062 case 0x67AA:
2063 case 0x67B0:
2064 case 0x67B1:
2065 case 0x67B8:
2066 case 0x67B9:
2067 case 0x67BA:
2068 case 0x67BE:
2069 vdev->resetfn = vfio_radeon_reset;
2070 trace_vfio_quirk_ati_bonaire_reset(vdev->vbasedev.name);
2071 break;
2072 }
2073 break;
2074 }
2075}
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094static void get_nv_gpudirect_clique_id(Object *obj, Visitor *v,
2095 const char *name, void *opaque,
2096 Error **errp)
2097{
2098 DeviceState *dev = DEVICE(obj);
2099 Property *prop = opaque;
2100 uint8_t *ptr = qdev_get_prop_ptr(dev, prop);
2101
2102 visit_type_uint8(v, name, ptr, errp);
2103}
2104
2105static void set_nv_gpudirect_clique_id(Object *obj, Visitor *v,
2106 const char *name, void *opaque,
2107 Error **errp)
2108{
2109 DeviceState *dev = DEVICE(obj);
2110 Property *prop = opaque;
2111 uint8_t value, *ptr = qdev_get_prop_ptr(dev, prop);
2112 Error *local_err = NULL;
2113
2114 if (dev->realized) {
2115 qdev_prop_set_after_realize(dev, name, errp);
2116 return;
2117 }
2118
2119 visit_type_uint8(v, name, &value, &local_err);
2120 if (local_err) {
2121 error_propagate(errp, local_err);
2122 return;
2123 }
2124
2125 if (value & ~0xF) {
2126 error_setg(errp, "Property %s: valid range 0-15", name);
2127 return;
2128 }
2129
2130 *ptr = value;
2131}
2132
2133const PropertyInfo qdev_prop_nv_gpudirect_clique = {
2134 .name = "uint4",
2135 .description = "NVIDIA GPUDirect Clique ID (0 - 15)",
2136 .get = get_nv_gpudirect_clique_id,
2137 .set = set_nv_gpudirect_clique_id,
2138};
2139
2140static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
2141{
2142 PCIDevice *pdev = &vdev->pdev;
2143 int ret, pos = 0xC8;
2144
2145 if (vdev->nv_gpudirect_clique == 0xFF) {
2146 return 0;
2147 }
2148
2149 if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID)) {
2150 error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid device vendor");
2151 return -EINVAL;
2152 }
2153
2154 if (pci_get_byte(pdev->config + PCI_CLASS_DEVICE + 1) !=
2155 PCI_BASE_CLASS_DISPLAY) {
2156 error_setg(errp, "NVIDIA GPUDirect Clique ID: unsupported PCI class");
2157 return -EINVAL;
2158 }
2159
2160 ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8, errp);
2161 if (ret < 0) {
2162 error_prepend(errp, "Failed to add NVIDIA GPUDirect cap: ");
2163 return ret;
2164 }
2165
2166 memset(vdev->emulated_config_bits + pos, 0xFF, 8);
2167 pos += PCI_CAP_FLAGS;
2168 pci_set_byte(pdev->config + pos++, 8);
2169 pci_set_byte(pdev->config + pos++, 'P');
2170 pci_set_byte(pdev->config + pos++, '2');
2171 pci_set_byte(pdev->config + pos++, 'P');
2172 pci_set_byte(pdev->config + pos++, vdev->nv_gpudirect_clique << 3);
2173 pci_set_byte(pdev->config + pos, 0);
2174
2175 return 0;
2176}
2177
2178int vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp)
2179{
2180 int ret;
2181
2182 ret = vfio_add_nv_gpudirect_cap(vdev, errp);
2183 if (ret) {
2184 return ret;
2185 }
2186
2187 return 0;
2188}
2189
2190static void vfio_pci_nvlink2_get_tgt(Object *obj, Visitor *v,
2191 const char *name,
2192 void *opaque, Error **errp)
2193{
2194 uint64_t tgt = (uintptr_t) opaque;
2195 visit_type_uint64(v, name, &tgt, errp);
2196}
2197
2198static void vfio_pci_nvlink2_get_link_speed(Object *obj, Visitor *v,
2199 const char *name,
2200 void *opaque, Error **errp)
2201{
2202 uint32_t link_speed = (uint32_t)(uintptr_t) opaque;
2203 visit_type_uint32(v, name, &link_speed, errp);
2204}
2205
2206int vfio_pci_nvidia_v100_ram_init(VFIOPCIDevice *vdev, Error **errp)
2207{
2208 int ret;
2209 void *p;
2210 struct vfio_region_info *nv2reg = NULL;
2211 struct vfio_info_cap_header *hdr;
2212 struct vfio_region_info_cap_nvlink2_ssatgt *cap;
2213 VFIOQuirk *quirk;
2214
2215 ret = vfio_get_dev_region_info(&vdev->vbasedev,
2216 VFIO_REGION_TYPE_PCI_VENDOR_TYPE |
2217 PCI_VENDOR_ID_NVIDIA,
2218 VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM,
2219 &nv2reg);
2220 if (ret) {
2221 return ret;
2222 }
2223
2224 hdr = vfio_get_region_info_cap(nv2reg, VFIO_REGION_INFO_CAP_NVLINK2_SSATGT);
2225 if (!hdr) {
2226 ret = -ENODEV;
2227 goto free_exit;
2228 }
2229 cap = (void *) hdr;
2230
2231 p = mmap(NULL, nv2reg->size, PROT_READ | PROT_WRITE | PROT_EXEC,
2232 MAP_SHARED, vdev->vbasedev.fd, nv2reg->offset);
2233 if (p == MAP_FAILED) {
2234 ret = -errno;
2235 goto free_exit;
2236 }
2237
2238 quirk = vfio_quirk_alloc(1);
2239 memory_region_init_ram_ptr(&quirk->mem[0], OBJECT(vdev), "nvlink2-mr",
2240 nv2reg->size, p);
2241 QLIST_INSERT_HEAD(&vdev->bars[0].quirks, quirk, next);
2242
2243 object_property_add(OBJECT(vdev), "nvlink2-tgt", "uint64",
2244 vfio_pci_nvlink2_get_tgt, NULL, NULL,
2245 (void *) (uintptr_t) cap->tgt, NULL);
2246 trace_vfio_pci_nvidia_gpu_setup_quirk(vdev->vbasedev.name, cap->tgt,
2247 nv2reg->size);
2248free_exit:
2249 g_free(nv2reg);
2250
2251 return ret;
2252}
2253
2254int vfio_pci_nvlink2_init(VFIOPCIDevice *vdev, Error **errp)
2255{
2256 int ret;
2257 void *p;
2258 struct vfio_region_info *atsdreg = NULL;
2259 struct vfio_info_cap_header *hdr;
2260 struct vfio_region_info_cap_nvlink2_ssatgt *captgt;
2261 struct vfio_region_info_cap_nvlink2_lnkspd *capspeed;
2262 VFIOQuirk *quirk;
2263
2264 ret = vfio_get_dev_region_info(&vdev->vbasedev,
2265 VFIO_REGION_TYPE_PCI_VENDOR_TYPE |
2266 PCI_VENDOR_ID_IBM,
2267 VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD,
2268 &atsdreg);
2269 if (ret) {
2270 return ret;
2271 }
2272
2273 hdr = vfio_get_region_info_cap(atsdreg,
2274 VFIO_REGION_INFO_CAP_NVLINK2_SSATGT);
2275 if (!hdr) {
2276 ret = -ENODEV;
2277 goto free_exit;
2278 }
2279 captgt = (void *) hdr;
2280
2281 hdr = vfio_get_region_info_cap(atsdreg,
2282 VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD);
2283 if (!hdr) {
2284 ret = -ENODEV;
2285 goto free_exit;
2286 }
2287 capspeed = (void *) hdr;
2288
2289
2290 if (atsdreg->size) {
2291 p = mmap(NULL, atsdreg->size, PROT_READ | PROT_WRITE | PROT_EXEC,
2292 MAP_SHARED, vdev->vbasedev.fd, atsdreg->offset);
2293 if (p == MAP_FAILED) {
2294 ret = -errno;
2295 goto free_exit;
2296 }
2297
2298 quirk = vfio_quirk_alloc(1);
2299 memory_region_init_ram_device_ptr(&quirk->mem[0], OBJECT(vdev),
2300 "nvlink2-atsd-mr", atsdreg->size, p);
2301 QLIST_INSERT_HEAD(&vdev->bars[0].quirks, quirk, next);
2302 }
2303
2304 object_property_add(OBJECT(vdev), "nvlink2-tgt", "uint64",
2305 vfio_pci_nvlink2_get_tgt, NULL, NULL,
2306 (void *) (uintptr_t) captgt->tgt, NULL);
2307 trace_vfio_pci_nvlink2_setup_quirk_ssatgt(vdev->vbasedev.name, captgt->tgt,
2308 atsdreg->size);
2309
2310 object_property_add(OBJECT(vdev), "nvlink2-link-speed", "uint32",
2311 vfio_pci_nvlink2_get_link_speed, NULL, NULL,
2312 (void *) (uintptr_t) capspeed->link_speed, NULL);
2313 trace_vfio_pci_nvlink2_setup_quirk_lnkspd(vdev->vbasedev.name,
2314 capspeed->link_speed);
2315free_exit:
2316 g_free(atsdreg);
2317
2318 return ret;
2319}
2320