1
2
3
4
5
6
7
8
9
10
11
12
13#include "qemu/osdep.h"
14#include "qemu/units.h"
15#include "qemu/error-report.h"
16#include "qemu/main-loop.h"
17#include "qemu/module.h"
18#include "qemu/range.h"
19#include "qapi/error.h"
20#include "qapi/visitor.h"
21#include <sys/ioctl.h>
22#include "hw/nvram/fw_cfg.h"
23#include "pci.h"
24#include "trace.h"
25
26
27static bool vfio_pci_is(VFIOPCIDevice *vdev, uint32_t vendor, uint32_t device)
28{
29 return (vendor == PCI_ANY_ID || vendor == vdev->vendor_id) &&
30 (device == PCI_ANY_ID || device == vdev->device_id);
31}
32
33static bool vfio_is_vga(VFIOPCIDevice *vdev)
34{
35 PCIDevice *pdev = &vdev->pdev;
36 uint16_t class = pci_get_word(pdev->config + PCI_CLASS_DEVICE);
37
38 return class == PCI_CLASS_DISPLAY_VGA;
39}
40
41
42
43
44
45
46
47
48
49
50
51
52
53
/*
 * Vendor/device ID pairs whose option ROM is blacklisted.  The table is
 * scanned by vfio_blacklist_opt_rom(), which matches entries with
 * vfio_pci_is().
 */
static const struct {
    uint32_t vendor;    /* PCI vendor ID to match */
    uint32_t device;    /* PCI device ID to match */
} romblacklist[] = {
    { 0x14e4, 0x168e },
};
60
61bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev)
62{
63 int i;
64
65 for (i = 0 ; i < ARRAY_SIZE(romblacklist); i++) {
66 if (vfio_pci_is(vdev, romblacklist[i].vendor, romblacklist[i].device)) {
67 trace_vfio_quirk_rom_blacklisted(vdev->vbasedev.name,
68 romblacklist[i].vendor,
69 romblacklist[i].device);
70 return true;
71 }
72 }
73 return false;
74}
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
/*
 * One address pattern recognised by a config window quirk.  The address
 * write handler treats a written value as a hit when the bits outside @mask
 * equal @match; the bits inside @mask are then kept as the config offset.
 */
typedef struct VFIOConfigWindowMatch {
    uint32_t match;     /* expected value of (data & ~mask) */
    uint32_t mask;      /* bits of the written value that carry the offset */
} VFIOConfigWindowMatch;
95
/*
 * State for a generic address/data "window" quirk: an address register and
 * a data register inside a BAR through which config space can be reached.
 */
typedef struct VFIOConfigWindowQuirk {
    struct VFIOPCIDevice *vdev;     /* device owning the quirk */

    uint32_t address_val;           /* config offset latched by the last
                                     * matching address write */

    uint32_t address_offset;        /* offset of the address register,
                                     * added to addr for BAR accesses */
    uint32_t data_offset;           /* offset of the data register,
                                     * added to addr for BAR accesses */

    bool window_enabled;            /* last address write hit a match */
    uint8_t bar;                    /* index into vdev->bars[] */

    MemoryRegion *addr_mem;         /* region backing the address register */
    MemoryRegion *data_mem;         /* region backing the data register */

    uint32_t nr_matches;            /* number of entries in matches[] */
    VFIOConfigWindowMatch matches[];    /* trailing array; the allocator
                                         * must reserve space for it */
} VFIOConfigWindowQuirk;
113
114static uint64_t vfio_generic_window_quirk_address_read(void *opaque,
115 hwaddr addr,
116 unsigned size)
117{
118 VFIOConfigWindowQuirk *window = opaque;
119 VFIOPCIDevice *vdev = window->vdev;
120
121 return vfio_region_read(&vdev->bars[window->bar].region,
122 addr + window->address_offset, size);
123}
124
125static void vfio_generic_window_quirk_address_write(void *opaque, hwaddr addr,
126 uint64_t data,
127 unsigned size)
128{
129 VFIOConfigWindowQuirk *window = opaque;
130 VFIOPCIDevice *vdev = window->vdev;
131 int i;
132
133 window->window_enabled = false;
134
135 vfio_region_write(&vdev->bars[window->bar].region,
136 addr + window->address_offset, data, size);
137
138 for (i = 0; i < window->nr_matches; i++) {
139 if ((data & ~window->matches[i].mask) == window->matches[i].match) {
140 window->window_enabled = true;
141 window->address_val = data & window->matches[i].mask;
142 trace_vfio_quirk_generic_window_address_write(vdev->vbasedev.name,
143 memory_region_name(window->addr_mem), data);
144 break;
145 }
146 }
147}
148
/* Access callbacks for the address register of a generic config window. */
static const MemoryRegionOps vfio_generic_window_address_quirk = {
    .read = vfio_generic_window_quirk_address_read,
    .write = vfio_generic_window_quirk_address_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
154
155static uint64_t vfio_generic_window_quirk_data_read(void *opaque,
156 hwaddr addr, unsigned size)
157{
158 VFIOConfigWindowQuirk *window = opaque;
159 VFIOPCIDevice *vdev = window->vdev;
160 uint64_t data;
161
162
163 data = vfio_region_read(&vdev->bars[window->bar].region,
164 addr + window->data_offset, size);
165
166 if (window->window_enabled) {
167 data = vfio_pci_read_config(&vdev->pdev, window->address_val, size);
168 trace_vfio_quirk_generic_window_data_read(vdev->vbasedev.name,
169 memory_region_name(window->data_mem), data);
170 }
171
172 return data;
173}
174
175static void vfio_generic_window_quirk_data_write(void *opaque, hwaddr addr,
176 uint64_t data, unsigned size)
177{
178 VFIOConfigWindowQuirk *window = opaque;
179 VFIOPCIDevice *vdev = window->vdev;
180
181 if (window->window_enabled) {
182 vfio_pci_write_config(&vdev->pdev, window->address_val, data, size);
183 trace_vfio_quirk_generic_window_data_write(vdev->vbasedev.name,
184 memory_region_name(window->data_mem), data);
185 return;
186 }
187
188 vfio_region_write(&vdev->bars[window->bar].region,
189 addr + window->data_offset, data, size);
190}
191
/* Access callbacks for the data register of a generic config window. */
static const MemoryRegionOps vfio_generic_window_data_quirk = {
    .read = vfio_generic_window_quirk_data_read,
    .write = vfio_generic_window_quirk_data_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
197
198
199
200
201
202
203
/*
 * State for a quirk that mirrors config space into a range of a BAR:
 * accesses to the range are served from/forwarded to config space.
 */
typedef struct VFIOConfigMirrorQuirk {
    struct VFIOPCIDevice *vdev;     /* device owning the quirk */
    uint32_t offset;                /* offset of the mirror within the BAR */
    uint8_t bar;                    /* index into vdev->bars[] */
    MemoryRegion *mem;              /* region overlaid on the mirror range */
    uint8_t data[];                 /* optional trailing per-quirk storage;
                                     * the NVIDIA BAR0 quirk keeps a
                                     * LastDataSet here */
} VFIOConfigMirrorQuirk;
211
212static uint64_t vfio_generic_quirk_mirror_read(void *opaque,
213 hwaddr addr, unsigned size)
214{
215 VFIOConfigMirrorQuirk *mirror = opaque;
216 VFIOPCIDevice *vdev = mirror->vdev;
217 uint64_t data;
218
219
220 (void)vfio_region_read(&vdev->bars[mirror->bar].region,
221 addr + mirror->offset, size);
222
223 data = vfio_pci_read_config(&vdev->pdev, addr, size);
224 trace_vfio_quirk_generic_mirror_read(vdev->vbasedev.name,
225 memory_region_name(mirror->mem),
226 addr, data);
227 return data;
228}
229
230static void vfio_generic_quirk_mirror_write(void *opaque, hwaddr addr,
231 uint64_t data, unsigned size)
232{
233 VFIOConfigMirrorQuirk *mirror = opaque;
234 VFIOPCIDevice *vdev = mirror->vdev;
235
236 vfio_pci_write_config(&vdev->pdev, addr, data, size);
237 trace_vfio_quirk_generic_mirror_write(vdev->vbasedev.name,
238 memory_region_name(mirror->mem),
239 addr, data);
240}
241
/* Access callbacks for a generic config-space mirror region. */
static const MemoryRegionOps vfio_generic_mirror_quirk = {
    .read = vfio_generic_quirk_mirror_read,
    .write = vfio_generic_quirk_mirror_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
247
248
/*
 * Return true when the range [first1, first1 + len1) lies entirely inside
 * [first2, first2 + len2).  The end points are computed with plain uint64_t
 * arithmetic, so callers must not pass ranges that wrap.
 */
static bool vfio_range_contained(uint64_t first1, uint64_t len1,
                                 uint64_t first2, uint64_t len2)
{
    uint64_t end1 = first1 + len1;
    uint64_t end2 = first2 + len2;

    if (first1 < first2) {
        return false;
    }

    return end1 <= end2;
}
253
/* PCI vendor ID matched by the ATI quirk probes in this file. */
#define PCI_VENDOR_ID_ATI               0x1002
255
256
257
258
259
260
261
262
263
264
265
266static uint64_t vfio_ati_3c3_quirk_read(void *opaque,
267 hwaddr addr, unsigned size)
268{
269 VFIOPCIDevice *vdev = opaque;
270 uint64_t data = vfio_pci_read_config(&vdev->pdev,
271 PCI_BASE_ADDRESS_4 + 1, size);
272
273 trace_vfio_quirk_ati_3c3_read(vdev->vbasedev.name, data);
274
275 return data;
276}
277
278static const MemoryRegionOps vfio_ati_3c3_quirk = {
279 .read = vfio_ati_3c3_quirk_read,
280 .endianness = DEVICE_LITTLE_ENDIAN,
281};
282
283static VFIOQuirk *vfio_quirk_alloc(int nr_mem)
284{
285 VFIOQuirk *quirk = g_new0(VFIOQuirk, 1);
286 QLIST_INIT(&quirk->ioeventfds);
287 quirk->mem = g_new0(MemoryRegion, nr_mem);
288 quirk->nr_mem = nr_mem;
289
290 return quirk;
291}
292
293static void vfio_ioeventfd_exit(VFIOPCIDevice *vdev, VFIOIOEventFD *ioeventfd)
294{
295 QLIST_REMOVE(ioeventfd, next);
296 memory_region_del_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size,
297 true, ioeventfd->data, &ioeventfd->e);
298
299 if (ioeventfd->vfio) {
300 struct vfio_device_ioeventfd vfio_ioeventfd;
301
302 vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
303 vfio_ioeventfd.flags = ioeventfd->size;
304 vfio_ioeventfd.data = ioeventfd->data;
305 vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
306 ioeventfd->region_addr;
307 vfio_ioeventfd.fd = -1;
308
309 if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd)) {
310 error_report("Failed to remove vfio ioeventfd for %s+0x%"
311 HWADDR_PRIx"[%d]:0x%"PRIx64" (%m)",
312 memory_region_name(ioeventfd->mr), ioeventfd->addr,
313 ioeventfd->size, ioeventfd->data);
314 }
315 } else {
316 qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
317 NULL, NULL, NULL);
318 }
319
320 event_notifier_cleanup(&ioeventfd->e);
321 trace_vfio_ioeventfd_exit(memory_region_name(ioeventfd->mr),
322 (uint64_t)ioeventfd->addr, ioeventfd->size,
323 ioeventfd->data);
324 g_free(ioeventfd);
325}
326
327static void vfio_drop_dynamic_eventfds(VFIOPCIDevice *vdev, VFIOQuirk *quirk)
328{
329 VFIOIOEventFD *ioeventfd, *tmp;
330
331 QLIST_FOREACH_SAFE(ioeventfd, &quirk->ioeventfds, next, tmp) {
332 if (ioeventfd->dynamic) {
333 vfio_ioeventfd_exit(vdev, ioeventfd);
334 }
335 }
336}
337
338static void vfio_ioeventfd_handler(void *opaque)
339{
340 VFIOIOEventFD *ioeventfd = opaque;
341
342 if (event_notifier_test_and_clear(&ioeventfd->e)) {
343 vfio_region_write(ioeventfd->region, ioeventfd->region_addr,
344 ioeventfd->data, ioeventfd->size);
345 trace_vfio_ioeventfd_handler(memory_region_name(ioeventfd->mr),
346 (uint64_t)ioeventfd->addr, ioeventfd->size,
347 ioeventfd->data);
348 }
349}
350
351static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev,
352 MemoryRegion *mr, hwaddr addr,
353 unsigned size, uint64_t data,
354 VFIORegion *region,
355 hwaddr region_addr, bool dynamic)
356{
357 VFIOIOEventFD *ioeventfd;
358
359 if (vdev->no_kvm_ioeventfd) {
360 return NULL;
361 }
362
363 ioeventfd = g_malloc0(sizeof(*ioeventfd));
364
365 if (event_notifier_init(&ioeventfd->e, 0)) {
366 g_free(ioeventfd);
367 return NULL;
368 }
369
370
371
372
373
374 ioeventfd->mr = mr;
375 ioeventfd->addr = addr;
376 ioeventfd->size = size;
377 ioeventfd->data = data;
378 ioeventfd->dynamic = dynamic;
379
380
381
382
383 ioeventfd->region = region;
384 ioeventfd->region_addr = region_addr;
385
386 if (!vdev->no_vfio_ioeventfd) {
387 struct vfio_device_ioeventfd vfio_ioeventfd;
388
389 vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
390 vfio_ioeventfd.flags = ioeventfd->size;
391 vfio_ioeventfd.data = ioeventfd->data;
392 vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
393 ioeventfd->region_addr;
394 vfio_ioeventfd.fd = event_notifier_get_fd(&ioeventfd->e);
395
396 ioeventfd->vfio = !ioctl(vdev->vbasedev.fd,
397 VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd);
398 }
399
400 if (!ioeventfd->vfio) {
401 qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
402 vfio_ioeventfd_handler, NULL, ioeventfd);
403 }
404
405 memory_region_add_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size,
406 true, ioeventfd->data, &ioeventfd->e);
407 trace_vfio_ioeventfd_init(memory_region_name(mr), (uint64_t)addr,
408 size, data, ioeventfd->vfio);
409
410 return ioeventfd;
411}
412
413static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
414{
415 VFIOQuirk *quirk;
416
417
418
419
420
421 if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
422 !vdev->bars[4].ioport || vdev->bars[4].region.size < 256) {
423 return;
424 }
425
426 quirk = vfio_quirk_alloc(1);
427
428 memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, vdev,
429 "vfio-ati-3c3-quirk", 1);
430 memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
431 3 , quirk->mem);
432
433 QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks,
434 quirk, next);
435
436 trace_vfio_quirk_ati_3c3_probe(vdev->vbasedev.name);
437}
438
439
440
441
442
443
444
445
446
447
448static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr)
449{
450 VFIOQuirk *quirk;
451 VFIOConfigWindowQuirk *window;
452
453
454 if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
455 !vdev->vga || nr != 4) {
456 return;
457 }
458
459 quirk = vfio_quirk_alloc(2);
460 window = quirk->data = g_malloc0(sizeof(*window) +
461 sizeof(VFIOConfigWindowMatch));
462 window->vdev = vdev;
463 window->address_offset = 0;
464 window->data_offset = 4;
465 window->nr_matches = 1;
466 window->matches[0].match = 0x4000;
467 window->matches[0].mask = vdev->config_size - 1;
468 window->bar = nr;
469 window->addr_mem = &quirk->mem[0];
470 window->data_mem = &quirk->mem[1];
471
472 memory_region_init_io(window->addr_mem, OBJECT(vdev),
473 &vfio_generic_window_address_quirk, window,
474 "vfio-ati-bar4-window-address-quirk", 4);
475 memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
476 window->address_offset,
477 window->addr_mem, 1);
478
479 memory_region_init_io(window->data_mem, OBJECT(vdev),
480 &vfio_generic_window_data_quirk, window,
481 "vfio-ati-bar4-window-data-quirk", 4);
482 memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
483 window->data_offset,
484 window->data_mem, 1);
485
486 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
487
488 trace_vfio_quirk_ati_bar4_probe(vdev->vbasedev.name);
489}
490
491
492
493
494static void vfio_probe_ati_bar2_quirk(VFIOPCIDevice *vdev, int nr)
495{
496 VFIOQuirk *quirk;
497 VFIOConfigMirrorQuirk *mirror;
498
499
500 if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
501 !vdev->vga || nr != 2 || !vdev->bars[2].mem64) {
502 return;
503 }
504
505 quirk = vfio_quirk_alloc(1);
506 mirror = quirk->data = g_malloc0(sizeof(*mirror));
507 mirror->mem = quirk->mem;
508 mirror->vdev = vdev;
509 mirror->offset = 0x4000;
510 mirror->bar = nr;
511
512 memory_region_init_io(mirror->mem, OBJECT(vdev),
513 &vfio_generic_mirror_quirk, mirror,
514 "vfio-ati-bar2-4000-quirk", PCI_CONFIG_SPACE_SIZE);
515 memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
516 mirror->offset, mirror->mem, 1);
517
518 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
519
520 trace_vfio_quirk_ati_bar2_probe(vdev->vbasedev.name);
521}
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
/*
 * States of the NVIDIA 3d0/3d4 quirk state machine.  As implemented by the
 * handlers below: writing 0x338 to the 3d4 region moves NONE -> SELECT, a
 * write to the 3d0 region then moves SELECT -> WINDOW (latching the offset),
 * and writing 0x538 or 0x738 to the 3d4 region moves WINDOW -> READ or
 * WINDOW -> WRITE.  Any other access resets the state to NONE.
 */
typedef enum {NONE = 0, SELECT, WINDOW, READ, WRITE} VFIONvidia3d0State;
/* Names for trace output, indexed by VFIONvidia3d0State */
static const char *nv3d0_states[] = { "NONE", "SELECT",
                                      "WINDOW", "READ", "WRITE" };
548
/* Per-device state shared by the NVIDIA 3d0 and 3d4 quirk regions. */
typedef struct VFIONvidia3d0Quirk {
    VFIOPCIDevice *vdev;            /* device owning the quirk */
    VFIONvidia3d0State state;       /* current position in the sequence */
    uint32_t offset;                /* value latched by the 3d0 write made
                                     * in the SELECT state */
} VFIONvidia3d0Quirk;
554
555static uint64_t vfio_nvidia_3d4_quirk_read(void *opaque,
556 hwaddr addr, unsigned size)
557{
558 VFIONvidia3d0Quirk *quirk = opaque;
559 VFIOPCIDevice *vdev = quirk->vdev;
560
561 quirk->state = NONE;
562
563 return vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
564 addr + 0x14, size);
565}
566
567static void vfio_nvidia_3d4_quirk_write(void *opaque, hwaddr addr,
568 uint64_t data, unsigned size)
569{
570 VFIONvidia3d0Quirk *quirk = opaque;
571 VFIOPCIDevice *vdev = quirk->vdev;
572 VFIONvidia3d0State old_state = quirk->state;
573
574 quirk->state = NONE;
575
576 switch (data) {
577 case 0x338:
578 if (old_state == NONE) {
579 quirk->state = SELECT;
580 trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
581 nv3d0_states[quirk->state]);
582 }
583 break;
584 case 0x538:
585 if (old_state == WINDOW) {
586 quirk->state = READ;
587 trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
588 nv3d0_states[quirk->state]);
589 }
590 break;
591 case 0x738:
592 if (old_state == WINDOW) {
593 quirk->state = WRITE;
594 trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
595 nv3d0_states[quirk->state]);
596 }
597 break;
598 }
599
600 vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
601 addr + 0x14, data, size);
602}
603
/* Access callbacks for the NVIDIA 3d4 quirk region. */
static const MemoryRegionOps vfio_nvidia_3d4_quirk = {
    .read = vfio_nvidia_3d4_quirk_read,
    .write = vfio_nvidia_3d4_quirk_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
609
610static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque,
611 hwaddr addr, unsigned size)
612{
613 VFIONvidia3d0Quirk *quirk = opaque;
614 VFIOPCIDevice *vdev = quirk->vdev;
615 VFIONvidia3d0State old_state = quirk->state;
616 uint64_t data = vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
617 addr + 0x10, size);
618
619 quirk->state = NONE;
620
621 if (old_state == READ &&
622 (quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
623 uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);
624
625 data = vfio_pci_read_config(&vdev->pdev, offset, size);
626 trace_vfio_quirk_nvidia_3d0_read(vdev->vbasedev.name,
627 offset, size, data);
628 }
629
630 return data;
631}
632
633static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr,
634 uint64_t data, unsigned size)
635{
636 VFIONvidia3d0Quirk *quirk = opaque;
637 VFIOPCIDevice *vdev = quirk->vdev;
638 VFIONvidia3d0State old_state = quirk->state;
639
640 quirk->state = NONE;
641
642 if (old_state == SELECT) {
643 quirk->offset = (uint32_t)data;
644 quirk->state = WINDOW;
645 trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
646 nv3d0_states[quirk->state]);
647 } else if (old_state == WRITE) {
648 if ((quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
649 uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);
650
651 vfio_pci_write_config(&vdev->pdev, offset, data, size);
652 trace_vfio_quirk_nvidia_3d0_write(vdev->vbasedev.name,
653 offset, data, size);
654 return;
655 }
656 }
657
658 vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
659 addr + 0x10, data, size);
660}
661
/* Access callbacks for the NVIDIA 3d0 quirk region. */
static const MemoryRegionOps vfio_nvidia_3d0_quirk = {
    .read = vfio_nvidia_3d0_quirk_read,
    .write = vfio_nvidia_3d0_quirk_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
667
668static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev)
669{
670 VFIOQuirk *quirk;
671 VFIONvidia3d0Quirk *data;
672
673 if (vdev->no_geforce_quirks ||
674 !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
675 !vdev->bars[1].region.size) {
676 return;
677 }
678
679 quirk = vfio_quirk_alloc(2);
680 quirk->data = data = g_malloc0(sizeof(*data));
681 data->vdev = vdev;
682
683 memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_nvidia_3d4_quirk,
684 data, "vfio-nvidia-3d4-quirk", 2);
685 memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
686 0x14 , &quirk->mem[0]);
687
688 memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_nvidia_3d0_quirk,
689 data, "vfio-nvidia-3d0-quirk", 2);
690 memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
691 0x10 , &quirk->mem[1]);
692
693 QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks,
694 quirk, next);
695
696 trace_vfio_quirk_nvidia_3d0_probe(vdev->vbasedev.name);
697}
698
699
700
701
702
703
704
705
/*
 * State for the NVIDIA BAR5 quirk: a config window whose address/data
 * regions are only enabled while bit 0 is set in both the "master" and
 * "enable" registers.
 */
typedef struct VFIONvidiaBAR5Quirk {
    uint32_t master;                /* last value written at BAR5 offset 0 */
    uint32_t enable;                /* last value written at BAR5 offset 4 */
    MemoryRegion *addr_mem;         /* window address region, toggled */
    MemoryRegion *data_mem;         /* window data region, toggled */
    bool enabled;                   /* current state of the two regions */
    VFIOConfigWindowQuirk window;   /* kept last: ends in a flexible array
                                     * that the allocation extends */
} VFIONvidiaBAR5Quirk;
714
715static void vfio_nvidia_bar5_enable(VFIONvidiaBAR5Quirk *bar5)
716{
717 VFIOPCIDevice *vdev = bar5->window.vdev;
718
719 if (((bar5->master & bar5->enable) & 0x1) == bar5->enabled) {
720 return;
721 }
722
723 bar5->enabled = !bar5->enabled;
724 trace_vfio_quirk_nvidia_bar5_state(vdev->vbasedev.name,
725 bar5->enabled ? "Enable" : "Disable");
726 memory_region_set_enabled(bar5->addr_mem, bar5->enabled);
727 memory_region_set_enabled(bar5->data_mem, bar5->enabled);
728}
729
730static uint64_t vfio_nvidia_bar5_quirk_master_read(void *opaque,
731 hwaddr addr, unsigned size)
732{
733 VFIONvidiaBAR5Quirk *bar5 = opaque;
734 VFIOPCIDevice *vdev = bar5->window.vdev;
735
736 return vfio_region_read(&vdev->bars[5].region, addr, size);
737}
738
739static void vfio_nvidia_bar5_quirk_master_write(void *opaque, hwaddr addr,
740 uint64_t data, unsigned size)
741{
742 VFIONvidiaBAR5Quirk *bar5 = opaque;
743 VFIOPCIDevice *vdev = bar5->window.vdev;
744
745 vfio_region_write(&vdev->bars[5].region, addr, data, size);
746
747 bar5->master = data;
748 vfio_nvidia_bar5_enable(bar5);
749}
750
/* Access callbacks for the NVIDIA BAR5 "master" register (BAR offset 0). */
static const MemoryRegionOps vfio_nvidia_bar5_quirk_master = {
    .read = vfio_nvidia_bar5_quirk_master_read,
    .write = vfio_nvidia_bar5_quirk_master_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
756
757static uint64_t vfio_nvidia_bar5_quirk_enable_read(void *opaque,
758 hwaddr addr, unsigned size)
759{
760 VFIONvidiaBAR5Quirk *bar5 = opaque;
761 VFIOPCIDevice *vdev = bar5->window.vdev;
762
763 return vfio_region_read(&vdev->bars[5].region, addr + 4, size);
764}
765
766static void vfio_nvidia_bar5_quirk_enable_write(void *opaque, hwaddr addr,
767 uint64_t data, unsigned size)
768{
769 VFIONvidiaBAR5Quirk *bar5 = opaque;
770 VFIOPCIDevice *vdev = bar5->window.vdev;
771
772 vfio_region_write(&vdev->bars[5].region, addr + 4, data, size);
773
774 bar5->enable = data;
775 vfio_nvidia_bar5_enable(bar5);
776}
777
/* Access callbacks for the NVIDIA BAR5 "enable" register (BAR offset 4). */
static const MemoryRegionOps vfio_nvidia_bar5_quirk_enable = {
    .read = vfio_nvidia_bar5_quirk_enable_read,
    .write = vfio_nvidia_bar5_quirk_enable_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
783
784static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
785{
786 VFIOQuirk *quirk;
787 VFIONvidiaBAR5Quirk *bar5;
788 VFIOConfigWindowQuirk *window;
789
790 if (vdev->no_geforce_quirks ||
791 !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
792 !vdev->vga || nr != 5 || !vdev->bars[5].ioport) {
793 return;
794 }
795
796 quirk = vfio_quirk_alloc(4);
797 bar5 = quirk->data = g_malloc0(sizeof(*bar5) +
798 (sizeof(VFIOConfigWindowMatch) * 2));
799 window = &bar5->window;
800
801 window->vdev = vdev;
802 window->address_offset = 0x8;
803 window->data_offset = 0xc;
804 window->nr_matches = 2;
805 window->matches[0].match = 0x1800;
806 window->matches[0].mask = PCI_CONFIG_SPACE_SIZE - 1;
807 window->matches[1].match = 0x88000;
808 window->matches[1].mask = vdev->config_size - 1;
809 window->bar = nr;
810 window->addr_mem = bar5->addr_mem = &quirk->mem[0];
811 window->data_mem = bar5->data_mem = &quirk->mem[1];
812
813 memory_region_init_io(window->addr_mem, OBJECT(vdev),
814 &vfio_generic_window_address_quirk, window,
815 "vfio-nvidia-bar5-window-address-quirk", 4);
816 memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
817 window->address_offset,
818 window->addr_mem, 1);
819 memory_region_set_enabled(window->addr_mem, false);
820
821 memory_region_init_io(window->data_mem, OBJECT(vdev),
822 &vfio_generic_window_data_quirk, window,
823 "vfio-nvidia-bar5-window-data-quirk", 4);
824 memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
825 window->data_offset,
826 window->data_mem, 1);
827 memory_region_set_enabled(window->data_mem, false);
828
829 memory_region_init_io(&quirk->mem[2], OBJECT(vdev),
830 &vfio_nvidia_bar5_quirk_master, bar5,
831 "vfio-nvidia-bar5-master-quirk", 4);
832 memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
833 0, &quirk->mem[2], 1);
834
835 memory_region_init_io(&quirk->mem[3], OBJECT(vdev),
836 &vfio_nvidia_bar5_quirk_enable, bar5,
837 "vfio-nvidia-bar5-enable-quirk", 4);
838 memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
839 4, &quirk->mem[3], 1);
840
841 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
842
843 trace_vfio_quirk_nvidia_bar5_probe(vdev->vbasedev.name);
844}
845
/*
 * Bookkeeping for the NVIDIA BAR0 mirror quirk's dynamic ioeventfd
 * detection.  It lives in the trailing data[] of a VFIOConfigMirrorQuirk.
 */
typedef struct LastDataSet {
    VFIOQuirk *quirk;   /* quirk whose ioeventfd list receives new entries */
    hwaddr addr;        /* address of the most recently tracked write */
    uint64_t data;      /* data of the most recently tracked write */
    unsigned size;      /* size of the most recently tracked write */
    int hits;           /* consecutive identical writes seen so far */
    int added;          /* dynamic ioeventfds created; bumped once more
                         * when the limit is hit so tracking stops */
} LastDataSet;
854
/* Upper bound on dynamically added ioeventfds per mirror quirk */
#define MAX_DYN_IOEVENTFD 10
/* Consecutive identical writes required before an ioeventfd is added */
#define HITS_FOR_IOEVENTFD 10
857
858
859
860
861
/*
 * Write handler for the NVIDIA BAR0 config mirror.
 *
 * Beyond the generic mirror behaviour (write goes to config space), this
 * handler:
 *  - also writes through to the BAR when the access lies within the first
 *    PCI_MSI_FLAGS bytes of the MSI capability;
 *  - watches for the same (addr, data, size) write being repeated and, after
 *    HITS_FOR_IOEVENTFD consecutive repeats, installs a dynamic ioeventfd
 *    for it, up to MAX_DYN_IOEVENTFD per quirk.
 */
static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr,
                                           uint64_t data, unsigned size)
{
    VFIOConfigMirrorQuirk *mirror = opaque;
    VFIOPCIDevice *vdev = mirror->vdev;
    PCIDevice *pdev = &vdev->pdev;
    /* The probe function allocates a LastDataSet in the trailing data[] */
    LastDataSet *last = (LastDataSet *)&mirror->data;

    vfio_generic_quirk_mirror_write(opaque, addr, data, size);

    /*
     * Writes that fall at the start of the MSI capability are additionally
     * passed to the hardware copy in the BAR (traced as an "MSI ack").
     */
    if ((pdev->cap_present & QEMU_PCI_CAP_MSI) &&
        vfio_range_contained(addr, size, pdev->msi_cap, PCI_MSI_FLAGS)) {
        vfio_region_write(&vdev->bars[mirror->bar].region,
                          addr + mirror->offset, data, size);
        trace_vfio_quirk_nvidia_bar0_msi_ack(vdev->vbasedev.name);
    }

    /*
     * Dynamic ioeventfd detection.  Only writes beyond the standard header
     * are considered, and only while "added" has not been pushed past the
     * limit (see the else branch below).
     */
    if (!vdev->no_kvm_ioeventfd &&
        addr >= PCI_STD_HEADER_SIZEOF && last->added <= MAX_DYN_IOEVENTFD) {
        if (addr != last->addr || data != last->data || size != last->size) {
            /* Different write: restart the run */
            last->addr = addr;
            last->data = data;
            last->size = size;
            last->hits = 1;
        } else if (++last->hits >= HITS_FOR_IOEVENTFD) {
            if (last->added < MAX_DYN_IOEVENTFD) {
                VFIOIOEventFD *ioeventfd;
                ioeventfd = vfio_ioeventfd_init(vdev, mirror->mem, addr, size,
                                        data, &vdev->bars[mirror->bar].region,
                                        mirror->offset + addr, true);
                if (ioeventfd) {
                    VFIOQuirk *quirk = last->quirk;

                    QLIST_INSERT_HEAD(&quirk->ioeventfds, ioeventfd, next);
                    last->added++;
                }
            } else {
                /*
                 * Limit reached: push "added" past MAX_DYN_IOEVENTFD so the
                 * outer condition fails from now on and this warning is
                 * printed only once (until the counters are reset).
                 */
                last->added++;
                warn_report("NVIDIA ioeventfd queue full for %s, unable to "
                            "accelerate 0x%"HWADDR_PRIx", data 0x%"PRIx64", "
                            "size %u", vdev->vbasedev.name, addr, data, size);
            }
        }
    }
}
927
/*
 * Access callbacks for the NVIDIA BAR0 mirrors: generic mirror read,
 * NVIDIA-specific write.
 */
static const MemoryRegionOps vfio_nvidia_mirror_quirk = {
    .read = vfio_generic_quirk_mirror_read,
    .write = vfio_nvidia_quirk_mirror_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
933
934static void vfio_nvidia_bar0_quirk_reset(VFIOPCIDevice *vdev, VFIOQuirk *quirk)
935{
936 VFIOConfigMirrorQuirk *mirror = quirk->data;
937 LastDataSet *last = (LastDataSet *)&mirror->data;
938
939 last->addr = last->data = last->size = last->hits = last->added = 0;
940
941 vfio_drop_dynamic_eventfds(vdev, quirk);
942}
943
944static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
945{
946 VFIOQuirk *quirk;
947 VFIOConfigMirrorQuirk *mirror;
948 LastDataSet *last;
949
950 if (vdev->no_geforce_quirks ||
951 !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
952 !vfio_is_vga(vdev) || nr != 0) {
953 return;
954 }
955
956 quirk = vfio_quirk_alloc(1);
957 quirk->reset = vfio_nvidia_bar0_quirk_reset;
958 mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet));
959 mirror->mem = quirk->mem;
960 mirror->vdev = vdev;
961 mirror->offset = 0x88000;
962 mirror->bar = nr;
963 last = (LastDataSet *)&mirror->data;
964 last->quirk = quirk;
965
966 memory_region_init_io(mirror->mem, OBJECT(vdev),
967 &vfio_nvidia_mirror_quirk, mirror,
968 "vfio-nvidia-bar0-88000-mirror-quirk",
969 vdev->config_size);
970 memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
971 mirror->offset, mirror->mem, 1);
972
973 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
974
975
976 if (vdev->vga) {
977 quirk = vfio_quirk_alloc(1);
978 quirk->reset = vfio_nvidia_bar0_quirk_reset;
979 mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet));
980 mirror->mem = quirk->mem;
981 mirror->vdev = vdev;
982 mirror->offset = 0x1800;
983 mirror->bar = nr;
984 last = (LastDataSet *)&mirror->data;
985 last->quirk = quirk;
986
987 memory_region_init_io(mirror->mem, OBJECT(vdev),
988 &vfio_nvidia_mirror_quirk, mirror,
989 "vfio-nvidia-bar0-1800-mirror-quirk",
990 PCI_CONFIG_SPACE_SIZE);
991 memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
992 mirror->offset, mirror->mem, 1);
993
994 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
995 }
996
997 trace_vfio_quirk_nvidia_bar0_probe(vdev->vbasedev.name);
998}
999
1000
1001
1002
1003
1004
1005
/* PCI vendor ID matched by the RTL8168 quirk probe in this file. */
#define PCI_VENDOR_ID_REALTEK 0x10ec
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
/* State for the RTL8168 BAR2 address/data window quirk. */
typedef struct VFIOrtl8168Quirk {
    VFIOPCIDevice *vdev;    /* device owning the quirk */
    uint32_t addr;          /* last address-register value that matched the
                             * window pattern */
    uint32_t data;          /* last value written to the data register */
    bool enabled;           /* last address write matched the pattern */
} VFIOrtl8168Quirk;
1036
1037static uint64_t vfio_rtl8168_quirk_address_read(void *opaque,
1038 hwaddr addr, unsigned size)
1039{
1040 VFIOrtl8168Quirk *rtl = opaque;
1041 VFIOPCIDevice *vdev = rtl->vdev;
1042 uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x74, size);
1043
1044 if (rtl->enabled) {
1045 data = rtl->addr ^ 0x80000000U;
1046 trace_vfio_quirk_rtl8168_fake_latch(vdev->vbasedev.name, data);
1047 }
1048
1049 return data;
1050}
1051
1052static void vfio_rtl8168_quirk_address_write(void *opaque, hwaddr addr,
1053 uint64_t data, unsigned size)
1054{
1055 VFIOrtl8168Quirk *rtl = opaque;
1056 VFIOPCIDevice *vdev = rtl->vdev;
1057
1058 rtl->enabled = false;
1059
1060 if ((data & 0x7fff0000) == 0x10000) {
1061 rtl->enabled = true;
1062 rtl->addr = (uint32_t)data;
1063
1064 if (data & 0x80000000U) {
1065 if (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) {
1066 hwaddr offset = data & 0xfff;
1067 uint64_t val = rtl->data;
1068
1069 trace_vfio_quirk_rtl8168_msix_write(vdev->vbasedev.name,
1070 (uint16_t)offset, val);
1071
1072
1073 memory_region_dispatch_write(&vdev->pdev.msix_table_mmio,
1074 offset, val, size,
1075 MEMTXATTRS_UNSPECIFIED);
1076 }
1077 return;
1078 }
1079 }
1080
1081 vfio_region_write(&vdev->bars[2].region, addr + 0x74, data, size);
1082}
1083
/*
 * Access callbacks for the RTL8168 address register.  The .valid settings
 * declare only aligned 4-byte accesses as valid.
 */
static const MemoryRegionOps vfio_rtl_address_quirk = {
    .read = vfio_rtl8168_quirk_address_read,
    .write = vfio_rtl8168_quirk_address_write,
    .valid = {
        .min_access_size = 4,
        .max_access_size = 4,
        .unaligned = false,
    },
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1094
1095static uint64_t vfio_rtl8168_quirk_data_read(void *opaque,
1096 hwaddr addr, unsigned size)
1097{
1098 VFIOrtl8168Quirk *rtl = opaque;
1099 VFIOPCIDevice *vdev = rtl->vdev;
1100 uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x70, size);
1101
1102 if (rtl->enabled && (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) {
1103 hwaddr offset = rtl->addr & 0xfff;
1104 memory_region_dispatch_read(&vdev->pdev.msix_table_mmio, offset,
1105 &data, size, MEMTXATTRS_UNSPECIFIED);
1106 trace_vfio_quirk_rtl8168_msix_read(vdev->vbasedev.name, offset, data);
1107 }
1108
1109 return data;
1110}
1111
1112static void vfio_rtl8168_quirk_data_write(void *opaque, hwaddr addr,
1113 uint64_t data, unsigned size)
1114{
1115 VFIOrtl8168Quirk *rtl = opaque;
1116 VFIOPCIDevice *vdev = rtl->vdev;
1117
1118 rtl->data = (uint32_t)data;
1119
1120 vfio_region_write(&vdev->bars[2].region, addr + 0x70, data, size);
1121}
1122
/*
 * Access callbacks for the RTL8168 data register.  The .valid settings
 * declare only aligned 4-byte accesses as valid.
 */
static const MemoryRegionOps vfio_rtl_data_quirk = {
    .read = vfio_rtl8168_quirk_data_read,
    .write = vfio_rtl8168_quirk_data_write,
    .valid = {
        .min_access_size = 4,
        .max_access_size = 4,
        .unaligned = false,
    },
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1133
1134static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
1135{
1136 VFIOQuirk *quirk;
1137 VFIOrtl8168Quirk *rtl;
1138
1139 if (!vfio_pci_is(vdev, PCI_VENDOR_ID_REALTEK, 0x8168) || nr != 2) {
1140 return;
1141 }
1142
1143 quirk = vfio_quirk_alloc(2);
1144 quirk->data = rtl = g_malloc0(sizeof(*rtl));
1145 rtl->vdev = vdev;
1146
1147 memory_region_init_io(&quirk->mem[0], OBJECT(vdev),
1148 &vfio_rtl_address_quirk, rtl,
1149 "vfio-rtl8168-window-address-quirk", 4);
1150 memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
1151 0x74, &quirk->mem[0], 1);
1152
1153 memory_region_init_io(&quirk->mem[1], OBJECT(vdev),
1154 &vfio_rtl_data_quirk, rtl,
1155 "vfio-rtl8168-window-data-quirk", 4);
1156 memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
1157 0x70, &quirk->mem[1], 1);
1158
1159 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
1160
1161 trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name);
1162}
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202static int igd_gen(VFIOPCIDevice *vdev)
1203{
1204 if ((vdev->device_id & 0xfff) == 0xa84) {
1205 return 8;
1206 }
1207
1208 switch (vdev->device_id & 0xff00) {
1209
1210 case 0x0000:
1211 case 0x2500:
1212 case 0x2700:
1213 case 0x2900:
1214 case 0x2a00:
1215 case 0x2e00:
1216 case 0x3500:
1217 case 0xa000:
1218 return -1;
1219
1220 case 0x0100:
1221 case 0x0400:
1222 case 0x0a00:
1223 case 0x0c00:
1224 case 0x0d00:
1225 case 0x0f00:
1226 return 6;
1227
1228 case 0x1600:
1229 case 0x1900:
1230 case 0x2200:
1231 case 0x5900:
1232 return 8;
1233 }
1234
1235 return 8;
1236}
1237
/*
 * Per-device state for an IGD quirk.
 * NOTE(review): no user of this structure is visible in this part of the
 * file; the field comments below are inferred from the names only.
 */
typedef struct VFIOIGDQuirk {
    struct VFIOPCIDevice *vdev;     /* device owning the quirk */
    uint32_t index;                 /* presumably a latched index value */
    uint32_t bdsm;                  /* presumably a saved IGD_BDSM value */
} VFIOIGDQuirk;
1243
/*
 * IGD config-space register offsets.  IGD_ASLS is the register that
 * vfio_pci_igd_opregion_init() zeroes and makes writable/emulated.
 * NOTE(review): IGD_GMCH and IGD_BDSM are not used in this part of the
 * file; they are presumably further IGD config registers.
 */
#define IGD_GMCH 0x50
#define IGD_BDSM 0x5c
#define IGD_ASLS 0xfc
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
1257 struct vfio_region_info *info, Error **errp)
1258{
1259 int ret;
1260
1261 vdev->igd_opregion = g_malloc0(info->size);
1262 ret = pread(vdev->vbasedev.fd, vdev->igd_opregion,
1263 info->size, info->offset);
1264 if (ret != info->size) {
1265 error_setg(errp, "failed to read IGD OpRegion");
1266 g_free(vdev->igd_opregion);
1267 vdev->igd_opregion = NULL;
1268 return -EINVAL;
1269 }
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284 fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion",
1285 vdev->igd_opregion, info->size);
1286
1287 trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name);
1288
1289 pci_set_long(vdev->pdev.config + IGD_ASLS, 0);
1290 pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0);
1291 pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0);
1292
1293 return 0;
1294}
1295
1296
1297
1298
1299
1300
1301
1302
1303
/* One config space field to copy: its byte offset and its length in bytes */
typedef struct {
    uint8_t offset;
    uint8_t len;
} IGDHostInfo;
1308
1309static const IGDHostInfo igd_host_bridge_infos[] = {
1310 {PCI_REVISION_ID, 2},
1311 {PCI_SUBSYSTEM_VENDOR_ID, 2},
1312 {PCI_SUBSYSTEM_ID, 2},
1313};
1314
1315static const IGDHostInfo igd_lpc_bridge_infos[] = {
1316 {PCI_VENDOR_ID, 2},
1317 {PCI_DEVICE_ID, 2},
1318 {PCI_REVISION_ID, 2},
1319 {PCI_SUBSYSTEM_VENDOR_ID, 2},
1320 {PCI_SUBSYSTEM_ID, 2},
1321};
1322
1323static int vfio_pci_igd_copy(VFIOPCIDevice *vdev, PCIDevice *pdev,
1324 struct vfio_region_info *info,
1325 const IGDHostInfo *list, int len)
1326{
1327 int i, ret;
1328
1329 for (i = 0; i < len; i++) {
1330 ret = pread(vdev->vbasedev.fd, pdev->config + list[i].offset,
1331 list[i].len, info->offset + list[i].offset);
1332 if (ret != list[i].len) {
1333 error_report("IGD copy failed: %m");
1334 return -errno;
1335 }
1336 }
1337
1338 return 0;
1339}
1340
1341
1342
1343
1344static int vfio_pci_igd_host_init(VFIOPCIDevice *vdev,
1345 struct vfio_region_info *info)
1346{
1347 PCIBus *bus;
1348 PCIDevice *host_bridge;
1349 int ret;
1350
1351 bus = pci_device_root_bus(&vdev->pdev);
1352 host_bridge = pci_find_device(bus, 0, PCI_DEVFN(0, 0));
1353
1354 if (!host_bridge) {
1355 error_report("Can't find host bridge");
1356 return -ENODEV;
1357 }
1358
1359 ret = vfio_pci_igd_copy(vdev, host_bridge, info, igd_host_bridge_infos,
1360 ARRAY_SIZE(igd_host_bridge_infos));
1361 if (!ret) {
1362 trace_vfio_pci_igd_host_bridge_enabled(vdev->vbasedev.name);
1363 }
1364
1365 return ret;
1366}
1367
1368
1369
1370
1371
1372
1373
1374static void vfio_pci_igd_lpc_bridge_realize(PCIDevice *pdev, Error **errp)
1375{
1376 if (pdev->devfn != PCI_DEVFN(0x1f, 0)) {
1377 error_setg(errp, "VFIO dummy ISA/LPC bridge must have address 1f.0");
1378 }
1379}
1380
1381static void vfio_pci_igd_lpc_bridge_class_init(ObjectClass *klass, void *data)
1382{
1383 DeviceClass *dc = DEVICE_CLASS(klass);
1384 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1385
1386 set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
1387 dc->desc = "VFIO dummy ISA/LPC bridge for IGD assignment";
1388 dc->hotpluggable = false;
1389 k->realize = vfio_pci_igd_lpc_bridge_realize;
1390 k->class_id = PCI_CLASS_BRIDGE_ISA;
1391}
1392
1393static TypeInfo vfio_pci_igd_lpc_bridge_info = {
1394 .name = "vfio-pci-igd-lpc-bridge",
1395 .parent = TYPE_PCI_DEVICE,
1396 .class_init = vfio_pci_igd_lpc_bridge_class_init,
1397 .interfaces = (InterfaceInfo[]) {
1398 { INTERFACE_CONVENTIONAL_PCI_DEVICE },
1399 { },
1400 },
1401};
1402
1403static void vfio_pci_igd_register_types(void)
1404{
1405 type_register_static(&vfio_pci_igd_lpc_bridge_info);
1406}
1407
1408type_init(vfio_pci_igd_register_types)
1409
1410static int vfio_pci_igd_lpc_init(VFIOPCIDevice *vdev,
1411 struct vfio_region_info *info)
1412{
1413 PCIDevice *lpc_bridge;
1414 int ret;
1415
1416 lpc_bridge = pci_find_device(pci_device_root_bus(&vdev->pdev),
1417 0, PCI_DEVFN(0x1f, 0));
1418 if (!lpc_bridge) {
1419 lpc_bridge = pci_create_simple(pci_device_root_bus(&vdev->pdev),
1420 PCI_DEVFN(0x1f, 0), "vfio-pci-igd-lpc-bridge");
1421 }
1422
1423 ret = vfio_pci_igd_copy(vdev, lpc_bridge, info, igd_lpc_bridge_infos,
1424 ARRAY_SIZE(igd_lpc_bridge_infos));
1425 if (!ret) {
1426 trace_vfio_pci_igd_lpc_bridge_enabled(vdev->vbasedev.name);
1427 }
1428
1429 return ret;
1430}
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440static int vfio_igd_gtt_max(VFIOPCIDevice *vdev)
1441{
1442 uint32_t gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, sizeof(gmch));
1443 int ggms, gen = igd_gen(vdev);
1444
1445 gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, sizeof(gmch));
1446 ggms = (gmch >> (gen < 8 ? 8 : 6)) & 0x3;
1447 if (gen > 6) {
1448 ggms = 1 << ggms;
1449 }
1450
1451 ggms *= MiB;
1452
1453 return (ggms / (4 * KiB)) * (gen < 8 ? 4 : 8);
1454}
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467static uint64_t vfio_igd_quirk_data_read(void *opaque,
1468 hwaddr addr, unsigned size)
1469{
1470 VFIOIGDQuirk *igd = opaque;
1471 VFIOPCIDevice *vdev = igd->vdev;
1472
1473 igd->index = ~0;
1474
1475 return vfio_region_read(&vdev->bars[4].region, addr + 4, size);
1476}
1477
1478static void vfio_igd_quirk_data_write(void *opaque, hwaddr addr,
1479 uint64_t data, unsigned size)
1480{
1481 VFIOIGDQuirk *igd = opaque;
1482 VFIOPCIDevice *vdev = igd->vdev;
1483 uint64_t val = data;
1484 int gen = igd_gen(vdev);
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500 if ((igd->index % 4 == 1) && igd->index < vfio_igd_gtt_max(vdev)) {
1501 if (gen < 8 || (igd->index % 8 == 1)) {
1502 uint32_t base;
1503
1504 base = pci_get_long(vdev->pdev.config + IGD_BDSM);
1505 if (!base) {
1506 hw_error("vfio-igd: Guest attempted to program IGD GTT before "
1507 "BIOS reserved stolen memory. Unsupported BIOS?");
1508 }
1509
1510 val = data - igd->bdsm + base;
1511 } else {
1512 val = 0;
1513 }
1514
1515 trace_vfio_pci_igd_bar4_write(vdev->vbasedev.name,
1516 igd->index, data, val);
1517 }
1518
1519 vfio_region_write(&vdev->bars[4].region, addr + 4, val, size);
1520
1521 igd->index = ~0;
1522}
1523
1524static const MemoryRegionOps vfio_igd_data_quirk = {
1525 .read = vfio_igd_quirk_data_read,
1526 .write = vfio_igd_quirk_data_write,
1527 .endianness = DEVICE_LITTLE_ENDIAN,
1528};
1529
1530static uint64_t vfio_igd_quirk_index_read(void *opaque,
1531 hwaddr addr, unsigned size)
1532{
1533 VFIOIGDQuirk *igd = opaque;
1534 VFIOPCIDevice *vdev = igd->vdev;
1535
1536 igd->index = ~0;
1537
1538 return vfio_region_read(&vdev->bars[4].region, addr, size);
1539}
1540
1541static void vfio_igd_quirk_index_write(void *opaque, hwaddr addr,
1542 uint64_t data, unsigned size)
1543{
1544 VFIOIGDQuirk *igd = opaque;
1545 VFIOPCIDevice *vdev = igd->vdev;
1546
1547 igd->index = data;
1548
1549 vfio_region_write(&vdev->bars[4].region, addr, data, size);
1550}
1551
1552static const MemoryRegionOps vfio_igd_index_quirk = {
1553 .read = vfio_igd_quirk_index_read,
1554 .write = vfio_igd_quirk_index_write,
1555 .endianness = DEVICE_LITTLE_ENDIAN,
1556};
1557
1558static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
1559{
1560 struct vfio_region_info *rom = NULL, *opregion = NULL,
1561 *host = NULL, *lpc = NULL;
1562 VFIOQuirk *quirk;
1563 VFIOIGDQuirk *igd;
1564 PCIDevice *lpc_bridge;
1565 int i, ret, ggms_mb, gms_mb = 0, gen;
1566 uint64_t *bdsm_size;
1567 uint32_t gmch;
1568 uint16_t cmd_orig, cmd;
1569 Error *err = NULL;
1570
1571
1572
1573
1574
1575
1576 if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||
1577 !vfio_is_vga(vdev) || nr != 4 ||
1578 &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev),
1579 0, PCI_DEVFN(0x2, 0))) {
1580 return;
1581 }
1582
1583
1584
1585
1586
1587
1588 lpc_bridge = pci_find_device(pci_device_root_bus(&vdev->pdev),
1589 0, PCI_DEVFN(0x1f, 0));
1590 if (lpc_bridge && !object_dynamic_cast(OBJECT(lpc_bridge),
1591 "vfio-pci-igd-lpc-bridge")) {
1592 error_report("IGD device %s cannot support legacy mode due to existing "
1593 "devices at address 1f.0", vdev->vbasedev.name);
1594 return;
1595 }
1596
1597
1598
1599
1600
1601
1602 gen = igd_gen(vdev);
1603 if (gen != 6 && gen != 8) {
1604 error_report("IGD device %s is unsupported in legacy mode, "
1605 "try SandyBridge or newer", vdev->vbasedev.name);
1606 return;
1607 }
1608
1609
1610
1611
1612
1613
1614 ret = vfio_get_region_info(&vdev->vbasedev,
1615 VFIO_PCI_ROM_REGION_INDEX, &rom);
1616 if ((ret || !rom->size) && !vdev->pdev.romfile) {
1617 error_report("IGD device %s has no ROM, legacy mode disabled",
1618 vdev->vbasedev.name);
1619 goto out;
1620 }
1621
1622
1623
1624
1625
1626 if (vdev->pdev.qdev.hotplugged) {
1627 error_report("IGD device %s hotplugged, ROM disabled, "
1628 "legacy mode disabled", vdev->vbasedev.name);
1629 vdev->rom_read_failed = true;
1630 goto out;
1631 }
1632
1633
1634
1635
1636
1637 ret = vfio_get_dev_region_info(&vdev->vbasedev,
1638 VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
1639 VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion);
1640 if (ret) {
1641 error_report("IGD device %s does not support OpRegion access,"
1642 "legacy mode disabled", vdev->vbasedev.name);
1643 goto out;
1644 }
1645
1646 ret = vfio_get_dev_region_info(&vdev->vbasedev,
1647 VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
1648 VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, &host);
1649 if (ret) {
1650 error_report("IGD device %s does not support host bridge access,"
1651 "legacy mode disabled", vdev->vbasedev.name);
1652 goto out;
1653 }
1654
1655 ret = vfio_get_dev_region_info(&vdev->vbasedev,
1656 VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
1657 VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG, &lpc);
1658 if (ret) {
1659 error_report("IGD device %s does not support LPC bridge access,"
1660 "legacy mode disabled", vdev->vbasedev.name);
1661 goto out;
1662 }
1663
1664 gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4);
1665
1666
1667
1668
1669
1670
1671 if (!(gmch & 0x2) && !vdev->vga && vfio_populate_vga(vdev, &err)) {
1672 error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
1673 error_report("IGD device %s failed to enable VGA access, "
1674 "legacy mode disabled", vdev->vbasedev.name);
1675 goto out;
1676 }
1677
1678
1679 ret = vfio_pci_igd_lpc_init(vdev, lpc);
1680 if (ret) {
1681 error_report("IGD device %s failed to create LPC bridge, "
1682 "legacy mode disabled", vdev->vbasedev.name);
1683 goto out;
1684 }
1685
1686
1687 ret = vfio_pci_igd_host_init(vdev, host);
1688 if (ret) {
1689 error_report("IGD device %s failed to modify host bridge, "
1690 "legacy mode disabled", vdev->vbasedev.name);
1691 goto out;
1692 }
1693
1694
1695 ret = vfio_pci_igd_opregion_init(vdev, opregion, &err);
1696 if (ret) {
1697 error_append_hint(&err, "IGD legacy mode disabled\n");
1698 error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
1699 goto out;
1700 }
1701
1702
1703 quirk = vfio_quirk_alloc(2);
1704 igd = quirk->data = g_malloc0(sizeof(*igd));
1705 igd->vdev = vdev;
1706 igd->index = ~0;
1707 igd->bdsm = vfio_pci_read_config(&vdev->pdev, IGD_BDSM, 4);
1708 igd->bdsm &= ~((1 * MiB) - 1);
1709
1710 memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_igd_index_quirk,
1711 igd, "vfio-igd-index-quirk", 4);
1712 memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
1713 0, &quirk->mem[0], 1);
1714
1715 memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_igd_data_quirk,
1716 igd, "vfio-igd-data-quirk", 4);
1717 memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
1718 4, &quirk->mem[1], 1);
1719
1720 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
1721
1722
1723 ggms_mb = (gmch >> (gen < 8 ? 8 : 6)) & 0x3;
1724 if (gen > 6) {
1725 ggms_mb = 1 << ggms_mb;
1726 }
1727
1728
1729
1730
1731
1732
1733
1734 gmch &= ~((gen < 8 ? 0x1f : 0xff) << (gen < 8 ? 3 : 8));
1735
1736 if (vdev->igd_gms) {
1737 if (vdev->igd_gms <= 0x10) {
1738 gms_mb = vdev->igd_gms * 32;
1739 gmch |= vdev->igd_gms << (gen < 8 ? 3 : 8);
1740 } else {
1741 error_report("Unsupported IGD GMS value 0x%x", vdev->igd_gms);
1742 vdev->igd_gms = 0;
1743 }
1744 }
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754 bdsm_size = g_malloc(sizeof(*bdsm_size));
1755 *bdsm_size = cpu_to_le64((ggms_mb + gms_mb) * MiB);
1756 fw_cfg_add_file(fw_cfg_find(), "etc/igd-bdsm-size",
1757 bdsm_size, sizeof(*bdsm_size));
1758
1759
1760 pci_set_long(vdev->pdev.config + IGD_GMCH, gmch);
1761 pci_set_long(vdev->pdev.wmask + IGD_GMCH, 0);
1762 pci_set_long(vdev->emulated_config_bits + IGD_GMCH, ~0);
1763
1764
1765 pci_set_long(vdev->pdev.config + IGD_BDSM, 0);
1766 pci_set_long(vdev->pdev.wmask + IGD_BDSM, ~0);
1767 pci_set_long(vdev->emulated_config_bits + IGD_BDSM, ~0);
1768
1769
1770
1771
1772
1773
1774
1775 if (pread(vdev->vbasedev.fd, &cmd_orig, sizeof(cmd_orig),
1776 vdev->config_offset + PCI_COMMAND) != sizeof(cmd_orig)) {
1777 error_report("IGD device %s - failed to read PCI command register",
1778 vdev->vbasedev.name);
1779 }
1780
1781 cmd = cmd_orig | PCI_COMMAND_IO;
1782
1783 if (pwrite(vdev->vbasedev.fd, &cmd, sizeof(cmd),
1784 vdev->config_offset + PCI_COMMAND) != sizeof(cmd)) {
1785 error_report("IGD device %s - failed to write PCI command register",
1786 vdev->vbasedev.name);
1787 }
1788
1789 for (i = 1; i < vfio_igd_gtt_max(vdev); i += 4) {
1790 vfio_region_write(&vdev->bars[4].region, 0, i, 4);
1791 vfio_region_write(&vdev->bars[4].region, 4, 0, 4);
1792 }
1793
1794 if (pwrite(vdev->vbasedev.fd, &cmd_orig, sizeof(cmd_orig),
1795 vdev->config_offset + PCI_COMMAND) != sizeof(cmd_orig)) {
1796 error_report("IGD device %s - failed to restore PCI command register",
1797 vdev->vbasedev.name);
1798 }
1799
1800 trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, ggms_mb + gms_mb);
1801
1802out:
1803 g_free(rom);
1804 g_free(opregion);
1805 g_free(host);
1806 g_free(lpc);
1807}
1808
1809
1810
1811
1812void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
1813{
1814 vfio_vga_probe_ati_3c3_quirk(vdev);
1815 vfio_vga_probe_nvidia_3d0_quirk(vdev);
1816}
1817
1818void vfio_vga_quirk_exit(VFIOPCIDevice *vdev)
1819{
1820 VFIOQuirk *quirk;
1821 int i, j;
1822
1823 for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
1824 QLIST_FOREACH(quirk, &vdev->vga->region[i].quirks, next) {
1825 for (j = 0; j < quirk->nr_mem; j++) {
1826 memory_region_del_subregion(&vdev->vga->region[i].mem,
1827 &quirk->mem[j]);
1828 }
1829 }
1830 }
1831}
1832
1833void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev)
1834{
1835 int i, j;
1836
1837 for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
1838 while (!QLIST_EMPTY(&vdev->vga->region[i].quirks)) {
1839 VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga->region[i].quirks);
1840 QLIST_REMOVE(quirk, next);
1841 for (j = 0; j < quirk->nr_mem; j++) {
1842 object_unparent(OBJECT(&quirk->mem[j]));
1843 }
1844 g_free(quirk->mem);
1845 g_free(quirk->data);
1846 g_free(quirk);
1847 }
1848 }
1849}
1850
1851void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
1852{
1853 vfio_probe_ati_bar4_quirk(vdev, nr);
1854 vfio_probe_ati_bar2_quirk(vdev, nr);
1855 vfio_probe_nvidia_bar5_quirk(vdev, nr);
1856 vfio_probe_nvidia_bar0_quirk(vdev, nr);
1857 vfio_probe_rtl8168_bar2_quirk(vdev, nr);
1858 vfio_probe_igd_bar4_quirk(vdev, nr);
1859}
1860
1861void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr)
1862{
1863 VFIOBAR *bar = &vdev->bars[nr];
1864 VFIOQuirk *quirk;
1865 int i;
1866
1867 QLIST_FOREACH(quirk, &bar->quirks, next) {
1868 while (!QLIST_EMPTY(&quirk->ioeventfds)) {
1869 vfio_ioeventfd_exit(vdev, QLIST_FIRST(&quirk->ioeventfds));
1870 }
1871
1872 for (i = 0; i < quirk->nr_mem; i++) {
1873 memory_region_del_subregion(bar->region.mem, &quirk->mem[i]);
1874 }
1875 }
1876}
1877
1878void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr)
1879{
1880 VFIOBAR *bar = &vdev->bars[nr];
1881 int i;
1882
1883 while (!QLIST_EMPTY(&bar->quirks)) {
1884 VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks);
1885 QLIST_REMOVE(quirk, next);
1886 for (i = 0; i < quirk->nr_mem; i++) {
1887 object_unparent(OBJECT(&quirk->mem[i]));
1888 }
1889 g_free(quirk->mem);
1890 g_free(quirk->data);
1891 g_free(quirk);
1892 }
1893}
1894
1895
1896
1897
1898void vfio_quirk_reset(VFIOPCIDevice *vdev)
1899{
1900 int i;
1901
1902 for (i = 0; i < PCI_ROM_SLOT; i++) {
1903 VFIOQuirk *quirk;
1904 VFIOBAR *bar = &vdev->bars[i];
1905
1906 QLIST_FOREACH(quirk, &bar->quirks, next) {
1907 if (quirk->reset) {
1908 quirk->reset(vdev, quirk);
1909 }
1910 }
1911 }
1912}
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935static bool vfio_radeon_smc_is_running(VFIOPCIDevice *vdev)
1936{
1937 uint32_t clk, pc_c;
1938
1939
1940
1941
1942
1943 vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
1944 clk = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1945 vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000370, 4);
1946 pc_c = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1947
1948 return (!(clk & 1) && (0x20100 <= pc_c));
1949}
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959static void vfio_radeon_set_gfx_only_reset(VFIOPCIDevice *vdev)
1960{
1961 uint32_t misc, fuse;
1962 bool a, b;
1963
1964 vfio_region_write(&vdev->bars[5].region, 0x200, 0xc00c0000, 4);
1965 fuse = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1966 b = fuse & 64;
1967
1968 vfio_region_write(&vdev->bars[5].region, 0x200, 0xc0000010, 4);
1969 misc = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1970 a = misc & 2;
1971
1972 if (a == b) {
1973 vfio_region_write(&vdev->bars[5].region, 0x204, misc ^ 2, 4);
1974 vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1975 }
1976}
1977
1978static int vfio_radeon_reset(VFIOPCIDevice *vdev)
1979{
1980 PCIDevice *pdev = &vdev->pdev;
1981 int i, ret = 0;
1982 uint32_t data;
1983
1984
1985 if (vdev->vbasedev.reset_works) {
1986 trace_vfio_quirk_ati_bonaire_reset_skipped(vdev->vbasedev.name);
1987 return -ENODEV;
1988 }
1989
1990
1991 vfio_pci_write_config(pdev, PCI_COMMAND, PCI_COMMAND_MEMORY, 2);
1992
1993
1994 if (!vfio_radeon_smc_is_running(vdev)) {
1995 ret = -EINVAL;
1996 trace_vfio_quirk_ati_bonaire_reset_no_smc(vdev->vbasedev.name);
1997 goto out;
1998 }
1999
2000
2001 vfio_radeon_set_gfx_only_reset(vdev);
2002
2003
2004 vfio_pci_write_config(pdev, 0x7c, 0x39d5e86b, 4);
2005 usleep(100);
2006
2007
2008 for (i = 0; i < 100000; i++) {
2009 if (vfio_region_read(&vdev->bars[5].region, 0x5428, 4) != 0xffffffff) {
2010 goto reset_smc;
2011 }
2012 usleep(1);
2013 }
2014
2015 trace_vfio_quirk_ati_bonaire_reset_timeout(vdev->vbasedev.name);
2016
2017reset_smc:
2018
2019 vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000000, 4);
2020 data = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
2021 data |= 1;
2022 vfio_region_write(&vdev->bars[5].region, 0x204, data, 4);
2023
2024
2025 vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
2026 data = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
2027 data |= 1;
2028 vfio_region_write(&vdev->bars[5].region, 0x204, data, 4);
2029
2030 trace_vfio_quirk_ati_bonaire_reset_done(vdev->vbasedev.name);
2031
2032out:
2033
2034 vfio_pci_write_config(pdev, PCI_COMMAND, 0, 2);
2035
2036 return ret;
2037}
2038
2039void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev)
2040{
2041 switch (vdev->vendor_id) {
2042 case 0x1002:
2043 switch (vdev->device_id) {
2044
2045 case 0x6649:
2046 case 0x6650:
2047 case 0x6651:
2048 case 0x6658:
2049 case 0x665c:
2050 case 0x665d:
2051
2052 case 0x67A0:
2053 case 0x67A1:
2054 case 0x67A2:
2055 case 0x67A8:
2056 case 0x67A9:
2057 case 0x67AA:
2058 case 0x67B0:
2059 case 0x67B1:
2060 case 0x67B8:
2061 case 0x67B9:
2062 case 0x67BA:
2063 case 0x67BE:
2064 vdev->resetfn = vfio_radeon_reset;
2065 trace_vfio_quirk_ati_bonaire_reset(vdev->vbasedev.name);
2066 break;
2067 }
2068 break;
2069 }
2070}
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089static void get_nv_gpudirect_clique_id(Object *obj, Visitor *v,
2090 const char *name, void *opaque,
2091 Error **errp)
2092{
2093 DeviceState *dev = DEVICE(obj);
2094 Property *prop = opaque;
2095 uint8_t *ptr = qdev_get_prop_ptr(dev, prop);
2096
2097 visit_type_uint8(v, name, ptr, errp);
2098}
2099
2100static void set_nv_gpudirect_clique_id(Object *obj, Visitor *v,
2101 const char *name, void *opaque,
2102 Error **errp)
2103{
2104 DeviceState *dev = DEVICE(obj);
2105 Property *prop = opaque;
2106 uint8_t value, *ptr = qdev_get_prop_ptr(dev, prop);
2107 Error *local_err = NULL;
2108
2109 if (dev->realized) {
2110 qdev_prop_set_after_realize(dev, name, errp);
2111 return;
2112 }
2113
2114 visit_type_uint8(v, name, &value, &local_err);
2115 if (local_err) {
2116 error_propagate(errp, local_err);
2117 return;
2118 }
2119
2120 if (value & ~0xF) {
2121 error_setg(errp, "Property %s: valid range 0-15", name);
2122 return;
2123 }
2124
2125 *ptr = value;
2126}
2127
2128const PropertyInfo qdev_prop_nv_gpudirect_clique = {
2129 .name = "uint4",
2130 .description = "NVIDIA GPUDirect Clique ID (0 - 15)",
2131 .get = get_nv_gpudirect_clique_id,
2132 .set = set_nv_gpudirect_clique_id,
2133};
2134
2135static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
2136{
2137 PCIDevice *pdev = &vdev->pdev;
2138 int ret, pos = 0xC8;
2139
2140 if (vdev->nv_gpudirect_clique == 0xFF) {
2141 return 0;
2142 }
2143
2144 if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID)) {
2145 error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid device vendor");
2146 return -EINVAL;
2147 }
2148
2149 if (pci_get_byte(pdev->config + PCI_CLASS_DEVICE + 1) !=
2150 PCI_BASE_CLASS_DISPLAY) {
2151 error_setg(errp, "NVIDIA GPUDirect Clique ID: unsupported PCI class");
2152 return -EINVAL;
2153 }
2154
2155 ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8, errp);
2156 if (ret < 0) {
2157 error_prepend(errp, "Failed to add NVIDIA GPUDirect cap: ");
2158 return ret;
2159 }
2160
2161 memset(vdev->emulated_config_bits + pos, 0xFF, 8);
2162 pos += PCI_CAP_FLAGS;
2163 pci_set_byte(pdev->config + pos++, 8);
2164 pci_set_byte(pdev->config + pos++, 'P');
2165 pci_set_byte(pdev->config + pos++, '2');
2166 pci_set_byte(pdev->config + pos++, 'P');
2167 pci_set_byte(pdev->config + pos++, vdev->nv_gpudirect_clique << 3);
2168 pci_set_byte(pdev->config + pos, 0);
2169
2170 return 0;
2171}
2172
2173int vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp)
2174{
2175 int ret;
2176
2177 ret = vfio_add_nv_gpudirect_cap(vdev, errp);
2178 if (ret) {
2179 return ret;
2180 }
2181
2182 return 0;
2183}
2184
2185static void vfio_pci_nvlink2_get_tgt(Object *obj, Visitor *v,
2186 const char *name,
2187 void *opaque, Error **errp)
2188{
2189 uint64_t tgt = (uintptr_t) opaque;
2190 visit_type_uint64(v, name, &tgt, errp);
2191}
2192
2193static void vfio_pci_nvlink2_get_link_speed(Object *obj, Visitor *v,
2194 const char *name,
2195 void *opaque, Error **errp)
2196{
2197 uint32_t link_speed = (uint32_t)(uintptr_t) opaque;
2198 visit_type_uint32(v, name, &link_speed, errp);
2199}
2200
2201int vfio_pci_nvidia_v100_ram_init(VFIOPCIDevice *vdev, Error **errp)
2202{
2203 int ret;
2204 void *p;
2205 struct vfio_region_info *nv2reg = NULL;
2206 struct vfio_info_cap_header *hdr;
2207 struct vfio_region_info_cap_nvlink2_ssatgt *cap;
2208 VFIOQuirk *quirk;
2209
2210 ret = vfio_get_dev_region_info(&vdev->vbasedev,
2211 VFIO_REGION_TYPE_PCI_VENDOR_TYPE |
2212 PCI_VENDOR_ID_NVIDIA,
2213 VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM,
2214 &nv2reg);
2215 if (ret) {
2216 return ret;
2217 }
2218
2219 hdr = vfio_get_region_info_cap(nv2reg, VFIO_REGION_INFO_CAP_NVLINK2_SSATGT);
2220 if (!hdr) {
2221 ret = -ENODEV;
2222 goto free_exit;
2223 }
2224 cap = (void *) hdr;
2225
2226 p = mmap(NULL, nv2reg->size, PROT_READ | PROT_WRITE | PROT_EXEC,
2227 MAP_SHARED, vdev->vbasedev.fd, nv2reg->offset);
2228 if (p == MAP_FAILED) {
2229 ret = -errno;
2230 goto free_exit;
2231 }
2232
2233 quirk = vfio_quirk_alloc(1);
2234 memory_region_init_ram_ptr(&quirk->mem[0], OBJECT(vdev), "nvlink2-mr",
2235 nv2reg->size, p);
2236 QLIST_INSERT_HEAD(&vdev->bars[0].quirks, quirk, next);
2237
2238 object_property_add(OBJECT(vdev), "nvlink2-tgt", "uint64",
2239 vfio_pci_nvlink2_get_tgt, NULL, NULL,
2240 (void *) (uintptr_t) cap->tgt, NULL);
2241 trace_vfio_pci_nvidia_gpu_setup_quirk(vdev->vbasedev.name, cap->tgt,
2242 nv2reg->size);
2243free_exit:
2244 g_free(nv2reg);
2245
2246 return ret;
2247}
2248
2249int vfio_pci_nvlink2_init(VFIOPCIDevice *vdev, Error **errp)
2250{
2251 int ret;
2252 void *p;
2253 struct vfio_region_info *atsdreg = NULL;
2254 struct vfio_info_cap_header *hdr;
2255 struct vfio_region_info_cap_nvlink2_ssatgt *captgt;
2256 struct vfio_region_info_cap_nvlink2_lnkspd *capspeed;
2257 VFIOQuirk *quirk;
2258
2259 ret = vfio_get_dev_region_info(&vdev->vbasedev,
2260 VFIO_REGION_TYPE_PCI_VENDOR_TYPE |
2261 PCI_VENDOR_ID_IBM,
2262 VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD,
2263 &atsdreg);
2264 if (ret) {
2265 return ret;
2266 }
2267
2268 hdr = vfio_get_region_info_cap(atsdreg,
2269 VFIO_REGION_INFO_CAP_NVLINK2_SSATGT);
2270 if (!hdr) {
2271 ret = -ENODEV;
2272 goto free_exit;
2273 }
2274 captgt = (void *) hdr;
2275
2276 hdr = vfio_get_region_info_cap(atsdreg,
2277 VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD);
2278 if (!hdr) {
2279 ret = -ENODEV;
2280 goto free_exit;
2281 }
2282 capspeed = (void *) hdr;
2283
2284
2285 if (atsdreg->size) {
2286 p = mmap(NULL, atsdreg->size, PROT_READ | PROT_WRITE | PROT_EXEC,
2287 MAP_SHARED, vdev->vbasedev.fd, atsdreg->offset);
2288 if (p == MAP_FAILED) {
2289 ret = -errno;
2290 goto free_exit;
2291 }
2292
2293 quirk = vfio_quirk_alloc(1);
2294 memory_region_init_ram_device_ptr(&quirk->mem[0], OBJECT(vdev),
2295 "nvlink2-atsd-mr", atsdreg->size, p);
2296 QLIST_INSERT_HEAD(&vdev->bars[0].quirks, quirk, next);
2297 }
2298
2299 object_property_add(OBJECT(vdev), "nvlink2-tgt", "uint64",
2300 vfio_pci_nvlink2_get_tgt, NULL, NULL,
2301 (void *) (uintptr_t) captgt->tgt, NULL);
2302 trace_vfio_pci_nvlink2_setup_quirk_ssatgt(vdev->vbasedev.name, captgt->tgt,
2303 atsdreg->size);
2304
2305 object_property_add(OBJECT(vdev), "nvlink2-link-speed", "uint32",
2306 vfio_pci_nvlink2_get_link_speed, NULL, NULL,
2307 (void *) (uintptr_t) capspeed->link_speed, NULL);
2308 trace_vfio_pci_nvlink2_setup_quirk_lnkspd(vdev->vbasedev.name,
2309 capspeed->link_speed);
2310free_exit:
2311 g_free(atsdreg);
2312
2313 return ret;
2314}
2315