/*
 * KVMGT - the implementation of Intel mediated pass-through framework for KVM
 *
 * Copyright(c) 2014-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Kevin Tian <kevin.tian@intel.com>
 *    Jike Song <jike.song@intel.com>
 *    Xiaoguang Chen <xiaoguang.chen@intel.com>
 */
#include <linux/init.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/kthread.h>
#include <linux/sched/mm.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/eventfd.h>
#include <linux/uuid.h>
#include <linux/kvm_host.h>
#include <linux/vfio.h>
#include <linux/mdev.h>
#include <linux/debugfs.h>

#include <linux/nospec.h>

#include "i915_drv.h"
#include "gvt.h"

static const struct intel_gvt_ops *intel_gvt_ops;

/* helper macros copied from vfio-pci */
#define VFIO_PCI_OFFSET_SHIFT	40
#define VFIO_PCI_OFFSET_TO_INDEX(off)	(off >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_INDEX_TO_OFFSET(index)	((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK	(((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
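
/*
 * Example: region index 2 (BAR2) lives at device file offset
 * 2ULL << VFIO_PCI_OFFSET_SHIFT, and the low 40 bits of a given
 * *ppos are the offset within that region.
 */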

#define EDID_BLOB_OFFSET	(PAGE_SIZE/2)

#define OPREGION_SIGNATURE "IntelGraphicsMem"

struct vfio_region;
struct intel_vgpu_regops {
	size_t (*rw)(struct intel_vgpu *vgpu, char *buf,
			size_t count, loff_t *ppos, bool iswrite);
	void (*release)(struct intel_vgpu *vgpu,
			struct vfio_region *region);
};

struct vfio_region {
	u32				type;
	u32				subtype;
	size_t				size;
	u32				flags;
	const struct intel_vgpu_regops	*ops;
	void				*data;
};

struct vfio_edid_region {
	struct vfio_region_gfx_edid vfio_edid_regs;
	void *edid_blob;
};

struct kvmgt_pgfn {
	gfn_t gfn;
	struct hlist_node hnode;
};

#define KVMGT_DEBUGFS_FILENAME	"kvmgt_nr_cache_entries"
struct kvmgt_guest_info {
	struct kvm *kvm;
	struct intel_vgpu *vgpu;
	struct kvm_page_track_notifier_node track_node;
#define NR_BKT (1 << 18)
	struct hlist_head ptable[NR_BKT];
#undef NR_BKT
};

struct gvt_dma {
	struct intel_vgpu *vgpu;
	struct rb_node gfn_node;
	struct rb_node dma_addr_node;
	gfn_t gfn;
	dma_addr_t dma_addr;
	unsigned long size;
	struct kref ref;
};

struct kvmgt_vdev {
	struct intel_vgpu *vgpu;
	struct mdev_device *mdev;
	struct vfio_region *region;
	int num_regions;
	struct eventfd_ctx *intx_trigger;
	struct eventfd_ctx *msi_trigger;

	/*
	 * Two caches are used to avoid mapping duplicated pages (eg.
	 * scratch pages). Both of these caches are based on the same
	 * lock (cache_lock).
	 */
	struct rb_root gfn_cache;
	struct rb_root dma_addr_cache;
	unsigned long nr_cache_entries;
	struct mutex cache_lock;

	struct notifier_block iommu_notifier;
	struct notifier_block group_notifier;
	struct kvm *kvm;
	struct work_struct release_work;
	atomic_t released;
	struct vfio_device *vfio_device;
	struct vfio_group *vfio_group;
};

static inline struct kvmgt_vdev *kvmgt_vdev(struct intel_vgpu *vgpu)
{
	return intel_vgpu_vdev(vgpu);
}

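/*
 * vgpu->handle holds a pointer to the kvmgt_guest_info once a guest is
 * attached (see kvmgt_guest_init() below); values that fit in the low
 * byte are treated as "no guest attached".
 */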
static inline bool handle_valid(unsigned long handle)
{
	return !!(handle & ~0xff);
}

static ssize_t available_instances_show(struct mdev_type *mtype,
					struct mdev_type_attribute *attr,
					char *buf)
{
	struct intel_vgpu_type *type;
	unsigned int num = 0;
	struct intel_gvt *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt;

	type = &gvt->types[mtype_get_type_group_id(mtype)];
	if (!type)
		num = 0;
	else
		num = type->avail_instance;

	return sprintf(buf, "%u\n", num);
}

static ssize_t device_api_show(struct mdev_type *mtype,
			       struct mdev_type_attribute *attr, char *buf)
{
	return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
}

static ssize_t description_show(struct mdev_type *mtype,
				struct mdev_type_attribute *attr, char *buf)
{
	struct intel_vgpu_type *type;
	struct intel_gvt *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt;

	type = &gvt->types[mtype_get_type_group_id(mtype)];
	if (!type)
		return 0;

	return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
		       "fence: %d\nresolution: %s\n"
		       "weight: %d\n",
		       BYTES_TO_MB(type->low_gm_size),
		       BYTES_TO_MB(type->high_gm_size),
		       type->fence, vgpu_edid_str(type->resolution),
		       type->weight);
}

static MDEV_TYPE_ATTR_RO(available_instances);
static MDEV_TYPE_ATTR_RO(device_api);
static MDEV_TYPE_ATTR_RO(description);

static struct attribute *gvt_type_attrs[] = {
	&mdev_type_attr_available_instances.attr,
	&mdev_type_attr_device_api.attr,
	&mdev_type_attr_description.attr,
	NULL,
};

static struct attribute_group *gvt_vgpu_type_groups[] = {
	[0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL,
};

static int intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt)
{
	int i, j;
	struct intel_vgpu_type *type;
	struct attribute_group *group;

	for (i = 0; i < gvt->num_types; i++) {
		type = &gvt->types[i];

		group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
		if (!group)
			goto unwind;

		group->name = type->name;
		group->attrs = gvt_type_attrs;
		gvt_vgpu_type_groups[i] = group;
	}

	return 0;

unwind:
	for (j = 0; j < i; j++) {
		group = gvt_vgpu_type_groups[j];
		kfree(group);
	}

	return -ENOMEM;
}

static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
{
	int i;
	struct attribute_group *group;

	for (i = 0; i < gvt->num_types; i++) {
		group = gvt_vgpu_type_groups[i];
		gvt_vgpu_type_groups[i] = NULL;
		kfree(group);
	}
}

static int kvmgt_guest_init(struct mdev_device *mdev);
static void intel_vgpu_release_work(struct work_struct *work);
static bool kvmgt_guest_exit(struct kvmgt_guest_info *info);

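/*
 * Guest memory pinning: pages are pinned one gfn at a time through the
 * vGPU's VFIO group, and the result must form one physically contiguous
 * range so it can be handed to dma_map_page() as a single mapping.
 */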
static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
		unsigned long size)
{
	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
	struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
	int total_pages;
	int npage;
	int ret;

	total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE;

	for (npage = 0; npage < total_pages; npage++) {
		unsigned long cur_gfn = gfn + npage;

		ret = vfio_group_unpin_pages(vdev->vfio_group, &cur_gfn, 1);
		drm_WARN_ON(&i915->drm, ret != 1);
	}
}

/* Pin a normal or compound guest page for dma. */
static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
		unsigned long size, struct page **page)
{
	struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
	unsigned long base_pfn = 0;
	int total_pages;
	int npage;
	int ret;

	total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE;
	/*
	 * We pin the pages one-by-one to avoid allocating a big array
	 * on stack to hold pfns.
	 */
	for (npage = 0; npage < total_pages; npage++) {
		unsigned long cur_gfn = gfn + npage;
		unsigned long pfn;

		ret = vfio_group_pin_pages(vdev->vfio_group, &cur_gfn, 1,
					   IOMMU_READ | IOMMU_WRITE, &pfn);
		if (ret != 1) {
			gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n",
				     cur_gfn, ret);
			goto err;
		}

		if (!pfn_valid(pfn)) {
			gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn);
			npage++;
			ret = -EFAULT;
			goto err;
		}

		if (npage == 0)
			base_pfn = pfn;
		else if (base_pfn + npage != pfn) {
			gvt_vgpu_err("The pages are not contiguous\n");
			ret = -EINVAL;
			npage++;
			goto err;
		}
	}

	*page = pfn_to_page(base_pfn);
	return 0;
err:
	gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE);
	return ret;
}

static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn,
		dma_addr_t *dma_addr, unsigned long size)
{
	struct device *dev = vgpu->gvt->gt->i915->drm.dev;
	struct page *page = NULL;
	int ret;

	ret = gvt_pin_guest_page(vgpu, gfn, size, &page);
	if (ret)
		return ret;

	/* Setup DMA mapping. */
	*dma_addr = dma_map_page(dev, page, 0, size, PCI_DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, *dma_addr)) {
		gvt_vgpu_err("DMA mapping failed for pfn 0x%lx, ret %d\n",
			     page_to_pfn(page), ret);
		gvt_unpin_guest_page(vgpu, gfn, size);
		return -ENOMEM;
	}

	return 0;
}

static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn,
		dma_addr_t dma_addr, unsigned long size)
{
	struct device *dev = vgpu->gvt->gt->i915->drm.dev;

	dma_unmap_page(dev, dma_addr, size, PCI_DMA_BIDIRECTIONAL);
	gvt_unpin_guest_page(vgpu, gfn, size);
}

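/*
 * DMA mapping cache: each gvt_dma entry is indexed twice, by guest
 * frame number (gfn_cache) and by DMA address (dma_addr_cache), so both
 * the map and unmap paths get O(log n) lookups under cache_lock.
 */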
static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu,
		dma_addr_t dma_addr)
{
	struct rb_node *node = kvmgt_vdev(vgpu)->dma_addr_cache.rb_node;
	struct gvt_dma *itr;

	while (node) {
		itr = rb_entry(node, struct gvt_dma, dma_addr_node);

		if (dma_addr < itr->dma_addr)
			node = node->rb_left;
		else if (dma_addr > itr->dma_addr)
			node = node->rb_right;
		else
			return itr;
	}
	return NULL;
}

static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct rb_node *node = kvmgt_vdev(vgpu)->gfn_cache.rb_node;
	struct gvt_dma *itr;

	while (node) {
		itr = rb_entry(node, struct gvt_dma, gfn_node);

		if (gfn < itr->gfn)
			node = node->rb_left;
		else if (gfn > itr->gfn)
			node = node->rb_right;
		else
			return itr;
	}
	return NULL;
}

static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
		dma_addr_t dma_addr, unsigned long size)
{
	struct gvt_dma *new, *itr;
	struct rb_node **link, *parent = NULL;
	struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);

	new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	new->vgpu = vgpu;
	new->gfn = gfn;
	new->dma_addr = dma_addr;
	new->size = size;
	kref_init(&new->ref);

	/* gfn_cache maps gfn to struct gvt_dma. */
	link = &vdev->gfn_cache.rb_node;
	while (*link) {
		parent = *link;
		itr = rb_entry(parent, struct gvt_dma, gfn_node);

		if (gfn < itr->gfn)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}
	rb_link_node(&new->gfn_node, parent, link);
	rb_insert_color(&new->gfn_node, &vdev->gfn_cache);

	/* dma_addr_cache maps dma addr to struct gvt_dma. */
	parent = NULL;
	link = &vdev->dma_addr_cache.rb_node;
	while (*link) {
		parent = *link;
		itr = rb_entry(parent, struct gvt_dma, dma_addr_node);

		if (dma_addr < itr->dma_addr)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}
	rb_link_node(&new->dma_addr_node, parent, link);
	rb_insert_color(&new->dma_addr_node, &vdev->dma_addr_cache);

	vdev->nr_cache_entries++;
	return 0;
}

static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
		struct gvt_dma *entry)
{
	struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);

	rb_erase(&entry->gfn_node, &vdev->gfn_cache);
	rb_erase(&entry->dma_addr_node, &vdev->dma_addr_cache);
	kfree(entry);
	vdev->nr_cache_entries--;
}

static void gvt_cache_destroy(struct intel_vgpu *vgpu)
{
	struct gvt_dma *dma;
	struct rb_node *node = NULL;
	struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);

	for (;;) {
		mutex_lock(&vdev->cache_lock);
		node = rb_first(&vdev->gfn_cache);
		if (!node) {
			mutex_unlock(&vdev->cache_lock);
			break;
		}
		dma = rb_entry(node, struct gvt_dma, gfn_node);
		gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr, dma->size);
		__gvt_cache_remove_entry(vgpu, dma);
		mutex_unlock(&vdev->cache_lock);
	}
}

static void gvt_cache_init(struct intel_vgpu *vgpu)
{
	struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);

	vdev->gfn_cache = RB_ROOT;
	vdev->dma_addr_cache = RB_ROOT;
	vdev->nr_cache_entries = 0;
	mutex_init(&vdev->cache_lock);
}

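/*
 * Write-protection bookkeeping: gfns handed to KVM's page-track
 * framework are remembered in a hash table so they can be looked up on
 * write faults and removed again on teardown.
 */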
static void kvmgt_protect_table_init(struct kvmgt_guest_info *info)
{
	hash_init(info->ptable);
}

static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info)
{
	struct kvmgt_pgfn *p;
	struct hlist_node *tmp;
	int i;

	hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
		hash_del(&p->hnode);
		kfree(p);
	}
}

static struct kvmgt_pgfn *
__kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn)
{
	struct kvmgt_pgfn *p, *res = NULL;

	hash_for_each_possible(info->ptable, p, hnode, gfn) {
		if (gfn == p->gfn) {
			res = p;
			break;
		}
	}

	return res;
}

static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info,
		gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	p = __kvmgt_protect_table_find(info, gfn);
	return !!p;
}

static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	if (kvmgt_gfn_is_write_protected(info, gfn))
		return;

	p = kzalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC);
	if (WARN(!p, "gfn: 0x%llx\n", gfn))
		return;

	p->gfn = gfn;
	hash_add(info->ptable, &p->hnode, gfn);
}

static void kvmgt_protect_table_del(struct kvmgt_guest_info *info,
		gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	p = __kvmgt_protect_table_find(info, gfn);
	if (p) {
		hash_del(&p->hnode);
		kfree(p);
	}
}

static size_t intel_vgpu_reg_rw_opregion(struct intel_vgpu *vgpu, char *buf,
		size_t count, loff_t *ppos, bool iswrite)
{
	struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
			VFIO_PCI_NUM_REGIONS;
	void *base = vdev->region[i].data;
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;

	if (pos >= vdev->region[i].size || iswrite) {
		gvt_vgpu_err("invalid op or offset for Intel vgpu OpRegion\n");
		return -EINVAL;
	}
	count = min(count, (size_t)(vdev->region[i].size - pos));
	memcpy(buf, base + pos, count);

	return count;
}

static void intel_vgpu_reg_release_opregion(struct intel_vgpu *vgpu,
		struct vfio_region *region)
{
}

static const struct intel_vgpu_regops intel_vgpu_regops_opregion = {
	.rw = intel_vgpu_reg_rw_opregion,
	.release = intel_vgpu_reg_release_opregion,
};

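/*
 * The EDID region is split in two: a vfio_region_gfx_edid control block
 * at offset 0 and the EDID blob itself at EDID_BLOB_OFFSET (half a page
 * in). Writes to link_state emulate monitor hotplug.
 */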
static int handle_edid_regs(struct intel_vgpu *vgpu,
			struct vfio_edid_region *region, char *buf,
			size_t count, u16 offset, bool is_write)
{
	struct vfio_region_gfx_edid *regs = &region->vfio_edid_regs;
	unsigned int data;

	if (offset + count > sizeof(*regs))
		return -EINVAL;

	if (count != 4)
		return -EINVAL;

	if (is_write) {
		data = *((unsigned int *)buf);
		switch (offset) {
		case offsetof(struct vfio_region_gfx_edid, link_state):
			if (data == VFIO_DEVICE_GFX_LINK_STATE_UP) {
				if (!drm_edid_block_valid(
					(u8 *)region->edid_blob,
					0,
					true,
					NULL)) {
					gvt_vgpu_err("invalid EDID blob\n");
					return -EINVAL;
				}
				intel_gvt_ops->emulate_hotplug(vgpu, true);
			} else if (data == VFIO_DEVICE_GFX_LINK_STATE_DOWN)
				intel_gvt_ops->emulate_hotplug(vgpu, false);
			else {
				gvt_vgpu_err("invalid EDID link state %d\n",
					regs->link_state);
				return -EINVAL;
			}
			regs->link_state = data;
			break;
		case offsetof(struct vfio_region_gfx_edid, edid_size):
			if (data > regs->edid_max_size) {
				gvt_vgpu_err("EDID size is bigger than %d!\n",
					regs->edid_max_size);
				return -EINVAL;
			}
			regs->edid_size = data;
			break;
		default:
			/* read-only regs */
			gvt_vgpu_err("write read-only EDID region at offset %d\n",
				offset);
			return -EPERM;
		}
	} else {
		memcpy(buf, (char *)regs + offset, count);
	}

	return count;
}

static int handle_edid_blob(struct vfio_edid_region *region, char *buf,
			size_t count, u16 offset, bool is_write)
{
	if (offset + count > region->vfio_edid_regs.edid_size)
		return -EINVAL;

	if (is_write)
		memcpy(region->edid_blob + offset, buf, count);
	else
		memcpy(buf, region->edid_blob + offset, count);

	return count;
}

static size_t intel_vgpu_reg_rw_edid(struct intel_vgpu *vgpu, char *buf,
		size_t count, loff_t *ppos, bool iswrite)
{
	int ret;
	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
			VFIO_PCI_NUM_REGIONS;
	struct vfio_edid_region *region =
		(struct vfio_edid_region *)kvmgt_vdev(vgpu)->region[i].data;
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;

	if (pos < region->vfio_edid_regs.edid_offset) {
		ret = handle_edid_regs(vgpu, region, buf, count, pos, iswrite);
	} else {
		pos -= EDID_BLOB_OFFSET;
		ret = handle_edid_blob(region, buf, count, pos, iswrite);
	}

	if (ret < 0)
		gvt_vgpu_err("failed to access EDID region\n");

	return ret;
}

static void intel_vgpu_reg_release_edid(struct intel_vgpu *vgpu,
		struct vfio_region *region)
{
	kfree(region->data);
}

static const struct intel_vgpu_regops intel_vgpu_regops_edid = {
	.rw = intel_vgpu_reg_rw_edid,
	.release = intel_vgpu_reg_release_edid,
};

static int intel_vgpu_register_reg(struct intel_vgpu *vgpu,
		unsigned int type, unsigned int subtype,
		const struct intel_vgpu_regops *ops,
		size_t size, u32 flags, void *data)
{
	struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
	struct vfio_region *region;

	region = krealloc(vdev->region,
			(vdev->num_regions + 1) * sizeof(*region),
			GFP_KERNEL);
	if (!region)
		return -ENOMEM;

	vdev->region = region;
	vdev->region[vdev->num_regions].type = type;
	vdev->region[vdev->num_regions].subtype = subtype;
	vdev->region[vdev->num_regions].ops = ops;
	vdev->region[vdev->num_regions].size = size;
	vdev->region[vdev->num_regions].flags = flags;
	vdev->region[vdev->num_regions].data = data;
	vdev->num_regions++;
	return 0;
}

static int kvmgt_get_vfio_device(void *p_vgpu)
{
	struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;
	struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);

	vdev->vfio_device = vfio_device_get_from_dev(
		mdev_dev(vdev->mdev));
	if (!vdev->vfio_device) {
		gvt_vgpu_err("failed to get vfio device\n");
		return -ENODEV;
	}
	return 0;
}

static int kvmgt_set_opregion(void *p_vgpu)
{
	struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;
	void *base;
	int ret;

	/* Each vgpu has its own opregion, although VFIO would create another
	 * one later. This one is used to expose opregion to VFIO. And the
	 * other one created by VFIO later, is used by guest actually.
	 */
	base = vgpu_opregion(vgpu)->va;
	if (!base)
		return -ENOMEM;

	if (memcmp(base, OPREGION_SIGNATURE, 16)) {
		memunmap(base);
		return -EINVAL;
	}

	ret = intel_vgpu_register_reg(vgpu,
			PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
			VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION,
			&intel_vgpu_regops_opregion, OPREGION_SIZE,
			VFIO_REGION_INFO_FLAG_READ, base);

	return ret;
}

static int kvmgt_set_edid(void *p_vgpu, int port_num)
{
	struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;
	struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num);
	struct vfio_edid_region *base;
	int ret;

	base = kzalloc(sizeof(*base), GFP_KERNEL);
	if (!base)
		return -ENOMEM;

	/* TODO: Add multi-port and EDID extension block support */
	base->vfio_edid_regs.edid_offset = EDID_BLOB_OFFSET;
	base->vfio_edid_regs.edid_max_size = EDID_SIZE;
	base->vfio_edid_regs.edid_size = EDID_SIZE;
	base->vfio_edid_regs.max_xres = vgpu_edid_xres(port->id);
	base->vfio_edid_regs.max_yres = vgpu_edid_yres(port->id);
	base->edid_blob = port->edid->edid_block;

	ret = intel_vgpu_register_reg(vgpu,
			VFIO_REGION_TYPE_GFX,
			VFIO_REGION_SUBTYPE_GFX_EDID,
			&intel_vgpu_regops_edid, EDID_SIZE,
			VFIO_REGION_INFO_FLAG_READ |
			VFIO_REGION_INFO_FLAG_WRITE |
			VFIO_REGION_INFO_FLAG_CAPS, base);

	return ret;
}

static void kvmgt_put_vfio_device(void *vgpu)
{
	struct kvmgt_vdev *vdev = kvmgt_vdev((struct intel_vgpu *)vgpu);

	if (WARN_ON(!vdev->vfio_device))
		return;

	vfio_device_put(vdev->vfio_device);
}

static int intel_vgpu_create(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = NULL;
	struct intel_vgpu_type *type;
	struct device *pdev;
	struct intel_gvt *gvt;
	int ret;

	pdev = mdev_parent_dev(mdev);
	gvt = kdev_to_i915(pdev)->gvt;

	type = &gvt->types[mdev_get_type_group_id(mdev)];
	if (!type) {
		ret = -EINVAL;
		goto out;
	}

	vgpu = intel_gvt_ops->vgpu_create(gvt, type);
	if (IS_ERR_OR_NULL(vgpu)) {
		ret = vgpu == NULL ? -EFAULT : PTR_ERR(vgpu);
		gvt_err("failed to create intel vgpu: %d\n", ret);
		goto out;
	}

	INIT_WORK(&kvmgt_vdev(vgpu)->release_work, intel_vgpu_release_work);

	kvmgt_vdev(vgpu)->mdev = mdev;
	mdev_set_drvdata(mdev, vgpu);

	gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n",
		     dev_name(mdev_dev(mdev)));
	ret = 0;

out:
	return ret;
}

static int intel_vgpu_remove(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

	if (handle_valid(vgpu->handle))
		return -EBUSY;

	intel_gvt_ops->vgpu_destroy(vgpu);
	return 0;
}

static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct kvmgt_vdev *vdev = container_of(nb,
					       struct kvmgt_vdev,
					       iommu_notifier);
	struct intel_vgpu *vgpu = vdev->vgpu;

	if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
		struct vfio_iommu_type1_dma_unmap *unmap = data;
		struct gvt_dma *entry;
		unsigned long iov_pfn, end_iov_pfn;

		iov_pfn = unmap->iova >> PAGE_SHIFT;
		end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE;

		mutex_lock(&vdev->cache_lock);
		for (; iov_pfn < end_iov_pfn; iov_pfn++) {
			entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
			if (!entry)
				continue;

			gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
					   entry->size);
			__gvt_cache_remove_entry(vgpu, entry);
		}
		mutex_unlock(&vdev->cache_lock);
	}

	return NOTIFY_OK;
}

static int intel_vgpu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct kvmgt_vdev *vdev = container_of(nb,
					       struct kvmgt_vdev,
					       group_notifier);

	/* the only action we care about */
	if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
		vdev->kvm = data;

		if (!data)
			schedule_work(&vdev->release_work);
	}

	return NOTIFY_OK;
}

static int intel_vgpu_open_device(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
	unsigned long events;
	int ret;
	struct vfio_group *vfio_group;

	vdev->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
	vdev->group_notifier.notifier_call = intel_vgpu_group_notifier;

	events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events,
				&vdev->iommu_notifier);
	if (ret != 0) {
		gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
			ret);
		goto out;
	}

	events = VFIO_GROUP_NOTIFY_SET_KVM;
	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events,
				&vdev->group_notifier);
	if (ret != 0) {
		gvt_vgpu_err("vfio_register_notifier for group failed: %d\n",
			ret);
		goto undo_iommu;
	}

	vfio_group = vfio_group_get_external_user_from_dev(mdev_dev(mdev));
	if (IS_ERR_OR_NULL(vfio_group)) {
		ret = !vfio_group ? -EFAULT : PTR_ERR(vfio_group);
		gvt_vgpu_err("vfio_group_get_external_user_from_dev failed\n");
		goto undo_register;
	}
	vdev->vfio_group = vfio_group;

	/* Take a module reference as mdev core doesn't take
	 * a reference for vendor driver.
	 */
	if (!try_module_get(THIS_MODULE)) {
		ret = -ENODEV;
		goto undo_group;
	}

	ret = kvmgt_guest_init(mdev);
	if (ret)
		goto undo_group;

	intel_gvt_ops->vgpu_activate(vgpu);

	atomic_set(&vdev->released, 0);
	return ret;

undo_group:
	vfio_group_put_external_user(vdev->vfio_group);
	vdev->vfio_group = NULL;

undo_register:
	vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
					&vdev->group_notifier);

undo_iommu:
	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
					&vdev->iommu_notifier);
out:
	return ret;
}

static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
{
	struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
	struct eventfd_ctx *trigger;

	trigger = vdev->msi_trigger;
	if (trigger) {
		eventfd_ctx_put(trigger);
		vdev->msi_trigger = NULL;
	}
}

static void __intel_vgpu_release(struct intel_vgpu *vgpu)
{
	struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
	struct kvmgt_guest_info *info;
	int ret;

	if (!handle_valid(vgpu->handle))
		return;

	if (atomic_cmpxchg(&vdev->released, 0, 1))
		return;

	intel_gvt_ops->vgpu_release(vgpu);

	ret = vfio_unregister_notifier(mdev_dev(vdev->mdev), VFIO_IOMMU_NOTIFY,
					&vdev->iommu_notifier);
	drm_WARN(&i915->drm, ret,
		 "vfio_unregister_notifier for iommu failed: %d\n", ret);

	ret = vfio_unregister_notifier(mdev_dev(vdev->mdev), VFIO_GROUP_NOTIFY,
					&vdev->group_notifier);
	drm_WARN(&i915->drm, ret,
		 "vfio_unregister_notifier for group failed: %d\n", ret);

	/* dereference module reference taken at open */
	module_put(THIS_MODULE);

	info = (struct kvmgt_guest_info *)vgpu->handle;
	kvmgt_guest_exit(info);

	intel_vgpu_release_msi_eventfd_ctx(vgpu);
	vfio_group_put_external_user(vdev->vfio_group);

	vdev->kvm = NULL;
	vgpu->handle = 0;
}

static void intel_vgpu_close_device(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

	__intel_vgpu_release(vgpu);
}

static void intel_vgpu_release_work(struct work_struct *work)
{
	struct kvmgt_vdev *vdev = container_of(work, struct kvmgt_vdev,
					       release_work);

	__intel_vgpu_release(vdev->vgpu);
}

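/*
 * Reassemble a BAR base address from the vGPU's virtual config space:
 * the low dword always contributes the masked base bits, and for 64-bit
 * memory BARs the following dword supplies bits 63:32.
 */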
static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
{
	u32 start_lo, start_hi;
	u32 mem_type;

	start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
			PCI_BASE_ADDRESS_MEM_MASK;
	mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
			PCI_BASE_ADDRESS_MEM_TYPE_MASK;

	switch (mem_type) {
	case PCI_BASE_ADDRESS_MEM_TYPE_64:
		start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space
						+ bar + 4));
		break;
	case PCI_BASE_ADDRESS_MEM_TYPE_32:
	case PCI_BASE_ADDRESS_MEM_TYPE_1M:
		/* 1M mem BAR treated as 32-bit BAR */
	default:
		/* mem unknown type treated as 32-bit BAR */
		start_hi = 0;
		break;
	}

	return ((u64)start_hi << 32) | start_lo;
}

static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, u64 off,
			     void *buf, unsigned int count, bool is_write)
{
	u64 bar_start = intel_vgpu_get_bar_addr(vgpu, bar);
	int ret;

	if (is_write)
		ret = intel_gvt_ops->emulate_mmio_write(vgpu,
					bar_start + off, buf, count);
	else
		ret = intel_gvt_ops->emulate_mmio_read(vgpu,
					bar_start + off, buf, count);
	return ret;
}

static inline bool intel_vgpu_in_aperture(struct intel_vgpu *vgpu, u64 off)
{
	return off >= vgpu_aperture_offset(vgpu) &&
	       off < vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu);
}

static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, u64 off,
		void *buf, unsigned long count, bool is_write)
{
	void __iomem *aperture_va;

	if (!intel_vgpu_in_aperture(vgpu, off) ||
	    !intel_vgpu_in_aperture(vgpu, off + count)) {
		gvt_vgpu_err("Invalid aperture offset %llu\n", off);
		return -EINVAL;
	}

	aperture_va = io_mapping_map_wc(&vgpu->gvt->gt->ggtt->iomap,
					ALIGN_DOWN(off, PAGE_SIZE),
					count + offset_in_page(off));
	if (!aperture_va)
		return -EIO;

	if (is_write)
		memcpy_toio(aperture_va + offset_in_page(off), buf, count);
	else
		memcpy_fromio(buf, aperture_va + offset_in_page(off), count);

	io_mapping_unmap(aperture_va);

	return 0;
}

static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
			size_t count, loff_t *ppos, bool is_write)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	u64 pos = *ppos & VFIO_PCI_OFFSET_MASK;
	int ret = -EINVAL;

	if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions) {
		gvt_vgpu_err("invalid index: %u\n", index);
		return -EINVAL;
	}

	switch (index) {
	case VFIO_PCI_CONFIG_REGION_INDEX:
		if (is_write)
			ret = intel_gvt_ops->emulate_cfg_write(vgpu, pos,
						buf, count);
		else
			ret = intel_gvt_ops->emulate_cfg_read(vgpu, pos,
						buf, count);
		break;
	case VFIO_PCI_BAR0_REGION_INDEX:
		ret = intel_vgpu_bar_rw(vgpu, PCI_BASE_ADDRESS_0, pos,
					buf, count, is_write);
		break;
	case VFIO_PCI_BAR2_REGION_INDEX:
		ret = intel_vgpu_aperture_rw(vgpu, pos, buf, count, is_write);
		break;
	case VFIO_PCI_BAR1_REGION_INDEX:
	case VFIO_PCI_BAR3_REGION_INDEX:
	case VFIO_PCI_BAR4_REGION_INDEX:
	case VFIO_PCI_BAR5_REGION_INDEX:
	case VFIO_PCI_VGA_REGION_INDEX:
	case VFIO_PCI_ROM_REGION_INDEX:
		break;
	default:
		if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
			return -EINVAL;

		index -= VFIO_PCI_NUM_REGIONS;
		return vdev->region[index].ops->rw(vgpu, buf, count,
				ppos, is_write);
	}

	return ret == 0 ? count : ret;
}

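/*
 * GGTT entries are 64 bits wide, so accesses that land in the GGTT
 * range of BAR0 are forwarded as a single aligned 8-byte transaction
 * instead of being split into dword accesses (see intel_vgpu_read()/
 * intel_vgpu_write() below).
 */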
static bool gtt_entry(struct mdev_device *mdev, loff_t *ppos)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	struct intel_gvt *gvt = vgpu->gvt;
	int offset;

	/* Only allow MMIO GGTT entry access */
	if (index != PCI_BASE_ADDRESS_0)
		return false;

	offset = (u64)(*ppos & VFIO_PCI_OFFSET_MASK) -
		intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_0);

	return (offset >= gvt->device_info.gtt_start_offset &&
		offset < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt)) ?
			true : false;
}

static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
			size_t count, loff_t *ppos)
{
	unsigned int done = 0;
	int ret;

	while (count) {
		size_t filled;

		/* Only support GGTT entry 8 bytes read */
		if (count >= 8 && !(*ppos % 8) &&
			gtt_entry(mdev, ppos)) {
			u64 val;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 8;
		} else if (count >= 4 && !(*ppos % 4)) {
			u32 val;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 4;
		} else if (count >= 2 && !(*ppos % 2)) {
			u16 val;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 2;
		} else {
			u8 val;

			ret = intel_vgpu_rw(mdev, &val, sizeof(val), ppos,
					false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 1;
		}

		count -= filled;
		done += filled;
		*ppos += filled;
		buf += filled;
	}

	return done;

read_err:
	return -EFAULT;
}

static ssize_t intel_vgpu_write(struct mdev_device *mdev,
				const char __user *buf,
				size_t count, loff_t *ppos)
{
	unsigned int done = 0;
	int ret;

	while (count) {
		size_t filled;

		/* Only support GGTT entry 8 bytes write */
		if (count >= 8 && !(*ppos % 8) &&
			gtt_entry(mdev, ppos)) {
			u64 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 8;
		} else if (count >= 4 && !(*ppos % 4)) {
			u32 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 4;
		} else if (count >= 2 && !(*ppos % 2)) {
			u16 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, (char *)&val,
					sizeof(val), ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 2;
		} else {
			u8 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, &val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 1;
		}

		count -= filled;
		done += filled;
		*ppos += filled;
		buf += filled;
	}

	return done;
write_err:
	return -EFAULT;
}

static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
{
	unsigned int index;
	u64 virtaddr;
	unsigned long req_size, pgoff, req_start;
	pgprot_t pg_prot;
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
	if (index >= VFIO_PCI_ROM_REGION_INDEX)
		return -EINVAL;

	if (vma->vm_end < vma->vm_start)
		return -EINVAL;
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;
	if (index != VFIO_PCI_BAR2_REGION_INDEX)
		return -EINVAL;

	pg_prot = vma->vm_page_prot;
	virtaddr = vma->vm_start;
	req_size = vma->vm_end - vma->vm_start;
	pgoff = vma->vm_pgoff &
		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
	req_start = pgoff << PAGE_SHIFT;

	if (!intel_vgpu_in_aperture(vgpu, req_start))
		return -EINVAL;
	if (req_start + req_size >
	    vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu))
		return -EINVAL;

	pgoff = (gvt_aperture_pa_base(vgpu->gvt) >> PAGE_SHIFT) + pgoff;

	return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
}

static int intel_vgpu_get_irq_count(struct intel_vgpu *vgpu, int type)
{
	if (type == VFIO_PCI_INTX_IRQ_INDEX || type == VFIO_PCI_MSI_IRQ_INDEX)
		return 1;

	return 0;
}

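/*
 * The INTx mask/unmask/trigger handlers below accept the VFIO requests
 * but do nothing; only the MSI eventfd trigger is actually wired up.
 */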
1356
1357static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu,
1358 unsigned int index, unsigned int start,
1359 unsigned int count, u32 flags,
1360 void *data)
1361{
1362 return 0;
1363}
1364
1365static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu,
1366 unsigned int index, unsigned int start,
1367 unsigned int count, u32 flags, void *data)
1368{
1369 return 0;
1370}
1371
1372static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu,
1373 unsigned int index, unsigned int start, unsigned int count,
1374 u32 flags, void *data)
1375{
1376 return 0;
1377}
1378
1379static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu,
1380 unsigned int index, unsigned int start, unsigned int count,
1381 u32 flags, void *data)
1382{
1383 struct eventfd_ctx *trigger;
1384
1385 if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
1386 int fd = *(int *)data;
1387
1388 trigger = eventfd_ctx_fdget(fd);
1389 if (IS_ERR(trigger)) {
1390 gvt_vgpu_err("eventfd_ctx_fdget failed\n");
1391 return PTR_ERR(trigger);
1392 }
1393 kvmgt_vdev(vgpu)->msi_trigger = trigger;
1394 } else if ((flags & VFIO_IRQ_SET_DATA_NONE) && !count)
1395 intel_vgpu_release_msi_eventfd_ctx(vgpu);
1396
1397 return 0;
1398}
1399
1400static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, u32 flags,
1401 unsigned int index, unsigned int start, unsigned int count,
1402 void *data)
1403{
1404 int (*func)(struct intel_vgpu *vgpu, unsigned int index,
1405 unsigned int start, unsigned int count, u32 flags,
1406 void *data) = NULL;
1407
1408 switch (index) {
1409 case VFIO_PCI_INTX_IRQ_INDEX:
1410 switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
1411 case VFIO_IRQ_SET_ACTION_MASK:
1412 func = intel_vgpu_set_intx_mask;
1413 break;
1414 case VFIO_IRQ_SET_ACTION_UNMASK:
1415 func = intel_vgpu_set_intx_unmask;
1416 break;
1417 case VFIO_IRQ_SET_ACTION_TRIGGER:
1418 func = intel_vgpu_set_intx_trigger;
1419 break;
1420 }
1421 break;
1422 case VFIO_PCI_MSI_IRQ_INDEX:
1423 switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
1424 case VFIO_IRQ_SET_ACTION_MASK:
1425 case VFIO_IRQ_SET_ACTION_UNMASK:
1426
1427 break;
1428 case VFIO_IRQ_SET_ACTION_TRIGGER:
1429 func = intel_vgpu_set_msi_trigger;
1430 break;
1431 }
1432 break;
1433 }
1434
1435 if (!func)
1436 return -ENOTTY;
1437
1438 return func(vgpu, index, start, count, flags, data);
1439}
1440
static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
			     unsigned long arg)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);
	unsigned long minsz;

	gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", vgpu->id, cmd);

	if (cmd == VFIO_DEVICE_GET_INFO) {
		struct vfio_device_info info;

		minsz = offsetofend(struct vfio_device_info, num_irqs);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.flags = VFIO_DEVICE_FLAGS_PCI;
		info.flags |= VFIO_DEVICE_FLAGS_RESET;
		info.num_regions = VFIO_PCI_NUM_REGIONS +
				vdev->num_regions;
		info.num_irqs = VFIO_PCI_NUM_IRQS;

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;

	} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
		struct vfio_region_info info;
		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
		unsigned int i;
		int ret;
		struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
		int nr_areas = 1;
		int cap_type_id;

		minsz = offsetofend(struct vfio_region_info, offset);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		switch (info.index) {
		case VFIO_PCI_CONFIG_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = vgpu->gvt->device_info.cfg_space_size;
			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;
			break;
		case VFIO_PCI_BAR0_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = vgpu->cfg_space.bar[info.index].size;
			if (!info.size) {
				info.flags = 0;
				break;
			}

			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;
			break;
		case VFIO_PCI_BAR1_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;
			info.flags = 0;
			break;
		case VFIO_PCI_BAR2_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.flags = VFIO_REGION_INFO_FLAG_CAPS |
					VFIO_REGION_INFO_FLAG_MMAP |
					VFIO_REGION_INFO_FLAG_READ |
					VFIO_REGION_INFO_FLAG_WRITE;
			info.size = gvt_aperture_sz(vgpu->gvt);

			sparse = kzalloc(struct_size(sparse, areas, nr_areas),
					 GFP_KERNEL);
			if (!sparse)
				return -ENOMEM;

			sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
			sparse->header.version = 1;
			sparse->nr_areas = nr_areas;
			cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
			sparse->areas[0].offset =
					PAGE_ALIGN(vgpu_aperture_offset(vgpu));
			sparse->areas[0].size = vgpu_aperture_sz(vgpu);
			break;

		case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;
			info.flags = 0;

			gvt_dbg_core("get region info bar:%d\n", info.index);
			break;

		case VFIO_PCI_ROM_REGION_INDEX:
		case VFIO_PCI_VGA_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;
			info.flags = 0;

			gvt_dbg_core("get region info index:%d\n", info.index);
			break;
		default:
			{
				struct vfio_region_info_cap_type cap_type = {
					.header.id = VFIO_REGION_INFO_CAP_TYPE,
					.header.version = 1 };

				if (info.index >= VFIO_PCI_NUM_REGIONS +
						vdev->num_regions)
					return -EINVAL;
				info.index =
					array_index_nospec(info.index,
							VFIO_PCI_NUM_REGIONS +
							vdev->num_regions);

				i = info.index - VFIO_PCI_NUM_REGIONS;

				info.offset =
					VFIO_PCI_INDEX_TO_OFFSET(info.index);
				info.size = vdev->region[i].size;
				info.flags = vdev->region[i].flags;

				cap_type.type = vdev->region[i].type;
				cap_type.subtype = vdev->region[i].subtype;

				ret = vfio_info_add_capability(&caps,
							&cap_type.header,
							sizeof(cap_type));
				if (ret)
					return ret;
			}
		}

		if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) {
			switch (cap_type_id) {
			case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
				ret = vfio_info_add_capability(&caps,
					&sparse->header,
					struct_size(sparse, areas,
						    sparse->nr_areas));
				if (ret) {
					kfree(sparse);
					return ret;
				}
				break;
			default:
				kfree(sparse);
				return -EINVAL;
			}
		}

		if (caps.size) {
			info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
			if (info.argsz < sizeof(info) + caps.size) {
				info.argsz = sizeof(info) + caps.size;
				info.cap_offset = 0;
			} else {
				vfio_info_cap_shift(&caps, sizeof(info));
				if (copy_to_user((void __user *)arg +
						  sizeof(info), caps.buf,
						  caps.size)) {
					kfree(caps.buf);
					kfree(sparse);
					return -EFAULT;
				}
				info.cap_offset = sizeof(info);
			}

			kfree(caps.buf);
		}

		kfree(sparse);
		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
		struct vfio_irq_info info;

		minsz = offsetofend(struct vfio_irq_info, count);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
			return -EINVAL;

		switch (info.index) {
		case VFIO_PCI_INTX_IRQ_INDEX:
		case VFIO_PCI_MSI_IRQ_INDEX:
			break;
		default:
			return -EINVAL;
		}

		info.flags = VFIO_IRQ_INFO_EVENTFD;

		info.count = intel_vgpu_get_irq_count(vgpu, info.index);

		if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
			info.flags |= (VFIO_IRQ_INFO_MASKABLE |
				       VFIO_IRQ_INFO_AUTOMASKED);
		else
			info.flags |= VFIO_IRQ_INFO_NORESIZE;

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_SET_IRQS) {
		struct vfio_irq_set hdr;
		u8 *data = NULL;
		int ret = 0;
		size_t data_size = 0;

		minsz = offsetofend(struct vfio_irq_set, count);

		if (copy_from_user(&hdr, (void __user *)arg, minsz))
			return -EFAULT;

		if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
			int max = intel_vgpu_get_irq_count(vgpu, hdr.index);

			ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
						VFIO_PCI_NUM_IRQS, &data_size);
			if (ret) {
				gvt_vgpu_err("intel:vfio_set_irqs_validate_and_prepare failed\n");
				return -EINVAL;
			}
			if (data_size) {
				data = memdup_user((void __user *)(arg + minsz),
						   data_size);
				if (IS_ERR(data))
					return PTR_ERR(data);
			}
		}

		ret = intel_vgpu_set_irqs(vgpu, hdr.flags, hdr.index,
					hdr.start, hdr.count, data);
		kfree(data);

		return ret;
	} else if (cmd == VFIO_DEVICE_RESET) {
		intel_gvt_ops->vgpu_reset(vgpu);
		return 0;
	} else if (cmd == VFIO_DEVICE_QUERY_GFX_PLANE) {
		struct vfio_device_gfx_plane_info dmabuf;
		int ret = 0;

		minsz = offsetofend(struct vfio_device_gfx_plane_info,
				    dmabuf_id);
		if (copy_from_user(&dmabuf, (void __user *)arg, minsz))
			return -EFAULT;
		if (dmabuf.argsz < minsz)
			return -EINVAL;

		ret = intel_gvt_ops->vgpu_query_plane(vgpu, &dmabuf);
		if (ret != 0)
			return ret;

		return copy_to_user((void __user *)arg, &dmabuf, minsz) ?
								-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_GET_GFX_DMABUF) {
		__u32 dmabuf_id;
		__s32 dmabuf_fd;

		if (get_user(dmabuf_id, (__u32 __user *)arg))
			return -EFAULT;

		dmabuf_fd = intel_gvt_ops->vgpu_get_dmabuf(vgpu, dmabuf_id);
		return dmabuf_fd;

	}

	return -ENOTTY;
}

static ssize_t
vgpu_id_show(struct device *dev, struct device_attribute *attr,
	     char *buf)
{
	struct mdev_device *mdev = mdev_from_dev(dev);

	if (mdev) {
		struct intel_vgpu *vgpu = (struct intel_vgpu *)
			mdev_get_drvdata(mdev);
		return sprintf(buf, "%d\n", vgpu->id);
	}
	return sprintf(buf, "\n");
}

static DEVICE_ATTR_RO(vgpu_id);

static struct attribute *intel_vgpu_attrs[] = {
	&dev_attr_vgpu_id.attr,
	NULL
};

static const struct attribute_group intel_vgpu_group = {
	.name = "intel_vgpu",
	.attrs = intel_vgpu_attrs,
};

static const struct attribute_group *intel_vgpu_groups[] = {
	&intel_vgpu_group,
	NULL,
};

static struct mdev_parent_ops intel_vgpu_ops = {
	.mdev_attr_groups	= intel_vgpu_groups,
	.create			= intel_vgpu_create,
	.remove			= intel_vgpu_remove,

	.open_device		= intel_vgpu_open_device,
	.close_device		= intel_vgpu_close_device,

	.read			= intel_vgpu_read,
	.write			= intel_vgpu_write,
	.mmap			= intel_vgpu_mmap,
	.ioctl			= intel_vgpu_ioctl,
};

static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops)
{
	int ret;

	ret = intel_gvt_init_vgpu_type_groups((struct intel_gvt *)gvt);
	if (ret)
		return ret;

	intel_gvt_ops = ops;
	intel_vgpu_ops.supported_type_groups = gvt_vgpu_type_groups;

	ret = mdev_register_device(dev, &intel_vgpu_ops);
	if (ret)
		intel_gvt_cleanup_vgpu_type_groups((struct intel_gvt *)gvt);

	return ret;
}

static void kvmgt_host_exit(struct device *dev, void *gvt)
{
	mdev_unregister_device(dev);
	intel_gvt_cleanup_vgpu_type_groups((struct intel_gvt *)gvt);
}

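/*
 * Page-track plumbing: kvmgt_page_track_add()/remove() toggle KVM write
 * protection for a gfn under kvm->mmu_lock, and the notifier callbacks
 * below relay write faults and memslot flushes back into GVT.
 */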
static int kvmgt_page_track_add(unsigned long handle, u64 gfn)
{
	struct kvmgt_guest_info *info;
	struct kvm *kvm;
	struct kvm_memory_slot *slot;
	int idx;

	if (!handle_valid(handle))
		return -ESRCH;

	info = (struct kvmgt_guest_info *)handle;
	kvm = info->kvm;

	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);
	if (!slot) {
		srcu_read_unlock(&kvm->srcu, idx);
		return -EINVAL;
	}

	write_lock(&kvm->mmu_lock);

	if (kvmgt_gfn_is_write_protected(info, gfn))
		goto out;

	kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
	kvmgt_protect_table_add(info, gfn);

out:
	write_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
	return 0;
}

static int kvmgt_page_track_remove(unsigned long handle, u64 gfn)
{
	struct kvmgt_guest_info *info;
	struct kvm *kvm;
	struct kvm_memory_slot *slot;
	int idx;

	if (!handle_valid(handle))
		return 0;

	info = (struct kvmgt_guest_info *)handle;
	kvm = info->kvm;

	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);
	if (!slot) {
		srcu_read_unlock(&kvm->srcu, idx);
		return -EINVAL;
	}

	write_lock(&kvm->mmu_lock);

	if (!kvmgt_gfn_is_write_protected(info, gfn))
		goto out;

	kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
	kvmgt_protect_table_del(info, gfn);

out:
	write_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
	return 0;
}

static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
		const u8 *val, int len,
		struct kvm_page_track_notifier_node *node)
{
	struct kvmgt_guest_info *info = container_of(node,
					struct kvmgt_guest_info, track_node);

	if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
		intel_gvt_ops->write_protect_handler(info->vgpu, gpa,
						     (void *)val, len);
}

static void kvmgt_page_track_flush_slot(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		struct kvm_page_track_notifier_node *node)
{
	int i;
	gfn_t gfn;
	struct kvmgt_guest_info *info = container_of(node,
					struct kvmgt_guest_info, track_node);

	write_lock(&kvm->mmu_lock);
	for (i = 0; i < slot->npages; i++) {
		gfn = slot->base_gfn + i;
		if (kvmgt_gfn_is_write_protected(info, gfn)) {
			kvm_slot_page_track_remove_page(kvm, slot, gfn,
						KVM_PAGE_TRACK_WRITE);
			kvmgt_protect_table_del(info, gfn);
		}
	}
	write_unlock(&kvm->mmu_lock);
}

static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu, struct kvm *kvm)
{
	struct intel_vgpu *itr;
	struct kvmgt_guest_info *info;
	int id;
	bool ret = false;

	mutex_lock(&vgpu->gvt->lock);
	for_each_active_vgpu(vgpu->gvt, itr, id) {
		if (!handle_valid(itr->handle))
			continue;

		info = (struct kvmgt_guest_info *)itr->handle;
		if (kvm && kvm == info->kvm) {
			ret = true;
			goto out;
		}
	}
out:
	mutex_unlock(&vgpu->gvt->lock);
	return ret;
}

static int kvmgt_guest_init(struct mdev_device *mdev)
{
	struct kvmgt_guest_info *info;
	struct intel_vgpu *vgpu;
	struct kvmgt_vdev *vdev;
	struct kvm *kvm;

	vgpu = mdev_get_drvdata(mdev);
	if (handle_valid(vgpu->handle))
		return -EEXIST;

	vdev = kvmgt_vdev(vgpu);
	kvm = vdev->kvm;
	if (!kvm || kvm->mm != current->mm) {
		gvt_vgpu_err("KVM is required to use Intel vGPU\n");
		return -ESRCH;
	}

	if (__kvmgt_vgpu_exist(vgpu, kvm))
		return -EEXIST;

	info = vzalloc(sizeof(struct kvmgt_guest_info));
	if (!info)
		return -ENOMEM;

	vgpu->handle = (unsigned long)info;
	info->vgpu = vgpu;
	info->kvm = kvm;
	kvm_get_kvm(info->kvm);

	kvmgt_protect_table_init(info);
	gvt_cache_init(vgpu);

	info->track_node.track_write = kvmgt_page_track_write;
	info->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
	kvm_page_track_register_notifier(kvm, &info->track_node);

	debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, vgpu->debugfs,
			     &vdev->nr_cache_entries);
	return 0;
}

static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
{
	debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME,
				      info->vgpu->debugfs));

	kvm_page_track_unregister_notifier(info->kvm, &info->track_node);
	kvm_put_kvm(info->kvm);
	kvmgt_protect_table_destroy(info);
	gvt_cache_destroy(info->vgpu);
	vfree(info);

	return true;
}

static int kvmgt_attach_vgpu(void *p_vgpu, unsigned long *handle)
{
	struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;

	vgpu->vdev = kzalloc(sizeof(struct kvmgt_vdev), GFP_KERNEL);

	if (!vgpu->vdev)
		return -ENOMEM;

	kvmgt_vdev(vgpu)->vgpu = vgpu;

	return 0;
}

static void kvmgt_detach_vgpu(void *p_vgpu)
{
	int i;
	struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;
	struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu);

	if (!vdev->region)
		return;

	for (i = 0; i < vdev->num_regions; i++)
		if (vdev->region[i].ops->release)
			vdev->region[i].ops->release(vgpu,
					&vdev->region[i]);
	vdev->num_regions = 0;
	kfree(vdev->region);
	vdev->region = NULL;

	kfree(vdev);
}

static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data)
{
	struct kvmgt_guest_info *info;
	struct intel_vgpu *vgpu;
	struct kvmgt_vdev *vdev;

	if (!handle_valid(handle))
		return -ESRCH;

	info = (struct kvmgt_guest_info *)handle;
	vgpu = info->vgpu;
	vdev = kvmgt_vdev(vgpu);

	/*
	 * When the guest powers off, msi_trigger is set to NULL, but the
	 * vgpu's config and mmio registers are not restored to defaults.
	 * If this vgpu is reused by the next VM, its pipes may already be
	 * enabled, so it would receive vblank interrupt requests while
	 * msi_trigger is still NULL, injecting a wrong interrupt into the
	 * guest. So only inject an interrupt when a trigger is available.
	 */
	if (vdev->msi_trigger == NULL)
		return 0;

	if (eventfd_signal(vdev->msi_trigger, 1) == 1)
		return 0;

	return -EFAULT;
}

static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
{
	struct kvmgt_guest_info *info;
	kvm_pfn_t pfn;

	if (!handle_valid(handle))
		return INTEL_GVT_INVALID_ADDR;

	info = (struct kvmgt_guest_info *)handle;

	pfn = gfn_to_pfn(info->kvm, gfn);
	if (is_error_noslot_pfn(pfn))
		return INTEL_GVT_INVALID_ADDR;

	return pfn;
}

static int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn,
		unsigned long size, dma_addr_t *dma_addr)
{
	struct intel_vgpu *vgpu;
	struct kvmgt_vdev *vdev;
	struct gvt_dma *entry;
	int ret;

	if (!handle_valid(handle))
		return -EINVAL;

	vgpu = ((struct kvmgt_guest_info *)handle)->vgpu;
	vdev = kvmgt_vdev(vgpu);

	mutex_lock(&vdev->cache_lock);

	entry = __gvt_cache_find_gfn(vgpu, gfn);
	if (!entry) {
		ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
		if (ret)
			goto err_unlock;

		ret = __gvt_cache_add(vgpu, gfn, *dma_addr, size);
		if (ret)
			goto err_unmap;
	} else if (entry->size != size) {
		/* the same gfn with different size: unmap and re-map */
		gvt_dma_unmap_page(vgpu, gfn, entry->dma_addr, entry->size);
		__gvt_cache_remove_entry(vgpu, entry);

		ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
		if (ret)
			goto err_unlock;

		ret = __gvt_cache_add(vgpu, gfn, *dma_addr, size);
		if (ret)
			goto err_unmap;
	} else {
		kref_get(&entry->ref);
		*dma_addr = entry->dma_addr;
	}

	mutex_unlock(&vdev->cache_lock);
	return 0;

err_unmap:
	gvt_dma_unmap_page(vgpu, gfn, *dma_addr, size);
err_unlock:
	mutex_unlock(&vdev->cache_lock);
	return ret;
}

static int kvmgt_dma_pin_guest_page(unsigned long handle, dma_addr_t dma_addr)
{
	struct kvmgt_guest_info *info;
	struct kvmgt_vdev *vdev;
	struct gvt_dma *entry;
	int ret = 0;

	if (!handle_valid(handle))
		return -ENODEV;

	info = (struct kvmgt_guest_info *)handle;
	vdev = kvmgt_vdev(info->vgpu);

	mutex_lock(&vdev->cache_lock);
	entry = __gvt_cache_find_dma_addr(info->vgpu, dma_addr);
	if (entry)
		kref_get(&entry->ref);
	else
		ret = -ENOMEM;
	mutex_unlock(&vdev->cache_lock);

	return ret;
}

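/*
 * A gvt_dma entry is refcounted: the final kref_put() both unmaps the
 * DMA address (unpinning the guest pages) and drops the entry from both
 * rb-tree caches. Callers hold vdev->cache_lock around the kref_put().
 */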
static void __gvt_dma_release(struct kref *ref)
{
	struct gvt_dma *entry = container_of(ref, typeof(*entry), ref);

	gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr,
			   entry->size);
	__gvt_cache_remove_entry(entry->vgpu, entry);
}

static void kvmgt_dma_unmap_guest_page(unsigned long handle, dma_addr_t dma_addr)
{
	struct intel_vgpu *vgpu;
	struct kvmgt_vdev *vdev;
	struct gvt_dma *entry;

	if (!handle_valid(handle))
		return;

	vgpu = ((struct kvmgt_guest_info *)handle)->vgpu;
	vdev = kvmgt_vdev(vgpu);

	mutex_lock(&vdev->cache_lock);
	entry = __gvt_cache_find_dma_addr(vgpu, dma_addr);
	if (entry)
		kref_put(&entry->ref, __gvt_dma_release);
	mutex_unlock(&vdev->cache_lock);
}

static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len, bool write)
{
	struct kvmgt_guest_info *info;

	if (!handle_valid(handle))
		return -ESRCH;

	info = (struct kvmgt_guest_info *)handle;

	return vfio_dma_rw(kvmgt_vdev(info->vgpu)->vfio_group,
			   gpa, buf, len, write);
}

static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len)
{
	return kvmgt_rw_gpa(handle, gpa, buf, len, false);
}

static int kvmgt_write_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len)
{
	return kvmgt_rw_gpa(handle, gpa, buf, len, true);
}

static unsigned long kvmgt_virt_to_pfn(void *addr)
{
	return PFN_DOWN(__pa(addr));
}

static bool kvmgt_is_valid_gfn(unsigned long handle, unsigned long gfn)
{
	struct kvmgt_guest_info *info;
	struct kvm *kvm;
	int idx;
	bool ret;

	if (!handle_valid(handle))
		return false;

	info = (struct kvmgt_guest_info *)handle;
	kvm = info->kvm;

	idx = srcu_read_lock(&kvm->srcu);
	ret = kvm_is_visible_gfn(kvm, gfn);
	srcu_read_unlock(&kvm->srcu, idx);

	return ret;
}

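/*
 * The MPT (mediated pass-through) hook table: the GVT-g core calls
 * through these for all KVM/VFIO-specific services. It is registered
 * with intel_gvt_register_hypervisor() at module load (see kvmgt_init()
 * below).
 */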
static const struct intel_gvt_mpt kvmgt_mpt = {
	.type = INTEL_GVT_HYPERVISOR_KVM,
	.host_init = kvmgt_host_init,
	.host_exit = kvmgt_host_exit,
	.attach_vgpu = kvmgt_attach_vgpu,
	.detach_vgpu = kvmgt_detach_vgpu,
	.inject_msi = kvmgt_inject_msi,
	.from_virt_to_mfn = kvmgt_virt_to_pfn,
	.enable_page_track = kvmgt_page_track_add,
	.disable_page_track = kvmgt_page_track_remove,
	.read_gpa = kvmgt_read_gpa,
	.write_gpa = kvmgt_write_gpa,
	.gfn_to_mfn = kvmgt_gfn_to_pfn,
	.dma_map_guest_page = kvmgt_dma_map_guest_page,
	.dma_unmap_guest_page = kvmgt_dma_unmap_guest_page,
	.dma_pin_guest_page = kvmgt_dma_pin_guest_page,
	.set_opregion = kvmgt_set_opregion,
	.set_edid = kvmgt_set_edid,
	.get_vfio_device = kvmgt_get_vfio_device,
	.put_vfio_device = kvmgt_put_vfio_device,
	.is_valid_gfn = kvmgt_is_valid_gfn,
};

static int __init kvmgt_init(void)
{
	if (intel_gvt_register_hypervisor(&kvmgt_mpt) < 0)
		return -ENODEV;
	return 0;
}

static void __exit kvmgt_exit(void)
{
	intel_gvt_unregister_hypervisor();
}

module_init(kvmgt_init);
module_exit(kvmgt_exit);

MODULE_LICENSE("GPL and additional rights");
MODULE_AUTHOR("Intel Corporation");