linux/drivers/gpu/drm/i915/gvt/kvmgt.c
   1/*
   2 * KVMGT - the implementation of Intel mediated pass-through framework for KVM
   3 *
   4 * Copyright(c) 2014-2016 Intel Corporation. All rights reserved.
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a
   7 * copy of this software and associated documentation files (the "Software"),
   8 * to deal in the Software without restriction, including without limitation
   9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10 * and/or sell copies of the Software, and to permit persons to whom the
  11 * Software is furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice (including the next
  14 * paragraph) shall be included in all copies or substantial portions of the
  15 * Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 * SOFTWARE.
  24 *
  25 * Authors:
  26 *    Kevin Tian <kevin.tian@intel.com>
  27 *    Jike Song <jike.song@intel.com>
  28 *    Xiaoguang Chen <xiaoguang.chen@intel.com>
  29 */
  30
  31#include <linux/init.h>
  32#include <linux/device.h>
  33#include <linux/mm.h>
  34#include <linux/mmu_context.h>
  35#include <linux/sched/mm.h>
  36#include <linux/types.h>
  37#include <linux/list.h>
  38#include <linux/rbtree.h>
  39#include <linux/spinlock.h>
  40#include <linux/eventfd.h>
  41#include <linux/uuid.h>
  42#include <linux/kvm_host.h>
  43#include <linux/vfio.h>
  44#include <linux/mdev.h>
  45#include <linux/debugfs.h>
  46
  47#include <linux/nospec.h>
  48
  49#include "i915_drv.h"
  50#include "gvt.h"
  51
  52static const struct intel_gvt_ops *intel_gvt_ops;
  53
  54/* helper macros copied from vfio-pci */
  55#define VFIO_PCI_OFFSET_SHIFT   40
  56#define VFIO_PCI_OFFSET_TO_INDEX(off)   (off >> VFIO_PCI_OFFSET_SHIFT)
  57#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
  58#define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
  59
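     /*
      * The region index is encoded in the top bits of the file offset, e.g.
      * VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_BAR2_REGION_INDEX) is 2ULL << 40;
      * the low 40 bits (VFIO_PCI_OFFSET_MASK) select the byte offset inside
      * that region.
      */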
  60#define EDID_BLOB_OFFSET (PAGE_SIZE/2)
  61
  62#define OPREGION_SIGNATURE "IntelGraphicsMem"
  63
  64struct vfio_region;
  65struct intel_vgpu_regops {
   66        ssize_t (*rw)(struct intel_vgpu *vgpu, char *buf,
  67                        size_t count, loff_t *ppos, bool iswrite);
  68        void (*release)(struct intel_vgpu *vgpu,
  69                        struct vfio_region *region);
  70};
  71
  72struct vfio_region {
  73        u32                             type;
  74        u32                             subtype;
  75        size_t                          size;
  76        u32                             flags;
  77        const struct intel_vgpu_regops  *ops;
  78        void                            *data;
  79};
  80
  81struct vfio_edid_region {
  82        struct vfio_region_gfx_edid vfio_edid_regs;
  83        void *edid_blob;
  84};
  85
  86struct kvmgt_pgfn {
  87        gfn_t gfn;
  88        struct hlist_node hnode;
  89};
  90
  91struct kvmgt_guest_info {
  92        struct kvm *kvm;
  93        struct intel_vgpu *vgpu;
  94        struct kvm_page_track_notifier_node track_node;
  95#define NR_BKT (1 << 18)
  96        struct hlist_head ptable[NR_BKT];
  97#undef NR_BKT
  98        struct dentry *debugfs_cache_entries;
  99};
 100
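     /*
      * One gvt_dma tracks a pinned and DMA-mapped guest memory range. Each
      * entry is linked into two per-vGPU rb-trees so it can be found either
      * by guest frame number or by DMA address, and is reference counted.
      */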
 101struct gvt_dma {
 102        struct intel_vgpu *vgpu;
 103        struct rb_node gfn_node;
 104        struct rb_node dma_addr_node;
 105        gfn_t gfn;
 106        dma_addr_t dma_addr;
 107        unsigned long size;
 108        struct kref ref;
 109};
 110
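     /*
      * A valid vgpu handle holds a pointer to the vgpu's kvmgt_guest_info
      * (see the cast in __intel_vgpu_release()); values that fit in the low
      * byte mean no guest is attached yet.
      */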
 111static inline bool handle_valid(unsigned long handle)
 112{
 113        return !!(handle & ~0xff);
 114}
 115
 116static int kvmgt_guest_init(struct mdev_device *mdev);
 117static void intel_vgpu_release_work(struct work_struct *work);
 118static bool kvmgt_guest_exit(struct kvmgt_guest_info *info);
 119
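     /*
      * Unpin the guest pages backing a range previously pinned with
      * gvt_pin_guest_page(). The size is rounded up to whole pages and each
      * page is released individually through vfio_unpin_pages().
      */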
 120static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
 121                unsigned long size)
 122{
 123        int total_pages;
 124        int npage;
 125        int ret;
 126
 127        total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE;
 128
 129        for (npage = 0; npage < total_pages; npage++) {
 130                unsigned long cur_gfn = gfn + npage;
 131
 132                ret = vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &cur_gfn, 1);
 133                WARN_ON(ret != 1);
 134        }
 135}
 136
 137/* Pin a normal or compound guest page for dma. */
 138static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
 139                unsigned long size, struct page **page)
 140{
 141        unsigned long base_pfn = 0;
 142        int total_pages;
 143        int npage;
 144        int ret;
 145
 146        total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE;
 147        /*
  148         * We pin the pages one by one to avoid allocating a big array
  149         * on the stack to hold pfns.
 150         */
 151        for (npage = 0; npage < total_pages; npage++) {
 152                unsigned long cur_gfn = gfn + npage;
 153                unsigned long pfn;
 154
 155                ret = vfio_pin_pages(mdev_dev(vgpu->vdev.mdev), &cur_gfn, 1,
 156                                     IOMMU_READ | IOMMU_WRITE, &pfn);
 157                if (ret != 1) {
 158                        gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n",
 159                                     cur_gfn, ret);
 160                        goto err;
 161                }
 162
 163                if (!pfn_valid(pfn)) {
 164                        gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn);
 165                        npage++;
 166                        ret = -EFAULT;
 167                        goto err;
 168                }
 169
 170                if (npage == 0)
 171                        base_pfn = pfn;
 172                else if (base_pfn + npage != pfn) {
  173                        gvt_vgpu_err("The pages are not contiguous\n");
 174                        ret = -EINVAL;
 175                        npage++;
 176                        goto err;
 177                }
 178        }
 179
 180        *page = pfn_to_page(base_pfn);
 181        return 0;
 182err:
 183        gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE);
 184        return ret;
 185}
 186
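     /*
      * Pin a guest range and create a bidirectional DMA mapping for it
      * against the i915 PCI device; on mapping failure the pages are
      * unpinned again.
      */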
 187static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn,
 188                dma_addr_t *dma_addr, unsigned long size)
 189{
 190        struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
 191        struct page *page = NULL;
 192        int ret;
 193
 194        ret = gvt_pin_guest_page(vgpu, gfn, size, &page);
 195        if (ret)
 196                return ret;
 197
 198        /* Setup DMA mapping. */
 199        *dma_addr = dma_map_page(dev, page, 0, size, PCI_DMA_BIDIRECTIONAL);
 200        if (dma_mapping_error(dev, *dma_addr)) {
  201                gvt_vgpu_err("DMA mapping failed for pfn 0x%lx\n",
  202                             page_to_pfn(page));
 203                gvt_unpin_guest_page(vgpu, gfn, size);
 204                return -ENOMEM;
 205        }
 206
 207        return 0;
 208}
 209
 210static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn,
 211                dma_addr_t dma_addr, unsigned long size)
 212{
 213        struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
 214
 215        dma_unmap_page(dev, dma_addr, size, PCI_DMA_BIDIRECTIONAL);
 216        gvt_unpin_guest_page(vgpu, gfn, size);
 217}
 218
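     /*
      * rb-tree lookups for the per-vGPU DMA cache: dma_addr_cache is keyed
      * by DMA address, gfn_cache by guest frame number. Both trees index the
      * same set of gvt_dma entries.
      */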
 219static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu,
 220                dma_addr_t dma_addr)
 221{
 222        struct rb_node *node = vgpu->vdev.dma_addr_cache.rb_node;
 223        struct gvt_dma *itr;
 224
 225        while (node) {
 226                itr = rb_entry(node, struct gvt_dma, dma_addr_node);
 227
 228                if (dma_addr < itr->dma_addr)
 229                        node = node->rb_left;
 230                else if (dma_addr > itr->dma_addr)
 231                        node = node->rb_right;
 232                else
 233                        return itr;
 234        }
 235        return NULL;
 236}
 237
 238static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn)
 239{
 240        struct rb_node *node = vgpu->vdev.gfn_cache.rb_node;
 241        struct gvt_dma *itr;
 242
 243        while (node) {
 244                itr = rb_entry(node, struct gvt_dma, gfn_node);
 245
 246                if (gfn < itr->gfn)
 247                        node = node->rb_left;
 248                else if (gfn > itr->gfn)
 249                        node = node->rb_right;
 250                else
 251                        return itr;
 252        }
 253        return NULL;
 254}
 255
 256static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
 257                dma_addr_t dma_addr, unsigned long size)
 258{
 259        struct gvt_dma *new, *itr;
 260        struct rb_node **link, *parent = NULL;
 261
 262        new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
 263        if (!new)
 264                return -ENOMEM;
 265
 266        new->vgpu = vgpu;
 267        new->gfn = gfn;
 268        new->dma_addr = dma_addr;
 269        new->size = size;
 270        kref_init(&new->ref);
 271
 272        /* gfn_cache maps gfn to struct gvt_dma. */
 273        link = &vgpu->vdev.gfn_cache.rb_node;
 274        while (*link) {
 275                parent = *link;
 276                itr = rb_entry(parent, struct gvt_dma, gfn_node);
 277
 278                if (gfn < itr->gfn)
 279                        link = &parent->rb_left;
 280                else
 281                        link = &parent->rb_right;
 282        }
 283        rb_link_node(&new->gfn_node, parent, link);
 284        rb_insert_color(&new->gfn_node, &vgpu->vdev.gfn_cache);
 285
 286        /* dma_addr_cache maps dma addr to struct gvt_dma. */
 287        parent = NULL;
 288        link = &vgpu->vdev.dma_addr_cache.rb_node;
 289        while (*link) {
 290                parent = *link;
 291                itr = rb_entry(parent, struct gvt_dma, dma_addr_node);
 292
 293                if (dma_addr < itr->dma_addr)
 294                        link = &parent->rb_left;
 295                else
 296                        link = &parent->rb_right;
 297        }
 298        rb_link_node(&new->dma_addr_node, parent, link);
 299        rb_insert_color(&new->dma_addr_node, &vgpu->vdev.dma_addr_cache);
 300
 301        vgpu->vdev.nr_cache_entries++;
 302        return 0;
 303}
 304
 305static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
 306                                struct gvt_dma *entry)
 307{
 308        rb_erase(&entry->gfn_node, &vgpu->vdev.gfn_cache);
 309        rb_erase(&entry->dma_addr_node, &vgpu->vdev.dma_addr_cache);
 310        kfree(entry);
 311        vgpu->vdev.nr_cache_entries--;
 312}
 313
 314static void gvt_cache_destroy(struct intel_vgpu *vgpu)
 315{
 316        struct gvt_dma *dma;
 317        struct rb_node *node = NULL;
 318
 319        for (;;) {
 320                mutex_lock(&vgpu->vdev.cache_lock);
 321                node = rb_first(&vgpu->vdev.gfn_cache);
 322                if (!node) {
 323                        mutex_unlock(&vgpu->vdev.cache_lock);
 324                        break;
 325                }
 326                dma = rb_entry(node, struct gvt_dma, gfn_node);
 327                gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr, dma->size);
 328                __gvt_cache_remove_entry(vgpu, dma);
 329                mutex_unlock(&vgpu->vdev.cache_lock);
 330        }
 331}
 332
 333static void gvt_cache_init(struct intel_vgpu *vgpu)
 334{
 335        vgpu->vdev.gfn_cache = RB_ROOT;
 336        vgpu->vdev.dma_addr_cache = RB_ROOT;
 337        vgpu->vdev.nr_cache_entries = 0;
 338        mutex_init(&vgpu->vdev.cache_lock);
 339}
 340
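     /*
      * The protect table is a gfn-keyed hash recording which guest page
      * frames are currently write-protected for this guest (used together
      * with the KVM page-track notifier).
      */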
 341static void kvmgt_protect_table_init(struct kvmgt_guest_info *info)
 342{
 343        hash_init(info->ptable);
 344}
 345
 346static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info)
 347{
 348        struct kvmgt_pgfn *p;
 349        struct hlist_node *tmp;
 350        int i;
 351
 352        hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
 353                hash_del(&p->hnode);
 354                kfree(p);
 355        }
 356}
 357
 358static struct kvmgt_pgfn *
 359__kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn)
 360{
 361        struct kvmgt_pgfn *p, *res = NULL;
 362
 363        hash_for_each_possible(info->ptable, p, hnode, gfn) {
 364                if (gfn == p->gfn) {
 365                        res = p;
 366                        break;
 367                }
 368        }
 369
 370        return res;
 371}
 372
 373static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info,
 374                                gfn_t gfn)
 375{
 376        struct kvmgt_pgfn *p;
 377
 378        p = __kvmgt_protect_table_find(info, gfn);
 379        return !!p;
 380}
 381
 382static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn)
 383{
 384        struct kvmgt_pgfn *p;
 385
 386        if (kvmgt_gfn_is_write_protected(info, gfn))
 387                return;
 388
 389        p = kzalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC);
 390        if (WARN(!p, "gfn: 0x%llx\n", gfn))
 391                return;
 392
 393        p->gfn = gfn;
 394        hash_add(info->ptable, &p->hnode, gfn);
 395}
 396
 397static void kvmgt_protect_table_del(struct kvmgt_guest_info *info,
 398                                gfn_t gfn)
 399{
 400        struct kvmgt_pgfn *p;
 401
 402        p = __kvmgt_protect_table_find(info, gfn);
 403        if (p) {
 404                hash_del(&p->hnode);
 405                kfree(p);
 406        }
 407}
 408
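     /*
      * rw handler for the vendor-specific OpRegion region: reads copy out of
      * the opregion buffer registered in kvmgt_set_opregion(); writes are
      * rejected.
      */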
  409static ssize_t intel_vgpu_reg_rw_opregion(struct intel_vgpu *vgpu, char *buf,
 410                size_t count, loff_t *ppos, bool iswrite)
 411{
 412        unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
 413                        VFIO_PCI_NUM_REGIONS;
 414        void *base = vgpu->vdev.region[i].data;
 415        loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
 416
 417        if (pos >= vgpu->vdev.region[i].size || iswrite) {
 418                gvt_vgpu_err("invalid op or offset for Intel vgpu OpRegion\n");
 419                return -EINVAL;
 420        }
 421        count = min(count, (size_t)(vgpu->vdev.region[i].size - pos));
 422        memcpy(buf, base + pos, count);
 423
 424        return count;
 425}
 426
 427static void intel_vgpu_reg_release_opregion(struct intel_vgpu *vgpu,
 428                struct vfio_region *region)
 429{
 430}
 431
 432static const struct intel_vgpu_regops intel_vgpu_regops_opregion = {
 433        .rw = intel_vgpu_reg_rw_opregion,
 434        .release = intel_vgpu_reg_release_opregion,
 435};
 436
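     /*
      * EDID region layout: a vfio_region_gfx_edid control block at offset 0,
      * followed by the EDID blob at EDID_BLOB_OFFSET. Writing link_state
      * triggers hot-plug emulation; only 4-byte accesses to the control
      * registers are accepted.
      */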
 437static int handle_edid_regs(struct intel_vgpu *vgpu,
 438                        struct vfio_edid_region *region, char *buf,
 439                        size_t count, u16 offset, bool is_write)
 440{
 441        struct vfio_region_gfx_edid *regs = &region->vfio_edid_regs;
 442        unsigned int data;
 443
 444        if (offset + count > sizeof(*regs))
 445                return -EINVAL;
 446
 447        if (count != 4)
 448                return -EINVAL;
 449
 450        if (is_write) {
 451                data = *((unsigned int *)buf);
 452                switch (offset) {
 453                case offsetof(struct vfio_region_gfx_edid, link_state):
 454                        if (data == VFIO_DEVICE_GFX_LINK_STATE_UP) {
 455                                if (!drm_edid_block_valid(
 456                                        (u8 *)region->edid_blob,
 457                                        0,
 458                                        true,
 459                                        NULL)) {
 460                                        gvt_vgpu_err("invalid EDID blob\n");
 461                                        return -EINVAL;
 462                                }
 463                                intel_gvt_ops->emulate_hotplug(vgpu, true);
 464                        } else if (data == VFIO_DEVICE_GFX_LINK_STATE_DOWN)
 465                                intel_gvt_ops->emulate_hotplug(vgpu, false);
 466                        else {
 467                                gvt_vgpu_err("invalid EDID link state %d\n",
  468                                        data);
 469                                return -EINVAL;
 470                        }
 471                        regs->link_state = data;
 472                        break;
 473                case offsetof(struct vfio_region_gfx_edid, edid_size):
 474                        if (data > regs->edid_max_size) {
 475                                gvt_vgpu_err("EDID size is bigger than %d!\n",
 476                                        regs->edid_max_size);
 477                                return -EINVAL;
 478                        }
 479                        regs->edid_size = data;
 480                        break;
 481                default:
 482                        /* read-only regs */
 483                        gvt_vgpu_err("write read-only EDID region at offset %d\n",
 484                                offset);
 485                        return -EPERM;
 486                }
 487        } else {
 488                memcpy(buf, (char *)regs + offset, count);
 489        }
 490
 491        return count;
 492}
 493
 494static int handle_edid_blob(struct vfio_edid_region *region, char *buf,
 495                        size_t count, u16 offset, bool is_write)
 496{
 497        if (offset + count > region->vfio_edid_regs.edid_size)
 498                return -EINVAL;
 499
 500        if (is_write)
 501                memcpy(region->edid_blob + offset, buf, count);
 502        else
 503                memcpy(buf, region->edid_blob + offset, count);
 504
 505        return count;
 506}
 507
  508static ssize_t intel_vgpu_reg_rw_edid(struct intel_vgpu *vgpu, char *buf,
 509                size_t count, loff_t *ppos, bool iswrite)
 510{
 511        int ret;
 512        unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
 513                        VFIO_PCI_NUM_REGIONS;
 514        struct vfio_edid_region *region =
 515                (struct vfio_edid_region *)vgpu->vdev.region[i].data;
 516        loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
 517
 518        if (pos < region->vfio_edid_regs.edid_offset) {
 519                ret = handle_edid_regs(vgpu, region, buf, count, pos, iswrite);
 520        } else {
 521                pos -= EDID_BLOB_OFFSET;
 522                ret = handle_edid_blob(region, buf, count, pos, iswrite);
 523        }
 524
 525        if (ret < 0)
 526                gvt_vgpu_err("failed to access EDID region\n");
 527
 528        return ret;
 529}
 530
 531static void intel_vgpu_reg_release_edid(struct intel_vgpu *vgpu,
 532                                        struct vfio_region *region)
 533{
 534        kfree(region->data);
 535}
 536
 537static const struct intel_vgpu_regops intel_vgpu_regops_edid = {
 538        .rw = intel_vgpu_reg_rw_edid,
 539        .release = intel_vgpu_reg_release_edid,
 540};
 541
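     /*
      * Append a device-specific VFIO region to the vGPU's region array;
      * these regions are exposed after the standard VFIO_PCI_NUM_REGIONS
      * indexes.
      */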
 542static int intel_vgpu_register_reg(struct intel_vgpu *vgpu,
 543                unsigned int type, unsigned int subtype,
 544                const struct intel_vgpu_regops *ops,
 545                size_t size, u32 flags, void *data)
 546{
 547        struct vfio_region *region;
 548
 549        region = krealloc(vgpu->vdev.region,
 550                        (vgpu->vdev.num_regions + 1) * sizeof(*region),
 551                        GFP_KERNEL);
 552        if (!region)
 553                return -ENOMEM;
 554
 555        vgpu->vdev.region = region;
 556        vgpu->vdev.region[vgpu->vdev.num_regions].type = type;
 557        vgpu->vdev.region[vgpu->vdev.num_regions].subtype = subtype;
 558        vgpu->vdev.region[vgpu->vdev.num_regions].ops = ops;
 559        vgpu->vdev.region[vgpu->vdev.num_regions].size = size;
 560        vgpu->vdev.region[vgpu->vdev.num_regions].flags = flags;
 561        vgpu->vdev.region[vgpu->vdev.num_regions].data = data;
 562        vgpu->vdev.num_regions++;
 563        return 0;
 564}
 565
 566static int kvmgt_get_vfio_device(void *p_vgpu)
 567{
 568        struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;
 569
 570        vgpu->vdev.vfio_device = vfio_device_get_from_dev(
 571                mdev_dev(vgpu->vdev.mdev));
 572        if (!vgpu->vdev.vfio_device) {
 573                gvt_vgpu_err("failed to get vfio device\n");
 574                return -ENODEV;
 575        }
 576        return 0;
 577}
 578
 579
 580static int kvmgt_set_opregion(void *p_vgpu)
 581{
 582        struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;
 583        void *base;
 584        int ret;
 585
 586        /* Each vgpu has its own opregion, although VFIO would create another
  587         * one later. This one is used to expose the opregion to VFIO; the
  588         * one created by VFIO later is what the guest actually uses.
 589         */
 590        base = vgpu_opregion(vgpu)->va;
 591        if (!base)
 592                return -ENOMEM;
 593
 594        if (memcmp(base, OPREGION_SIGNATURE, 16)) {
 595                memunmap(base);
 596                return -EINVAL;
 597        }
 598
 599        ret = intel_vgpu_register_reg(vgpu,
 600                        PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
 601                        VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION,
 602                        &intel_vgpu_regops_opregion, OPREGION_SIZE,
 603                        VFIO_REGION_INFO_FLAG_READ, base);
 604
 605        return ret;
 606}
 607
 608static int kvmgt_set_edid(void *p_vgpu, int port_num)
 609{
 610        struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;
 611        struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num);
 612        struct vfio_edid_region *base;
 613        int ret;
 614
 615        base = kzalloc(sizeof(*base), GFP_KERNEL);
 616        if (!base)
 617                return -ENOMEM;
 618
 619        /* TODO: Add multi-port and EDID extension block support */
 620        base->vfio_edid_regs.edid_offset = EDID_BLOB_OFFSET;
 621        base->vfio_edid_regs.edid_max_size = EDID_SIZE;
 622        base->vfio_edid_regs.edid_size = EDID_SIZE;
 623        base->vfio_edid_regs.max_xres = vgpu_edid_xres(port->id);
 624        base->vfio_edid_regs.max_yres = vgpu_edid_yres(port->id);
 625        base->edid_blob = port->edid->edid_block;
 626
 627        ret = intel_vgpu_register_reg(vgpu,
 628                        VFIO_REGION_TYPE_GFX,
 629                        VFIO_REGION_SUBTYPE_GFX_EDID,
 630                        &intel_vgpu_regops_edid, EDID_SIZE,
 631                        VFIO_REGION_INFO_FLAG_READ |
 632                        VFIO_REGION_INFO_FLAG_WRITE |
 633                        VFIO_REGION_INFO_FLAG_CAPS, base);
 634
 635        return ret;
 636}
 637
 638static void kvmgt_put_vfio_device(void *vgpu)
 639{
 640        if (WARN_ON(!((struct intel_vgpu *)vgpu)->vdev.vfio_device))
 641                return;
 642
 643        vfio_device_put(((struct intel_vgpu *)vgpu)->vdev.vfio_device);
 644}
 645
 646static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
 647{
 648        struct intel_vgpu *vgpu = NULL;
 649        struct intel_vgpu_type *type;
 650        struct device *pdev;
 651        void *gvt;
 652        int ret;
 653
 654        pdev = mdev_parent_dev(mdev);
 655        gvt = kdev_to_i915(pdev)->gvt;
 656
 657        type = intel_gvt_ops->gvt_find_vgpu_type(gvt, kobject_name(kobj));
 658        if (!type) {
 659                gvt_vgpu_err("failed to find type %s to create\n",
 660                                                kobject_name(kobj));
 661                ret = -EINVAL;
 662                goto out;
 663        }
 664
 665        vgpu = intel_gvt_ops->vgpu_create(gvt, type);
 666        if (IS_ERR_OR_NULL(vgpu)) {
 667                ret = vgpu == NULL ? -EFAULT : PTR_ERR(vgpu);
 668                gvt_err("failed to create intel vgpu: %d\n", ret);
 669                goto out;
 670        }
 671
 672        INIT_WORK(&vgpu->vdev.release_work, intel_vgpu_release_work);
 673
 674        vgpu->vdev.mdev = mdev;
 675        mdev_set_drvdata(mdev, vgpu);
 676
 677        gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n",
 678                     dev_name(mdev_dev(mdev)));
 679        ret = 0;
 680
 681out:
 682        return ret;
 683}
 684
 685static int intel_vgpu_remove(struct mdev_device *mdev)
 686{
 687        struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
 688
 689        if (handle_valid(vgpu->handle))
 690                return -EBUSY;
 691
 692        intel_gvt_ops->vgpu_destroy(vgpu);
 693        return 0;
 694}
 695
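     /*
      * VFIO IOMMU notifier: on DMA_UNMAP, drop every cached DMA mapping
      * whose gfn (iova >> PAGE_SHIFT here) falls inside the unmapped range.
      */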
 696static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
 697                                     unsigned long action, void *data)
 698{
 699        struct intel_vgpu *vgpu = container_of(nb,
 700                                        struct intel_vgpu,
 701                                        vdev.iommu_notifier);
 702
 703        if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
 704                struct vfio_iommu_type1_dma_unmap *unmap = data;
 705                struct gvt_dma *entry;
 706                unsigned long iov_pfn, end_iov_pfn;
 707
 708                iov_pfn = unmap->iova >> PAGE_SHIFT;
 709                end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE;
 710
 711                mutex_lock(&vgpu->vdev.cache_lock);
 712                for (; iov_pfn < end_iov_pfn; iov_pfn++) {
 713                        entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
 714                        if (!entry)
 715                                continue;
 716
 717                        gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
 718                                           entry->size);
 719                        __gvt_cache_remove_entry(vgpu, entry);
 720                }
 721                mutex_unlock(&vgpu->vdev.cache_lock);
 722        }
 723
 724        return NOTIFY_OK;
 725}
 726
 727static int intel_vgpu_group_notifier(struct notifier_block *nb,
 728                                     unsigned long action, void *data)
 729{
 730        struct intel_vgpu *vgpu = container_of(nb,
 731                                        struct intel_vgpu,
 732                                        vdev.group_notifier);
 733
 734        /* the only action we care about */
 735        if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
 736                vgpu->vdev.kvm = data;
 737
 738                if (!data)
 739                        schedule_work(&vgpu->vdev.release_work);
 740        }
 741
 742        return NOTIFY_OK;
 743}
 744
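     /*
      * Open the mdev: register the IOMMU and group notifiers, take a module
      * reference, set up the KVM guest state via kvmgt_guest_init() and
      * activate the vGPU.
      */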
 745static int intel_vgpu_open(struct mdev_device *mdev)
 746{
 747        struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
 748        unsigned long events;
 749        int ret;
 750
 751        vgpu->vdev.iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
 752        vgpu->vdev.group_notifier.notifier_call = intel_vgpu_group_notifier;
 753
 754        events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
 755        ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events,
 756                                &vgpu->vdev.iommu_notifier);
 757        if (ret != 0) {
 758                gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
 759                        ret);
 760                goto out;
 761        }
 762
 763        events = VFIO_GROUP_NOTIFY_SET_KVM;
 764        ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events,
 765                                &vgpu->vdev.group_notifier);
 766        if (ret != 0) {
 767                gvt_vgpu_err("vfio_register_notifier for group failed: %d\n",
 768                        ret);
 769                goto undo_iommu;
 770        }
 771
  772        /* Take a module reference, as the mdev core doesn't take
  773         * one on behalf of the vendor driver.
  774         */
 775        if (!try_module_get(THIS_MODULE))
 776                goto undo_group;
 777
 778        ret = kvmgt_guest_init(mdev);
 779        if (ret)
 780                goto undo_group;
 781
 782        intel_gvt_ops->vgpu_activate(vgpu);
 783
 784        atomic_set(&vgpu->vdev.released, 0);
 785        return ret;
 786
 787undo_group:
 788        vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
 789                                        &vgpu->vdev.group_notifier);
 790
 791undo_iommu:
 792        vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
 793                                        &vgpu->vdev.iommu_notifier);
 794out:
 795        return ret;
 796}
 797
 798static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
 799{
 800        struct eventfd_ctx *trigger;
 801
 802        trigger = vgpu->vdev.msi_trigger;
 803        if (trigger) {
 804                eventfd_ctx_put(trigger);
 805                vgpu->vdev.msi_trigger = NULL;
 806        }
 807}
 808
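     /*
      * Undo intel_vgpu_open(): release the vGPU, unregister both notifiers,
      * drop the module reference and detach the guest state. The
      * vdev.released flag keeps the mdev release path and the group-notifier
      * work item from racing.
      */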
 809static void __intel_vgpu_release(struct intel_vgpu *vgpu)
 810{
 811        struct kvmgt_guest_info *info;
 812        int ret;
 813
 814        if (!handle_valid(vgpu->handle))
 815                return;
 816
 817        if (atomic_cmpxchg(&vgpu->vdev.released, 0, 1))
 818                return;
 819
 820        intel_gvt_ops->vgpu_release(vgpu);
 821
 822        ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY,
 823                                        &vgpu->vdev.iommu_notifier);
 824        WARN(ret, "vfio_unregister_notifier for iommu failed: %d\n", ret);
 825
 826        ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_GROUP_NOTIFY,
 827                                        &vgpu->vdev.group_notifier);
 828        WARN(ret, "vfio_unregister_notifier for group failed: %d\n", ret);
 829
  830        /* drop the module reference taken at open */
 831        module_put(THIS_MODULE);
 832
 833        info = (struct kvmgt_guest_info *)vgpu->handle;
 834        kvmgt_guest_exit(info);
 835
 836        intel_vgpu_release_msi_eventfd_ctx(vgpu);
 837
 838        vgpu->vdev.kvm = NULL;
 839        vgpu->handle = 0;
 840}
 841
 842static void intel_vgpu_release(struct mdev_device *mdev)
 843{
 844        struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
 845
 846        __intel_vgpu_release(vgpu);
 847}
 848
 849static void intel_vgpu_release_work(struct work_struct *work)
 850{
 851        struct intel_vgpu *vgpu = container_of(work, struct intel_vgpu,
 852                                        vdev.release_work);
 853
 854        __intel_vgpu_release(vgpu);
 855}
 856
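     /*
      * Read a BAR base address from the vGPU's virtual configuration space,
      * combining the upper dword for 64-bit memory BARs.
      */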
 857static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
 858{
 859        u32 start_lo, start_hi;
 860        u32 mem_type;
 861
 862        start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
 863                        PCI_BASE_ADDRESS_MEM_MASK;
 864        mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
 865                        PCI_BASE_ADDRESS_MEM_TYPE_MASK;
 866
 867        switch (mem_type) {
 868        case PCI_BASE_ADDRESS_MEM_TYPE_64:
 869                start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space
 870                                                + bar + 4));
 871                break;
 872        case PCI_BASE_ADDRESS_MEM_TYPE_32:
 873        case PCI_BASE_ADDRESS_MEM_TYPE_1M:
 874                /* 1M mem BAR treated as 32-bit BAR */
 875        default:
  876                /* unknown mem type treated as 32-bit BAR */
 877                start_hi = 0;
 878                break;
 879        }
 880
 881        return ((u64)start_hi << 32) | start_lo;
 882}
 883
 884static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, u64 off,
 885                             void *buf, unsigned int count, bool is_write)
 886{
 887        u64 bar_start = intel_vgpu_get_bar_addr(vgpu, bar);
 888        int ret;
 889
 890        if (is_write)
 891                ret = intel_gvt_ops->emulate_mmio_write(vgpu,
 892                                        bar_start + off, buf, count);
 893        else
 894                ret = intel_gvt_ops->emulate_mmio_read(vgpu,
 895                                        bar_start + off, buf, count);
 896        return ret;
 897}
 898
 899static inline bool intel_vgpu_in_aperture(struct intel_vgpu *vgpu, u64 off)
 900{
 901        return off >= vgpu_aperture_offset(vgpu) &&
 902               off < vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu);
 903}
 904
 905static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, u64 off,
 906                void *buf, unsigned long count, bool is_write)
 907{
 908        void __iomem *aperture_va;
 909
 910        if (!intel_vgpu_in_aperture(vgpu, off) ||
 911            !intel_vgpu_in_aperture(vgpu, off + count)) {
 912                gvt_vgpu_err("Invalid aperture offset %llu\n", off);
 913                return -EINVAL;
 914        }
 915
 916        aperture_va = io_mapping_map_wc(&vgpu->gvt->dev_priv->ggtt.iomap,
 917                                        ALIGN_DOWN(off, PAGE_SIZE),
 918                                        count + offset_in_page(off));
 919        if (!aperture_va)
 920                return -EIO;
 921
 922        if (is_write)
 923                memcpy_toio(aperture_va + offset_in_page(off), buf, count);
 924        else
 925                memcpy_fromio(buf, aperture_va + offset_in_page(off), count);
 926
 927        io_mapping_unmap(aperture_va);
 928
 929        return 0;
 930}
 931
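     /*
      * Dispatch an access at *ppos to the matching emulation path: PCI
      * config space, BAR0 MMIO, the BAR2 aperture, or one of the
      * device-specific regions registered via intel_vgpu_register_reg().
      */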
 932static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
 933                        size_t count, loff_t *ppos, bool is_write)
 934{
 935        struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
 936        unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
 937        u64 pos = *ppos & VFIO_PCI_OFFSET_MASK;
 938        int ret = -EINVAL;
 939
 940
 941        if (index >= VFIO_PCI_NUM_REGIONS + vgpu->vdev.num_regions) {
 942                gvt_vgpu_err("invalid index: %u\n", index);
 943                return -EINVAL;
 944        }
 945
 946        switch (index) {
 947        case VFIO_PCI_CONFIG_REGION_INDEX:
 948                if (is_write)
 949                        ret = intel_gvt_ops->emulate_cfg_write(vgpu, pos,
 950                                                buf, count);
 951                else
 952                        ret = intel_gvt_ops->emulate_cfg_read(vgpu, pos,
 953                                                buf, count);
 954                break;
 955        case VFIO_PCI_BAR0_REGION_INDEX:
 956                ret = intel_vgpu_bar_rw(vgpu, PCI_BASE_ADDRESS_0, pos,
 957                                        buf, count, is_write);
 958                break;
 959        case VFIO_PCI_BAR2_REGION_INDEX:
 960                ret = intel_vgpu_aperture_rw(vgpu, pos, buf, count, is_write);
 961                break;
 962        case VFIO_PCI_BAR1_REGION_INDEX:
 963        case VFIO_PCI_BAR3_REGION_INDEX:
 964        case VFIO_PCI_BAR4_REGION_INDEX:
 965        case VFIO_PCI_BAR5_REGION_INDEX:
 966        case VFIO_PCI_VGA_REGION_INDEX:
 967        case VFIO_PCI_ROM_REGION_INDEX:
 968                break;
 969        default:
 970                if (index >= VFIO_PCI_NUM_REGIONS + vgpu->vdev.num_regions)
 971                        return -EINVAL;
 972
 973                index -= VFIO_PCI_NUM_REGIONS;
 974                return vgpu->vdev.region[index].ops->rw(vgpu, buf, count,
 975                                ppos, is_write);
 976        }
 977
 978        return ret == 0 ? count : ret;
 979}
 980
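     /*
      * Return true when the access at *ppos targets the GGTT range inside
      * the MMIO BAR, so that the read/write paths can handle it as a whole
      * 8-byte GGTT entry.
      */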
 981static bool gtt_entry(struct mdev_device *mdev, loff_t *ppos)
 982{
 983        struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
 984        unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
 985        struct intel_gvt *gvt = vgpu->gvt;
 986        int offset;
 987
 988        /* Only allow MMIO GGTT entry access */
 989        if (index != PCI_BASE_ADDRESS_0)
 990                return false;
 991
 992        offset = (u64)(*ppos & VFIO_PCI_OFFSET_MASK) -
 993                intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_0);
 994
  995        return (offset >= gvt->device_info.gtt_start_offset &&
  996                offset < gvt->device_info.gtt_start_offset +
  997                        gvt_ggtt_sz(gvt));
 998}
 999
1000static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
1001                        size_t count, loff_t *ppos)
1002{
1003        unsigned int done = 0;
1004        int ret;
1005
1006        while (count) {
1007                size_t filled;
1008
 1009                /* Only aligned 8-byte reads of GGTT entries are supported */
1010                if (count >= 8 && !(*ppos % 8) &&
1011                        gtt_entry(mdev, ppos)) {
1012                        u64 val;
1013
1014                        ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
1015                                        ppos, false);
1016                        if (ret <= 0)
1017                                goto read_err;
1018
1019                        if (copy_to_user(buf, &val, sizeof(val)))
1020                                goto read_err;
1021
1022                        filled = 8;
1023                } else if (count >= 4 && !(*ppos % 4)) {
1024                        u32 val;
1025
1026                        ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
1027                                        ppos, false);
1028                        if (ret <= 0)
1029                                goto read_err;
1030
1031                        if (copy_to_user(buf, &val, sizeof(val)))
1032                                goto read_err;
1033
1034                        filled = 4;
1035                } else if (count >= 2 && !(*ppos % 2)) {
1036                        u16 val;
1037
1038                        ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
1039                                        ppos, false);
1040                        if (ret <= 0)
1041                                goto read_err;
1042
1043                        if (copy_to_user(buf, &val, sizeof(val)))
1044                                goto read_err;
1045
1046                        filled = 2;
1047                } else {
1048                        u8 val;
1049
1050                        ret = intel_vgpu_rw(mdev, &val, sizeof(val), ppos,
1051                                        false);
1052                        if (ret <= 0)
1053                                goto read_err;
1054
1055                        if (copy_to_user(buf, &val, sizeof(val)))
1056                                goto read_err;
1057
1058                        filled = 1;
1059                }
1060
1061                count -= filled;
1062                done += filled;
1063                *ppos += filled;
1064                buf += filled;
1065        }
1066
1067        return done;
1068
1069read_err:
1070        return -EFAULT;
1071}
1072
1073static ssize_t intel_vgpu_write(struct mdev_device *mdev,
1074                                const char __user *buf,
1075                                size_t count, loff_t *ppos)
1076{
1077        unsigned int done = 0;
1078        int ret;
1079
1080        while (count) {
1081                size_t filled;
1082
 1083                /* Only aligned 8-byte writes of GGTT entries are supported */
1084                if (count >= 8 && !(*ppos % 8) &&
1085                        gtt_entry(mdev, ppos)) {
1086                        u64 val;
1087
1088                        if (copy_from_user(&val, buf, sizeof(val)))
1089                                goto write_err;
1090
1091                        ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
1092                                        ppos, true);
1093                        if (ret <= 0)
1094                                goto write_err;
1095
1096                        filled = 8;
1097                } else if (count >= 4 && !(*ppos % 4)) {
1098                        u32 val;
1099
1100                        if (copy_from_user(&val, buf, sizeof(val)))
1101                                goto write_err;
1102
1103                        ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
1104                                        ppos, true);
1105                        if (ret <= 0)
1106                                goto write_err;
1107
1108                        filled = 4;
1109                } else if (count >= 2 && !(*ppos % 2)) {
1110                        u16 val;
1111
1112                        if (copy_from_user(&val, buf, sizeof(val)))
1113                                goto write_err;
1114
1115                        ret = intel_vgpu_rw(mdev, (char *)&val,
1116                                        sizeof(val), ppos, true);
1117                        if (ret <= 0)
1118                                goto write_err;
1119
1120                        filled = 2;
1121                } else {
1122                        u8 val;
1123
1124                        if (copy_from_user(&val, buf, sizeof(val)))
1125                                goto write_err;
1126
1127                        ret = intel_vgpu_rw(mdev, &val, sizeof(val),
1128                                        ppos, true);
1129                        if (ret <= 0)
1130                                goto write_err;
1131
1132                        filled = 1;
1133                }
1134
1135                count -= filled;
1136                done += filled;
1137                *ppos += filled;
1138                buf += filled;
1139        }
1140
1141        return done;
1142write_err:
1143        return -EFAULT;
1144}
1145
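     /*
      * Only BAR2 (the aperture) may be mmapped. The request must lie
      * entirely within this vGPU's aperture slice and is satisfied by
      * remapping the corresponding host aperture pages.
      */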
1146static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
1147{
1148        unsigned int index;
1149        u64 virtaddr;
1150        unsigned long req_size, pgoff, req_start;
1151        pgprot_t pg_prot;
1152        struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
1153
1154        index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
1155        if (index >= VFIO_PCI_ROM_REGION_INDEX)
1156                return -EINVAL;
1157
1158        if (vma->vm_end < vma->vm_start)
1159                return -EINVAL;
1160        if ((vma->vm_flags & VM_SHARED) == 0)
1161                return -EINVAL;
1162        if (index != VFIO_PCI_BAR2_REGION_INDEX)
1163                return -EINVAL;
1164
1165        pg_prot = vma->vm_page_prot;
1166        virtaddr = vma->vm_start;
1167        req_size = vma->vm_end - vma->vm_start;
1168        pgoff = vma->vm_pgoff &
1169                ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
1170        req_start = pgoff << PAGE_SHIFT;
1171
1172        if (!intel_vgpu_in_aperture(vgpu, req_start))
1173                return -EINVAL;
1174        if (req_start + req_size >
1175            vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu))
1176                return -EINVAL;
1177
1178        pgoff = (gvt_aperture_pa_base(vgpu->gvt) >> PAGE_SHIFT) + pgoff;
1179
1180        return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
1181}
1182
1183static int intel_vgpu_get_irq_count(struct intel_vgpu *vgpu, int type)
1184{
1185        if (type == VFIO_PCI_INTX_IRQ_INDEX || type == VFIO_PCI_MSI_IRQ_INDEX)
1186                return 1;
1187
1188        return 0;
1189}
1190
1191static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu,
1192                        unsigned int index, unsigned int start,
1193                        unsigned int count, u32 flags,
1194                        void *data)
1195{
1196        return 0;
1197}
1198
1199static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu,
1200                        unsigned int index, unsigned int start,
1201                        unsigned int count, u32 flags, void *data)
1202{
1203        return 0;
1204}
1205
1206static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu,
1207                unsigned int index, unsigned int start, unsigned int count,
1208                u32 flags, void *data)
1209{
1210        return 0;
1211}
1212
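     /*
      * Bind the vGPU's MSI to an eventfd supplied by userspace; DATA_NONE
      * with a zero count tears the trigger down again.
      */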
1213static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu,
1214                unsigned int index, unsigned int start, unsigned int count,
1215                u32 flags, void *data)
1216{
1217        struct eventfd_ctx *trigger;
1218
1219        if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
1220                int fd = *(int *)data;
1221
1222                trigger = eventfd_ctx_fdget(fd);
1223                if (IS_ERR(trigger)) {
1224                        gvt_vgpu_err("eventfd_ctx_fdget failed\n");
1225                        return PTR_ERR(trigger);
1226                }
1227                vgpu->vdev.msi_trigger = trigger;
1228        } else if ((flags & VFIO_IRQ_SET_DATA_NONE) && !count)
1229                intel_vgpu_release_msi_eventfd_ctx(vgpu);
1230
1231        return 0;
1232}
1233
1234static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, u32 flags,
1235                unsigned int index, unsigned int start, unsigned int count,
1236                void *data)
1237{
1238        int (*func)(struct intel_vgpu *vgpu, unsigned int index,
1239                        unsigned int start, unsigned int count, u32 flags,
1240                        void *data) = NULL;
1241
1242        switch (index) {
1243        case VFIO_PCI_INTX_IRQ_INDEX:
1244                switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
1245                case VFIO_IRQ_SET_ACTION_MASK:
1246                        func = intel_vgpu_set_intx_mask;
1247                        break;
1248                case VFIO_IRQ_SET_ACTION_UNMASK:
1249                        func = intel_vgpu_set_intx_unmask;
1250                        break;
1251                case VFIO_IRQ_SET_ACTION_TRIGGER:
1252                        func = intel_vgpu_set_intx_trigger;
1253                        break;
1254                }
1255                break;
1256        case VFIO_PCI_MSI_IRQ_INDEX:
1257                switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
1258                case VFIO_IRQ_SET_ACTION_MASK:
1259                case VFIO_IRQ_SET_ACTION_UNMASK:
1260                        /* XXX Need masking support exported */
1261                        break;
1262                case VFIO_IRQ_SET_ACTION_TRIGGER:
1263                        func = intel_vgpu_set_msi_trigger;
1264                        break;
1265                }
1266                break;
1267        }
1268
1269        if (!func)
1270                return -ENOTTY;
1271
1272        return func(vgpu, index, start, count, flags, data);
1273}
1274
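     /*
      * VFIO device ioctl handler: device/region/IRQ info queries,
      * VFIO_DEVICE_SET_IRQS, device reset, and the GFX plane query /
      * dma-buf export interface.
      */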
1275static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
1276                             unsigned long arg)
1277{
1278        struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
1279        unsigned long minsz;
1280
1281        gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", vgpu->id, cmd);
1282
1283        if (cmd == VFIO_DEVICE_GET_INFO) {
1284                struct vfio_device_info info;
1285
1286                minsz = offsetofend(struct vfio_device_info, num_irqs);
1287
1288                if (copy_from_user(&info, (void __user *)arg, minsz))
1289                        return -EFAULT;
1290
1291                if (info.argsz < minsz)
1292                        return -EINVAL;
1293
1294                info.flags = VFIO_DEVICE_FLAGS_PCI;
1295                info.flags |= VFIO_DEVICE_FLAGS_RESET;
1296                info.num_regions = VFIO_PCI_NUM_REGIONS +
1297                                vgpu->vdev.num_regions;
1298                info.num_irqs = VFIO_PCI_NUM_IRQS;
1299
1300                return copy_to_user((void __user *)arg, &info, minsz) ?
1301                        -EFAULT : 0;
1302
1303        } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
1304                struct vfio_region_info info;
1305                struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
1306                unsigned int i;
1307                int ret;
1308                struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
1309                size_t size;
1310                int nr_areas = 1;
1311                int cap_type_id;
1312
1313                minsz = offsetofend(struct vfio_region_info, offset);
1314
1315                if (copy_from_user(&info, (void __user *)arg, minsz))
1316                        return -EFAULT;
1317
1318                if (info.argsz < minsz)
1319                        return -EINVAL;
1320
1321                switch (info.index) {
1322                case VFIO_PCI_CONFIG_REGION_INDEX:
1323                        info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1324                        info.size = vgpu->gvt->device_info.cfg_space_size;
1325                        info.flags = VFIO_REGION_INFO_FLAG_READ |
1326                                     VFIO_REGION_INFO_FLAG_WRITE;
1327                        break;
1328                case VFIO_PCI_BAR0_REGION_INDEX:
1329                        info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1330                        info.size = vgpu->cfg_space.bar[info.index].size;
1331                        if (!info.size) {
1332                                info.flags = 0;
1333                                break;
1334                        }
1335
1336                        info.flags = VFIO_REGION_INFO_FLAG_READ |
1337                                     VFIO_REGION_INFO_FLAG_WRITE;
1338                        break;
1339                case VFIO_PCI_BAR1_REGION_INDEX:
1340                        info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1341                        info.size = 0;
1342                        info.flags = 0;
1343                        break;
1344                case VFIO_PCI_BAR2_REGION_INDEX:
1345                        info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1346                        info.flags = VFIO_REGION_INFO_FLAG_CAPS |
1347                                        VFIO_REGION_INFO_FLAG_MMAP |
1348                                        VFIO_REGION_INFO_FLAG_READ |
1349                                        VFIO_REGION_INFO_FLAG_WRITE;
1350                        info.size = gvt_aperture_sz(vgpu->gvt);
1351
1352                        size = sizeof(*sparse) +
1353                                        (nr_areas * sizeof(*sparse->areas));
1354                        sparse = kzalloc(size, GFP_KERNEL);
1355                        if (!sparse)
1356                                return -ENOMEM;
1357
1358                        sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
1359                        sparse->header.version = 1;
1360                        sparse->nr_areas = nr_areas;
1361                        cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
1362                        sparse->areas[0].offset =
1363                                        PAGE_ALIGN(vgpu_aperture_offset(vgpu));
1364                        sparse->areas[0].size = vgpu_aperture_sz(vgpu);
1365                        break;
1366
1367                case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
1368                        info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1369                        info.size = 0;
1370                        info.flags = 0;
1371
1372                        gvt_dbg_core("get region info bar:%d\n", info.index);
1373                        break;
1374
1375                case VFIO_PCI_ROM_REGION_INDEX:
1376                case VFIO_PCI_VGA_REGION_INDEX:
1377                        info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1378                        info.size = 0;
1379                        info.flags = 0;
1380
1381                        gvt_dbg_core("get region info index:%d\n", info.index);
1382                        break;
1383                default:
1384                        {
1385                                struct vfio_region_info_cap_type cap_type = {
1386                                        .header.id = VFIO_REGION_INFO_CAP_TYPE,
1387                                        .header.version = 1 };
1388
1389                                if (info.index >= VFIO_PCI_NUM_REGIONS +
1390                                                vgpu->vdev.num_regions)
1391                                        return -EINVAL;
1392                                info.index =
1393                                        array_index_nospec(info.index,
1394                                                        VFIO_PCI_NUM_REGIONS +
1395                                                        vgpu->vdev.num_regions);
1396
1397                                i = info.index - VFIO_PCI_NUM_REGIONS;
1398
1399                                info.offset =
1400                                        VFIO_PCI_INDEX_TO_OFFSET(info.index);
1401                                info.size = vgpu->vdev.region[i].size;
1402                                info.flags = vgpu->vdev.region[i].flags;
1403
1404                                cap_type.type = vgpu->vdev.region[i].type;
1405                                cap_type.subtype = vgpu->vdev.region[i].subtype;
1406
1407                                ret = vfio_info_add_capability(&caps,
1408                                                        &cap_type.header,
1409                                                        sizeof(cap_type));
1410                                if (ret)
1411                                        return ret;
1412                        }
1413                }
1414
1415                if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) {
1416                        switch (cap_type_id) {
1417                        case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
1418                                ret = vfio_info_add_capability(&caps,
1419                                        &sparse->header, sizeof(*sparse) +
1420                                        (sparse->nr_areas *
1421                                                sizeof(*sparse->areas)));
1422                                if (ret) {
1423                                        kfree(sparse);
1424                                        return ret;
1425                                }
1426                                break;
1427                        default:
1428                                kfree(sparse);
1429                                return -EINVAL;
1430                        }
1431                }
1432
1433                if (caps.size) {
1434                        info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
1435                        if (info.argsz < sizeof(info) + caps.size) {
1436                                info.argsz = sizeof(info) + caps.size;
1437                                info.cap_offset = 0;
1438                        } else {
1439                                vfio_info_cap_shift(&caps, sizeof(info));
1440                                if (copy_to_user((void __user *)arg +
1441                                                  sizeof(info), caps.buf,
1442                                                  caps.size)) {
1443                                        kfree(caps.buf);
1444                                        kfree(sparse);
1445                                        return -EFAULT;
1446                                }
1447                                info.cap_offset = sizeof(info);
1448                        }
1449
1450                        kfree(caps.buf);
1451                }
1452
1453                kfree(sparse);
1454                return copy_to_user((void __user *)arg, &info, minsz) ?
1455                        -EFAULT : 0;
1456        } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
1457                struct vfio_irq_info info;
1458
1459                minsz = offsetofend(struct vfio_irq_info, count);
1460
1461                if (copy_from_user(&info, (void __user *)arg, minsz))
1462                        return -EFAULT;
1463
1464                if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
1465                        return -EINVAL;
1466
1467                switch (info.index) {
1468                case VFIO_PCI_INTX_IRQ_INDEX:
1469                case VFIO_PCI_MSI_IRQ_INDEX:
1470                        break;
1471                default:
1472                        return -EINVAL;
1473                }
1474
1475                info.flags = VFIO_IRQ_INFO_EVENTFD;
1476
1477                info.count = intel_vgpu_get_irq_count(vgpu, info.index);
1478
1479                if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
1480                        info.flags |= (VFIO_IRQ_INFO_MASKABLE |
1481                                       VFIO_IRQ_INFO_AUTOMASKED);
1482                else
1483                        info.flags |= VFIO_IRQ_INFO_NORESIZE;
1484
1485                return copy_to_user((void __user *)arg, &info, minsz) ?
1486                        -EFAULT : 0;
1487        } else if (cmd == VFIO_DEVICE_SET_IRQS) {
1488                struct vfio_irq_set hdr;
1489                u8 *data = NULL;
1490                int ret = 0;
1491                size_t data_size = 0;
1492
1493                minsz = offsetofend(struct vfio_irq_set, count);
1494
1495                if (copy_from_user(&hdr, (void __user *)arg, minsz))
1496                        return -EFAULT;
1497
1498                if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
1499                        int max = intel_vgpu_get_irq_count(vgpu, hdr.index);
1500
1501                        ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
1502                                                VFIO_PCI_NUM_IRQS, &data_size);
1503                        if (ret) {
1504                                gvt_vgpu_err("vfio_set_irqs_validate_and_prepare failed\n");
1505                                return -EINVAL;
1506                        }
1507                        if (data_size) {
1508                                data = memdup_user((void __user *)(arg + minsz),
1509                                                   data_size);
1510                                if (IS_ERR(data))
1511                                        return PTR_ERR(data);
1512                        }
1513                }
1514
1515                ret = intel_vgpu_set_irqs(vgpu, hdr.flags, hdr.index,
1516                                        hdr.start, hdr.count, data);
1517                kfree(data);
1518
1519                return ret;
1520        } else if (cmd == VFIO_DEVICE_RESET) {
1521                intel_gvt_ops->vgpu_reset(vgpu);
1522                return 0;
1523        } else if (cmd == VFIO_DEVICE_QUERY_GFX_PLANE) {
1524                struct vfio_device_gfx_plane_info dmabuf;
1525                int ret = 0;
1526
1527                minsz = offsetofend(struct vfio_device_gfx_plane_info,
1528                                    dmabuf_id);
1529                if (copy_from_user(&dmabuf, (void __user *)arg, minsz))
1530                        return -EFAULT;
1531                if (dmabuf.argsz < minsz)
1532                        return -EINVAL;
1533
1534                ret = intel_gvt_ops->vgpu_query_plane(vgpu, &dmabuf);
1535                if (ret != 0)
1536                        return ret;
1537
1538                return copy_to_user((void __user *)arg, &dmabuf, minsz) ?
1539                                                                -EFAULT : 0;
1540        } else if (cmd == VFIO_DEVICE_GET_GFX_DMABUF) {
1541                __u32 dmabuf_id;
1542                __s32 dmabuf_fd;
1543
1544                if (get_user(dmabuf_id, (__u32 __user *)arg))
1545                        return -EFAULT;
1546
1547                dmabuf_fd = intel_gvt_ops->vgpu_get_dmabuf(vgpu, dmabuf_id);
1548                return dmabuf_fd;
1549
1550        }
1551
1552        return -ENOTTY;
1553}
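
    /*
     * A minimal userspace sketch (assumptions: a hypothetical device_fd
     * obtained through the usual VFIO container/group setup; error handling
     * and the VFIO_GFX_PLANE_TYPE_PROBE handshake omitted) of how the two
     * display ioctls above are typically driven to export the guest's
     * primary plane as a dmabuf:
     *
     *	struct vfio_device_gfx_plane_info plane = {
     *		.argsz = sizeof(plane),
     *		.flags = VFIO_GFX_PLANE_TYPE_DMABUF,
     *		.drm_plane_type = DRM_PLANE_TYPE_PRIMARY,
     *	};
     *	int dmabuf_fd = -1;
     *
     *	if (!ioctl(device_fd, VFIO_DEVICE_QUERY_GFX_PLANE, &plane))
     *		dmabuf_fd = ioctl(device_fd, VFIO_DEVICE_GET_GFX_DMABUF,
     *				  &plane.dmabuf_id);
     */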
1554
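    /*
     * Per-mdev sysfs attributes exposed under the "intel_vgpu" group:
     * vgpu_id reports the GVT-g vGPU id, hw_id the GEM context hw_id of the
     * vGPU's first shadow context used for submission.
     */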
1555static ssize_t
1556vgpu_id_show(struct device *dev, struct device_attribute *attr,
1557             char *buf)
1558{
1559        struct mdev_device *mdev = mdev_from_dev(dev);
1560
1561        if (mdev) {
1562                struct intel_vgpu *vgpu = (struct intel_vgpu *)
1563                        mdev_get_drvdata(mdev);
1564                return sprintf(buf, "%d\n", vgpu->id);
1565        }
1566        return sprintf(buf, "\n");
1567}
1568
1569static ssize_t
1570hw_id_show(struct device *dev, struct device_attribute *attr,
1571           char *buf)
1572{
1573        struct mdev_device *mdev = mdev_from_dev(dev);
1574
1575        if (mdev) {
1576                struct intel_vgpu *vgpu = (struct intel_vgpu *)
1577                        mdev_get_drvdata(mdev);
1578                return sprintf(buf, "%u\n",
1579                               vgpu->submission.shadow[0]->gem_context->hw_id);
1580        }
1581        return sprintf(buf, "\n");
1582}
1583
1584static DEVICE_ATTR_RO(vgpu_id);
1585static DEVICE_ATTR_RO(hw_id);
1586
1587static struct attribute *intel_vgpu_attrs[] = {
1588        &dev_attr_vgpu_id.attr,
1589        &dev_attr_hw_id.attr,
1590        NULL
1591};
1592
1593static const struct attribute_group intel_vgpu_group = {
1594        .name = "intel_vgpu",
1595        .attrs = intel_vgpu_attrs,
1596};
1597
1598static const struct attribute_group *intel_vgpu_groups[] = {
1599        &intel_vgpu_group,
1600        NULL,
1601};
1602
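    /*
     * mdev parent ops: the mdev/VFIO core dispatches vGPU lifecycle events
     * (create/remove, open/release) and region accesses (read/write/mmap),
     * as well as the ioctl above, through this table. The supported type
     * groups are filled in at host init time from the GVT core.
     */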
1603static struct mdev_parent_ops intel_vgpu_ops = {
1604        .mdev_attr_groups       = intel_vgpu_groups,
1605        .create                 = intel_vgpu_create,
1606        .remove                 = intel_vgpu_remove,
1607
1608        .open                   = intel_vgpu_open,
1609        .release                = intel_vgpu_release,
1610
1611        .read                   = intel_vgpu_read,
1612        .write                  = intel_vgpu_write,
1613        .mmap                   = intel_vgpu_mmap,
1614        .ioctl                  = intel_vgpu_ioctl,
1615};
1616
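    /*
     * Host init/exit: fetch the vGPU type attribute groups from the GVT
     * core and register the GPU as an mdev parent device, which exposes the
     * vGPU types to userspace for mdev creation.
     */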
1617static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops)
1618{
1619        struct attribute **kvm_type_attrs;
1620        struct attribute_group **kvm_vgpu_type_groups;
1621
1622        intel_gvt_ops = ops;
1623        if (!intel_gvt_ops->get_gvt_attrs(&kvm_type_attrs,
1624                        &kvm_vgpu_type_groups))
1625                return -EFAULT;
1626        intel_vgpu_ops.supported_type_groups = kvm_vgpu_type_groups;
1627
1628        return mdev_register_device(dev, &intel_vgpu_ops);
1629}
1630
1631static void kvmgt_host_exit(struct device *dev)
1632{
1633        mdev_unregister_device(dev);
1634}
1635
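    /*
     * Page-track helpers: write-protect (or un-protect) a guest frame via
     * KVM's page-tracking API so that guest writes to it are forwarded to
     * the track_write notifier below. The per-guest protect table mirrors
     * which gfns are currently write-protected.
     */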
1636static int kvmgt_page_track_add(unsigned long handle, u64 gfn)
1637{
1638        struct kvmgt_guest_info *info;
1639        struct kvm *kvm;
1640        struct kvm_memory_slot *slot;
1641        int idx;
1642
1643        if (!handle_valid(handle))
1644                return -ESRCH;
1645
1646        info = (struct kvmgt_guest_info *)handle;
1647        kvm = info->kvm;
1648
1649        idx = srcu_read_lock(&kvm->srcu);
1650        slot = gfn_to_memslot(kvm, gfn);
1651        if (!slot) {
1652                srcu_read_unlock(&kvm->srcu, idx);
1653                return -EINVAL;
1654        }
1655
1656        spin_lock(&kvm->mmu_lock);
1657
1658        if (kvmgt_gfn_is_write_protected(info, gfn))
1659                goto out;
1660
1661        kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
1662        kvmgt_protect_table_add(info, gfn);
1663
1664out:
1665        spin_unlock(&kvm->mmu_lock);
1666        srcu_read_unlock(&kvm->srcu, idx);
1667        return 0;
1668}
1669
1670static int kvmgt_page_track_remove(unsigned long handle, u64 gfn)
1671{
1672        struct kvmgt_guest_info *info;
1673        struct kvm *kvm;
1674        struct kvm_memory_slot *slot;
1675        int idx;
1676
1677        if (!handle_valid(handle))
1678                return 0;
1679
1680        info = (struct kvmgt_guest_info *)handle;
1681        kvm = info->kvm;
1682
1683        idx = srcu_read_lock(&kvm->srcu);
1684        slot = gfn_to_memslot(kvm, gfn);
1685        if (!slot) {
1686                srcu_read_unlock(&kvm->srcu, idx);
1687                return -EINVAL;
1688        }
1689
1690        spin_lock(&kvm->mmu_lock);
1691
1692        if (!kvmgt_gfn_is_write_protected(info, gfn))
1693                goto out;
1694
1695        kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
1696        kvmgt_protect_table_del(info, gfn);
1697
1698out:
1699        spin_unlock(&kvm->mmu_lock);
1700        srcu_read_unlock(&kvm->srcu, idx);
1701        return 0;
1702}
1703
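    /*
     * KVM page-track notifier callbacks: track_write forwards emulated
     * writes to write-protected gfns to the GVT write-protect handler;
     * track_flush_slot drops the write protection of every tracked gfn in a
     * memslot that is going away.
     */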
1704static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1705                const u8 *val, int len,
1706                struct kvm_page_track_notifier_node *node)
1707{
1708        struct kvmgt_guest_info *info = container_of(node,
1709                                        struct kvmgt_guest_info, track_node);
1710
1711        if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
1712                intel_gvt_ops->write_protect_handler(info->vgpu, gpa,
1713                                                     (void *)val, len);
1714}
1715
1716static void kvmgt_page_track_flush_slot(struct kvm *kvm,
1717                struct kvm_memory_slot *slot,
1718                struct kvm_page_track_notifier_node *node)
1719{
1720        int i;
1721        gfn_t gfn;
1722        struct kvmgt_guest_info *info = container_of(node,
1723                                        struct kvmgt_guest_info, track_node);
1724
1725        spin_lock(&kvm->mmu_lock);
1726        for (i = 0; i < slot->npages; i++) {
1727                gfn = slot->base_gfn + i;
1728                if (kvmgt_gfn_is_write_protected(info, gfn)) {
1729                        kvm_slot_page_track_remove_page(kvm, slot, gfn,
1730                                                KVM_PAGE_TRACK_WRITE);
1731                        kvmgt_protect_table_del(info, gfn);
1732                }
1733        }
1734        spin_unlock(&kvm->mmu_lock);
1735}
1736
1737static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu, struct kvm *kvm)
1738{
1739        struct intel_vgpu *itr;
1740        struct kvmgt_guest_info *info;
1741        int id;
1742        bool ret = false;
1743
1744        mutex_lock(&vgpu->gvt->lock);
1745        for_each_active_vgpu(vgpu->gvt, itr, id) {
1746                if (!handle_valid(itr->handle))
1747                        continue;
1748
1749                info = (struct kvmgt_guest_info *)itr->handle;
1750                if (kvm && kvm == info->kvm) {
1751                        ret = true;
1752                        goto out;
1753                }
1754        }
1755out:
1756        mutex_unlock(&vgpu->gvt->lock);
1757        return ret;
1758}
1759
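    /*
     * Guest init/exit: bind the vGPU to the KVM instance associated with
     * it. Init allocates the kvmgt_guest_info, takes a reference on the
     * kvm, sets up the protect table and DMA cache, registers the
     * page-track notifier and creates a debugfs counter for the cached DMA
     * mappings.
     */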
1760static int kvmgt_guest_init(struct mdev_device *mdev)
1761{
1762        struct kvmgt_guest_info *info;
1763        struct intel_vgpu *vgpu;
1764        struct kvm *kvm;
1765
1766        vgpu = mdev_get_drvdata(mdev);
1767        if (handle_valid(vgpu->handle))
1768                return -EEXIST;
1769
1770        kvm = vgpu->vdev.kvm;
1771        if (!kvm || kvm->mm != current->mm) {
1772                gvt_vgpu_err("KVM is required to use Intel vGPU\n");
1773                return -ESRCH;
1774        }
1775
1776        if (__kvmgt_vgpu_exist(vgpu, kvm))
1777                return -EEXIST;
1778
1779        info = vzalloc(sizeof(struct kvmgt_guest_info));
1780        if (!info)
1781                return -ENOMEM;
1782
1783        vgpu->handle = (unsigned long)info;
1784        info->vgpu = vgpu;
1785        info->kvm = kvm;
1786        kvm_get_kvm(info->kvm);
1787
1788        kvmgt_protect_table_init(info);
1789        gvt_cache_init(vgpu);
1790
1791        init_completion(&vgpu->vblank_done);
1792
1793        info->track_node.track_write = kvmgt_page_track_write;
1794        info->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
1795        kvm_page_track_register_notifier(kvm, &info->track_node);
1796
1797        info->debugfs_cache_entries = debugfs_create_ulong(
1798                                                "kvmgt_nr_cache_entries",
1799                                                0444, vgpu->debugfs,
1800                                                &vgpu->vdev.nr_cache_entries);
1801        if (!info->debugfs_cache_entries)
1802                gvt_vgpu_err("Cannot create kvmgt debugfs entry\n");
1803
1804        return 0;
1805}
1806
1807static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
1808{
1809        debugfs_remove(info->debugfs_cache_entries);
1810
1811        kvm_page_track_unregister_notifier(info->kvm, &info->track_node);
1812        kvm_put_kvm(info->kvm);
1813        kvmgt_protect_table_destroy(info);
1814        gvt_cache_destroy(info->vgpu);
1815        vfree(info);
1816
1817        return true;
1818}
1819
1820static int kvmgt_attach_vgpu(void *vgpu, unsigned long *handle)
1821{
1822        /* nothing to do here */
1823        return 0;
1824}
1825
1826static void kvmgt_detach_vgpu(void *p_vgpu)
1827{
1828        int i;
1829        struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;
1830
1831        if (!vgpu->vdev.region)
1832                return;
1833
1834        for (i = 0; i < vgpu->vdev.num_regions; i++)
1835                if (vgpu->vdev.region[i].ops->release)
1836                        vgpu->vdev.region[i].ops->release(vgpu,
1837                                        &vgpu->vdev.region[i]);
1838        vgpu->vdev.num_regions = 0;
1839        kfree(vgpu->vdev.region);
1840        vgpu->vdev.region = NULL;
1841}
1842
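    /*
     * MSI injection: signal the eventfd that userspace registered through
     * VFIO_DEVICE_SET_IRQS for the MSI index; in a typical VFIO/KVM setup
     * this eventfd is wired up as a KVM irqfd, so the signal is delivered
     * to the guest as an MSI.
     */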
1843static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data)
1844{
1845        struct kvmgt_guest_info *info;
1846        struct intel_vgpu *vgpu;
1847
1848        if (!handle_valid(handle))
1849                return -ESRCH;
1850
1851        info = (struct kvmgt_guest_info *)handle;
1852        vgpu = info->vgpu;
1853
1854        /*
1855         * When the guest is powered off, msi_trigger is set to NULL, but the
1856         * vGPU's config space and MMIO registers are not restored to their
1857         * defaults. If this vGPU is reused by the next VM, one of its pipes
1858         * may still be enabled, so the vGPU will receive vblank interrupt
1859         * requests as soon as it becomes active. msi_trigger stays NULL until
1860         * the guest enables MSI, so in that case return success without
1861         * injecting an interrupt into the guest.
1862         */
1863        if (vgpu->vdev.msi_trigger == NULL)
1864                return 0;
1865
1866        if (eventfd_signal(vgpu->vdev.msi_trigger, 1) == 1)
1867                return 0;
1868
1869        return -EFAULT;
1870}
1871
1872static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
1873{
1874        struct kvmgt_guest_info *info;
1875        kvm_pfn_t pfn;
1876
1877        if (!handle_valid(handle))
1878                return INTEL_GVT_INVALID_ADDR;
1879
1880        info = (struct kvmgt_guest_info *)handle;
1881
1882        pfn = gfn_to_pfn(info->kvm, gfn);
1883        if (is_error_noslot_pfn(pfn))
1884                return INTEL_GVT_INVALID_ADDR;
1885
1886        return pfn;
1887}
1888
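    /*
     * DMA map/unmap of guest pages: mappings are cached per vGPU and
     * reference counted. A cache hit with the same size just takes a kref;
     * a size mismatch unmaps and re-maps the gfn; the final kref_put tears
     * the mapping down via __gvt_dma_release.
     */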
1889static int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn,
1890                unsigned long size, dma_addr_t *dma_addr)
1891{
1892        struct kvmgt_guest_info *info;
1893        struct intel_vgpu *vgpu;
1894        struct gvt_dma *entry;
1895        int ret;
1896
1897        if (!handle_valid(handle))
1898                return -EINVAL;
1899
1900        info = (struct kvmgt_guest_info *)handle;
1901        vgpu = info->vgpu;
1902
1903        mutex_lock(&info->vgpu->vdev.cache_lock);
1904
1905        entry = __gvt_cache_find_gfn(info->vgpu, gfn);
1906        if (!entry) {
1907                ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
1908                if (ret)
1909                        goto err_unlock;
1910
1911                ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr, size);
1912                if (ret)
1913                        goto err_unmap;
1914        } else if (entry->size != size) {
1915                /* the same gfn with different size: unmap and re-map */
1916                gvt_dma_unmap_page(vgpu, gfn, entry->dma_addr, entry->size);
1917                __gvt_cache_remove_entry(vgpu, entry);
1918
1919                ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
1920                if (ret)
1921                        goto err_unlock;
1922
1923                ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr, size);
1924                if (ret)
1925                        goto err_unmap;
1926        } else {
1927                kref_get(&entry->ref);
1928                *dma_addr = entry->dma_addr;
1929        }
1930
1931        mutex_unlock(&info->vgpu->vdev.cache_lock);
1932        return 0;
1933
1934err_unmap:
1935        gvt_dma_unmap_page(vgpu, gfn, *dma_addr, size);
1936err_unlock:
1937        mutex_unlock(&info->vgpu->vdev.cache_lock);
1938        return ret;
1939}
1940
1941static void __gvt_dma_release(struct kref *ref)
1942{
1943        struct gvt_dma *entry = container_of(ref, typeof(*entry), ref);
1944
1945        gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr,
1946                           entry->size);
1947        __gvt_cache_remove_entry(entry->vgpu, entry);
1948}
1949
1950static void kvmgt_dma_unmap_guest_page(unsigned long handle, dma_addr_t dma_addr)
1951{
1952        struct kvmgt_guest_info *info;
1953        struct gvt_dma *entry;
1954
1955        if (!handle_valid(handle))
1956                return;
1957
1958        info = (struct kvmgt_guest_info *)handle;
1959
1960        mutex_lock(&info->vgpu->vdev.cache_lock);
1961        entry = __gvt_cache_find_dma_addr(info->vgpu, dma_addr);
1962        if (entry)
1963                kref_put(&entry->ref, __gvt_dma_release);
1964        mutex_unlock(&info->vgpu->vdev.cache_lock);
1965}
1966
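    /*
     * GPA read/write: access guest memory through kvm_read_guest() /
     * kvm_write_guest(). When called from a kernel thread (no current->mm),
     * the guest's mm is temporarily adopted with use_mm() so the copy can
     * walk the userspace mapping backing guest memory.
     */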
1967static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
1968                        void *buf, unsigned long len, bool write)
1969{
1970        struct kvmgt_guest_info *info;
1971        struct kvm *kvm;
1972        int idx, ret;
1973        bool kthread = current->mm == NULL;
1974
1975        if (!handle_valid(handle))
1976                return -ESRCH;
1977
1978        info = (struct kvmgt_guest_info *)handle;
1979        kvm = info->kvm;
1980
1981        if (kthread) {
1982                if (!mmget_not_zero(kvm->mm))
1983                        return -EFAULT;
1984                use_mm(kvm->mm);
1985        }
1986
1987        idx = srcu_read_lock(&kvm->srcu);
1988        ret = write ? kvm_write_guest(kvm, gpa, buf, len) :
1989                      kvm_read_guest(kvm, gpa, buf, len);
1990        srcu_read_unlock(&kvm->srcu, idx);
1991
1992        if (kthread) {
1993                unuse_mm(kvm->mm);
1994                mmput(kvm->mm);
1995        }
1996
1997        return ret;
1998}
1999
2000static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa,
2001                        void *buf, unsigned long len)
2002{
2003        return kvmgt_rw_gpa(handle, gpa, buf, len, false);
2004}
2005
2006static int kvmgt_write_gpa(unsigned long handle, unsigned long gpa,
2007                        void *buf, unsigned long len)
2008{
2009        return kvmgt_rw_gpa(handle, gpa, buf, len, true);
2010}
2011
2012static unsigned long kvmgt_virt_to_pfn(void *addr)
2013{
2014        return PFN_DOWN(__pa(addr));
2015}
2016
2017static bool kvmgt_is_valid_gfn(unsigned long handle, unsigned long gfn)
2018{
2019        struct kvmgt_guest_info *info;
2020        struct kvm *kvm;
2021        int idx;
2022        bool ret;
2023
2024        if (!handle_valid(handle))
2025                return false;
2026
2027        info = (struct kvmgt_guest_info *)handle;
2028        kvm = info->kvm;
2029
2030        idx = srcu_read_lock(&kvm->srcu);
2031        ret = kvm_is_visible_gfn(kvm, gfn);
2032        srcu_read_unlock(&kvm->srcu, idx);
2033
2034        return ret;
2035}
2036
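    /*
     * The MPT (mediated pass-through) table implements the GVT hypervisor
     * abstraction on top of KVM/VFIO; it is registered with the GVT core in
     * kvmgt_init() below, and the GVT core calls back into these hooks for
     * page tracking, guest memory access, DMA mapping and MSI injection.
     */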
2037static struct intel_gvt_mpt kvmgt_mpt = {
2038        .type = INTEL_GVT_HYPERVISOR_KVM,
2039        .host_init = kvmgt_host_init,
2040        .host_exit = kvmgt_host_exit,
2041        .attach_vgpu = kvmgt_attach_vgpu,
2042        .detach_vgpu = kvmgt_detach_vgpu,
2043        .inject_msi = kvmgt_inject_msi,
2044        .from_virt_to_mfn = kvmgt_virt_to_pfn,
2045        .enable_page_track = kvmgt_page_track_add,
2046        .disable_page_track = kvmgt_page_track_remove,
2047        .read_gpa = kvmgt_read_gpa,
2048        .write_gpa = kvmgt_write_gpa,
2049        .gfn_to_mfn = kvmgt_gfn_to_pfn,
2050        .dma_map_guest_page = kvmgt_dma_map_guest_page,
2051        .dma_unmap_guest_page = kvmgt_dma_unmap_guest_page,
2052        .set_opregion = kvmgt_set_opregion,
2053        .set_edid = kvmgt_set_edid,
2054        .get_vfio_device = kvmgt_get_vfio_device,
2055        .put_vfio_device = kvmgt_put_vfio_device,
2056        .is_valid_gfn = kvmgt_is_valid_gfn,
2057};
2058
2059static int __init kvmgt_init(void)
2060{
2061        if (intel_gvt_register_hypervisor(&kvmgt_mpt) < 0)
2062                return -ENODEV;
2063        return 0;
2064}
2065
2066static void __exit kvmgt_exit(void)
2067{
2068        intel_gvt_unregister_hypervisor();
2069}
2070
2071module_init(kvmgt_init);
2072module_exit(kvmgt_exit);
2073
2074MODULE_LICENSE("GPL and additional rights");
2075MODULE_AUTHOR("Intel Corporation");
2076