linux/virt/kvm/assigned-dev.c
/*
 * Kernel-based Virtual Machine - device assignment support
 *
 * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/fs.h>
#include "irq.h"

static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
						      int assigned_dev_id)
{
	struct list_head *ptr;
	struct kvm_assigned_dev_kernel *match;

	list_for_each(ptr, head) {
		match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
		if (match->assigned_dev_id == assigned_dev_id)
			return match;
	}
	return NULL;
}

static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
				    *assigned_dev, int irq)
{
	int i, index;
	struct msix_entry *host_msix_entries;

	host_msix_entries = assigned_dev->host_msix_entries;

	index = -1;
	for (i = 0; i < assigned_dev->entries_nr; i++)
		if (irq == host_msix_entries[i].vector) {
			index = i;
			break;
		}
	if (index < 0)
		printk(KERN_WARNING "Failed to find correlated MSI-X entry!\n");

	return index;
}

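/*
 * Hard IRQ handler for INTx with PCI 2.3 masking: it runs in interrupt
 * context, masks the interrupt at device level via
 * pci_check_and_mask_intx(), and returns IRQ_WAKE_THREAD so that the
 * threaded handler below injects the interrupt into the guest. If the
 * device did not assert the line (the IRQ is shared), IRQ_NONE is
 * returned and the threaded handler does not run.
 */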
static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int ret;

	spin_lock(&assigned_dev->intx_lock);
	if (pci_check_and_mask_intx(assigned_dev->dev)) {
		assigned_dev->host_irq_disabled = true;
		ret = IRQ_WAKE_THREAD;
	} else
		ret = IRQ_NONE;
	spin_unlock(&assigned_dev->intx_lock);

	return ret;
}

static void
kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
				 int vector)
{
	if (unlikely(assigned_dev->irq_requested_type &
		     KVM_DEV_IRQ_GUEST_INTX)) {
		spin_lock(&assigned_dev->intx_mask_lock);
		if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
			kvm_set_irq(assigned_dev->kvm,
				    assigned_dev->irq_source_id, vector, 1);
		spin_unlock(&assigned_dev->intx_mask_lock);
	} else
		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
			    vector, 1);
}

static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;

	if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
		spin_lock_irq(&assigned_dev->intx_lock);
		disable_irq_nosync(irq);
		assigned_dev->host_irq_disabled = true;
		spin_unlock_irq(&assigned_dev->intx_lock);
	}

	kvm_assigned_dev_raise_guest_irq(assigned_dev,
					 assigned_dev->guest_irq);

	return IRQ_HANDLED;
}

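/*
 * For MSI and MSI-X, the hard handlers first try to inject the interrupt
 * atomically via kvm_set_irq_inatomic(). If that returns -EWOULDBLOCK,
 * meaning the injection cannot be performed in atomic context, they
 * return IRQ_WAKE_THREAD to defer the injection to the threaded handler.
 */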
#ifdef __KVM_HAVE_MSI
static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int ret = kvm_set_irq_inatomic(assigned_dev->kvm,
				       assigned_dev->irq_source_id,
				       assigned_dev->guest_irq, 1);
	return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
}

static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;

	kvm_assigned_dev_raise_guest_irq(assigned_dev,
					 assigned_dev->guest_irq);

	return IRQ_HANDLED;
}
#endif

#ifdef __KVM_HAVE_MSIX
static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int index = find_index_from_host_irq(assigned_dev, irq);
	u32 vector;
	int ret = 0;

	if (index >= 0) {
		vector = assigned_dev->guest_msix_entries[index].vector;
		ret = kvm_set_irq_inatomic(assigned_dev->kvm,
					   assigned_dev->irq_source_id,
					   vector, 1);
	}

	return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
}

static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
	int index = find_index_from_host_irq(assigned_dev, irq);
	u32 vector;

	if (index >= 0) {
		vector = assigned_dev->guest_msix_entries[index].vector;
		kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
	}

	return IRQ_HANDLED;
}
#endif

/* Ack the irq line for an assigned device */
static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
{
	struct kvm_assigned_dev_kernel *dev =
		container_of(kian, struct kvm_assigned_dev_kernel,
			     ack_notifier);

	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);

	spin_lock(&dev->intx_mask_lock);

	if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
		bool reassert = false;

		spin_lock_irq(&dev->intx_lock);
		/*
		 * The guest IRQ may be shared so this ack can come from an
		 * IRQ for another guest device.
		 */
		if (dev->host_irq_disabled) {
			if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
				enable_irq(dev->host_irq);
			else if (!pci_check_and_unmask_intx(dev->dev))
				reassert = true;
			dev->host_irq_disabled = reassert;
		}
		spin_unlock_irq(&dev->intx_lock);

		if (reassert)
			kvm_set_irq(dev->kvm, dev->irq_source_id,
				    dev->guest_irq, 1);
	}

	spin_unlock(&dev->intx_mask_lock);
}

static void deassign_guest_irq(struct kvm *kvm,
			       struct kvm_assigned_dev_kernel *assigned_dev)
{
	if (assigned_dev->ack_notifier.gsi != -1)
		kvm_unregister_irq_ack_notifier(kvm,
						&assigned_dev->ack_notifier);

	kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
		    assigned_dev->guest_irq, 0);

	if (assigned_dev->irq_source_id != -1)
		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
	assigned_dev->irq_source_id = -1;
	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
}

/* This function implicitly holds the kvm->lock mutex due to cancel_work_sync() */
static void deassign_host_irq(struct kvm *kvm,
			      struct kvm_assigned_dev_kernel *assigned_dev)
{
	/*
	 * We disable the irq here to prevent further events.
	 *
	 * Note that this may result in a nested disable if the interrupt
	 * type is INTx, but that's OK since we are going to free it anyway.
	 *
	 * If this function is part of VM destruction, ensure that the kvm
	 * state is still valid up to this point, since we may also have to
	 * wait on a currently running IRQ handler.
	 */
	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
		int i;
		for (i = 0; i < assigned_dev->entries_nr; i++)
			disable_irq(assigned_dev->host_msix_entries[i].vector);

		for (i = 0; i < assigned_dev->entries_nr; i++)
			free_irq(assigned_dev->host_msix_entries[i].vector,
				 assigned_dev);

		assigned_dev->entries_nr = 0;
		kfree(assigned_dev->host_msix_entries);
		kfree(assigned_dev->guest_msix_entries);
		pci_disable_msix(assigned_dev->dev);
	} else {
		/* Deal with MSI and INTx */
		if ((assigned_dev->irq_requested_type &
		     KVM_DEV_IRQ_HOST_INTX) &&
		    (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
			spin_lock_irq(&assigned_dev->intx_lock);
			pci_intx(assigned_dev->dev, false);
			spin_unlock_irq(&assigned_dev->intx_lock);
			synchronize_irq(assigned_dev->host_irq);
		} else
			disable_irq(assigned_dev->host_irq);

		free_irq(assigned_dev->host_irq, assigned_dev);

		if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
			pci_disable_msi(assigned_dev->dev);
	}

	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
}

static int kvm_deassign_irq(struct kvm *kvm,
			    struct kvm_assigned_dev_kernel *assigned_dev,
			    unsigned long irq_requested_type)
{
	unsigned long guest_irq_type, host_irq_type;

	if (!irqchip_in_kernel(kvm))
		return -EINVAL;
	/* no irq assignment to deassign */
	if (!assigned_dev->irq_requested_type)
		return -ENXIO;

	host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
	guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;

	if (host_irq_type)
		deassign_host_irq(kvm, assigned_dev);
	if (guest_irq_type)
		deassign_guest_irq(kvm, assigned_dev);

	return 0;
}

static void kvm_free_assigned_irq(struct kvm *kvm,
				  struct kvm_assigned_dev_kernel *assigned_dev)
{
	kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
}

static void kvm_free_assigned_device(struct kvm *kvm,
				     struct kvm_assigned_dev_kernel
				     *assigned_dev)
{
	kvm_free_assigned_irq(kvm, assigned_dev);

	pci_reset_function(assigned_dev->dev);
	if (pci_load_and_free_saved_state(assigned_dev->dev,
					  &assigned_dev->pci_saved_state))
		printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
		       __func__, dev_name(&assigned_dev->dev->dev));
	else
		pci_restore_state(assigned_dev->dev);

	assigned_dev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;

	pci_release_regions(assigned_dev->dev);
	pci_disable_device(assigned_dev->dev);
	pci_dev_put(assigned_dev->dev);

	list_del(&assigned_dev->list);
	kfree(assigned_dev);
}

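/*
 * Note the per-device teardown order implemented in
 * kvm_free_assigned_device() above and relied upon by
 * kvm_free_all_assigned_devices() below: IRQs are deassigned first, then
 * the function is reset and the PCI state saved at assignment time is
 * restored, and only then are the regions released and the device
 * disabled and dropped.
 */
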
void kvm_free_all_assigned_devices(struct kvm *kvm)
{
	struct list_head *ptr, *ptr2;
	struct kvm_assigned_dev_kernel *assigned_dev;

	list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
		assigned_dev = list_entry(ptr,
					  struct kvm_assigned_dev_kernel,
					  list);

		kvm_free_assigned_device(kvm, assigned_dev);
	}
}

static int assigned_device_enable_host_intx(struct kvm *kvm,
					    struct kvm_assigned_dev_kernel *dev)
{
	irq_handler_t irq_handler;
	unsigned long flags;

	dev->host_irq = dev->dev->irq;

	/*
	 * We can only share the IRQ line with other host devices if we are
	 * able to disable the IRQ source at device-level - independently of
	 * the guest driver. Otherwise host devices may suffer from unbounded
	 * IRQ latencies when the guest keeps the line asserted.
	 */
	if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
		irq_handler = kvm_assigned_dev_intx;
		flags = IRQF_SHARED;
	} else {
		irq_handler = NULL;
		flags = IRQF_ONESHOT;
	}
	if (request_threaded_irq(dev->host_irq, irq_handler,
				 kvm_assigned_dev_thread_intx, flags,
				 dev->irq_name, dev))
		return -EIO;

	if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
		spin_lock_irq(&dev->intx_lock);
		pci_intx(dev->dev, true);
		spin_unlock_irq(&dev->intx_lock);
	}
	return 0;
}

#ifdef __KVM_HAVE_MSI
static int assigned_device_enable_host_msi(struct kvm *kvm,
					   struct kvm_assigned_dev_kernel *dev)
{
	int r;

	if (!dev->dev->msi_enabled) {
		r = pci_enable_msi(dev->dev);
		if (r)
			return r;
	}

	dev->host_irq = dev->dev->irq;
	if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi,
				 kvm_assigned_dev_thread_msi, 0,
				 dev->irq_name, dev)) {
		pci_disable_msi(dev->dev);
		return -EIO;
	}

	return 0;
}
#endif

#ifdef __KVM_HAVE_MSIX
static int assigned_device_enable_host_msix(struct kvm *kvm,
					    struct kvm_assigned_dev_kernel *dev)
{
	int i, r = -EINVAL;

	/* host_msix_entries and guest_msix_entries should have been
	 * initialized */
	if (dev->entries_nr == 0)
		return r;

	r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr);
	if (r)
		return r;

	for (i = 0; i < dev->entries_nr; i++) {
		r = request_threaded_irq(dev->host_msix_entries[i].vector,
					 kvm_assigned_dev_msix,
					 kvm_assigned_dev_thread_msix,
					 0, dev->irq_name, dev);
		if (r)
			goto err;
	}

	return 0;
err:
	for (i -= 1; i >= 0; i--)
		free_irq(dev->host_msix_entries[i].vector, dev);
	pci_disable_msix(dev->dev);
	return r;
}

#endif

static int assigned_device_enable_guest_intx(struct kvm *kvm,
				struct kvm_assigned_dev_kernel *dev,
				struct kvm_assigned_irq *irq)
{
	dev->guest_irq = irq->guest_irq;
	dev->ack_notifier.gsi = irq->guest_irq;
	return 0;
}

#ifdef __KVM_HAVE_MSI
static int assigned_device_enable_guest_msi(struct kvm *kvm,
			struct kvm_assigned_dev_kernel *dev,
			struct kvm_assigned_irq *irq)
{
	dev->guest_irq = irq->guest_irq;
	dev->ack_notifier.gsi = -1;
	return 0;
}
#endif

#ifdef __KVM_HAVE_MSIX
static int assigned_device_enable_guest_msix(struct kvm *kvm,
			struct kvm_assigned_dev_kernel *dev,
			struct kvm_assigned_irq *irq)
{
	dev->guest_irq = irq->guest_irq;
	dev->ack_notifier.gsi = -1;
	return 0;
}
#endif

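/*
 * Only the INTx guest-enable path above sets ack_notifier.gsi to a valid
 * GSI; MSI and MSI-X use -1. assign_guest_irq() below registers the ack
 * notifier only when gsi != -1, since level-triggered INTx is the only
 * mode that needs an ack callback to deassert and re-enable the line.
 */
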
static int assign_host_irq(struct kvm *kvm,
			   struct kvm_assigned_dev_kernel *dev,
			   __u32 host_irq_type)
{
	int r = -EEXIST;

	if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
		return r;

	snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
		 pci_name(dev->dev));

	switch (host_irq_type) {
	case KVM_DEV_IRQ_HOST_INTX:
		r = assigned_device_enable_host_intx(kvm, dev);
		break;
#ifdef __KVM_HAVE_MSI
	case KVM_DEV_IRQ_HOST_MSI:
		r = assigned_device_enable_host_msi(kvm, dev);
		break;
#endif
#ifdef __KVM_HAVE_MSIX
	case KVM_DEV_IRQ_HOST_MSIX:
		r = assigned_device_enable_host_msix(kvm, dev);
		break;
#endif
	default:
		r = -EINVAL;
	}
	dev->host_irq_disabled = false;

	if (!r)
		dev->irq_requested_type |= host_irq_type;

	return r;
}

static int assign_guest_irq(struct kvm *kvm,
			    struct kvm_assigned_dev_kernel *dev,
			    struct kvm_assigned_irq *irq,
			    unsigned long guest_irq_type)
{
	int id;
	int r = -EEXIST;

	if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
		return r;

	id = kvm_request_irq_source_id(kvm);
	if (id < 0)
		return id;

	dev->irq_source_id = id;

	switch (guest_irq_type) {
	case KVM_DEV_IRQ_GUEST_INTX:
		r = assigned_device_enable_guest_intx(kvm, dev, irq);
		break;
#ifdef __KVM_HAVE_MSI
	case KVM_DEV_IRQ_GUEST_MSI:
		r = assigned_device_enable_guest_msi(kvm, dev, irq);
		break;
#endif
#ifdef __KVM_HAVE_MSIX
	case KVM_DEV_IRQ_GUEST_MSIX:
		r = assigned_device_enable_guest_msix(kvm, dev, irq);
		break;
#endif
	default:
		r = -EINVAL;
	}

	if (!r) {
		dev->irq_requested_type |= guest_irq_type;
		if (dev->ack_notifier.gsi != -1)
			kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
	} else
		kvm_free_irq_source_id(kvm, dev->irq_source_id);

	return r;
}

/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
				   struct kvm_assigned_irq *assigned_irq)
{
	int r = -EINVAL;
	struct kvm_assigned_dev_kernel *match;
	unsigned long host_irq_type, guest_irq_type;

	if (!irqchip_in_kernel(kvm))
		return r;

	mutex_lock(&kvm->lock);
	r = -ENODEV;
	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_irq->assigned_dev_id);
	if (!match)
		goto out;

	host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
	guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);

	r = -EINVAL;
	/* can only assign one type at a time */
	if (hweight_long(host_irq_type) > 1)
		goto out;
	if (hweight_long(guest_irq_type) > 1)
		goto out;
	if (host_irq_type == 0 && guest_irq_type == 0)
		goto out;

	r = 0;
	if (host_irq_type)
		r = assign_host_irq(kvm, match, host_irq_type);
	if (r)
		goto out;

	if (guest_irq_type)
		r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
out:
	mutex_unlock(&kvm->lock);
	return r;
}

static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
					 struct kvm_assigned_irq
					 *assigned_irq)
{
	int r = -ENODEV;
	struct kvm_assigned_dev_kernel *match;
	unsigned long irq_type;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_irq->assigned_dev_id);
	if (!match)
		goto out;

	irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
					  KVM_DEV_IRQ_GUEST_MASK);
	r = kvm_deassign_irq(kvm, match, irq_type);
out:
	mutex_unlock(&kvm->lock);
	return r;
}

/*
 * We want to test whether the caller has been granted permissions to
 * use this device.  To be able to configure and control the device,
 * the user needs access to PCI configuration space and BAR resources.
 * These are accessed through PCI sysfs.  PCI config space is often
 * passed to the process calling this ioctl via file descriptor, so we
 * can't rely on access to that file.  We can check for permissions
 * on each of the BAR resource files, which is a pretty clear
 * indicator that the user has been granted access to the device.
 */
static int probe_sysfs_permissions(struct pci_dev *dev)
{
#ifdef CONFIG_SYSFS
	int i;
	bool bar_found = false;

	for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
		char *kpath, *syspath;
		struct path path;
		struct inode *inode;
		int r;

		if (!pci_resource_len(dev, i))
			continue;

		kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
		if (!kpath)
			return -ENOMEM;

		/* Per sysfs-rules, sysfs is always at /sys */
		syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
		kfree(kpath);
		if (!syspath)
			return -ENOMEM;

		r = kern_path(syspath, LOOKUP_FOLLOW, &path);
		kfree(syspath);
		if (r)
			return r;

		inode = path.dentry->d_inode;

		r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
		path_put(&path);
		if (r)
			return r;

		bar_found = true;
	}

	/* If no resources, probably something special */
	if (!bar_found)
		return -EPERM;

	return 0;
#else
	return -EINVAL; /* No way to control the device without sysfs */
#endif
}

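/*
 * For a device at 0000:00:19.0, for example, the paths probed above have
 * the shape /sys/devices/pci0000:00/0000:00:19.0/resource0 (one file per
 * populated BAR); read/write permission on each of these is taken as
 * evidence that the caller was granted access to the device.
 */
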
static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
				      struct kvm_assigned_pci_dev *assigned_dev)
{
	int r = 0, idx;
	struct kvm_assigned_dev_kernel *match;
	struct pci_dev *dev;

	if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	idx = srcu_read_lock(&kvm->srcu);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_dev->assigned_dev_id);
	if (match) {
		/* device already assigned */
		r = -EEXIST;
		goto out;
	}

	match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
	if (match == NULL) {
		printk(KERN_INFO "%s: Couldn't allocate memory\n",
		       __func__);
		r = -ENOMEM;
		goto out;
	}
	dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
					  assigned_dev->busnr,
					  assigned_dev->devfn);
	if (!dev) {
		printk(KERN_INFO "%s: host device not found\n", __func__);
		r = -EINVAL;
		goto out_free;
	}

	/* Don't allow bridges to be assigned */
	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
		r = -EPERM;
		goto out_put;
	}

	r = probe_sysfs_permissions(dev);
	if (r)
		goto out_put;

	if (pci_enable_device(dev)) {
		printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
		r = -EBUSY;
		goto out_put;
	}
	r = pci_request_regions(dev, "kvm_assigned_device");
	if (r) {
		printk(KERN_INFO "%s: Could not get access to device regions\n",
		       __func__);
		goto out_disable;
	}

	pci_reset_function(dev);
	pci_save_state(dev);
	match->pci_saved_state = pci_store_saved_state(dev);
	if (!match->pci_saved_state)
		printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
		       __func__, dev_name(&dev->dev));

	if (!pci_intx_mask_supported(dev))
		assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;

	match->assigned_dev_id = assigned_dev->assigned_dev_id;
	match->host_segnr = assigned_dev->segnr;
	match->host_busnr = assigned_dev->busnr;
	match->host_devfn = assigned_dev->devfn;
	match->flags = assigned_dev->flags;
	match->dev = dev;
	spin_lock_init(&match->intx_lock);
	spin_lock_init(&match->intx_mask_lock);
	match->irq_source_id = -1;
	match->kvm = kvm;
	match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;

	list_add(&match->list, &kvm->arch.assigned_dev_head);

	if (!kvm->arch.iommu_domain) {
		r = kvm_iommu_map_guest(kvm);
		if (r)
			goto out_list_del;
	}
	r = kvm_assign_device(kvm, match);
	if (r)
		goto out_list_del;

out:
	srcu_read_unlock(&kvm->srcu, idx);
	mutex_unlock(&kvm->lock);
	return r;
out_list_del:
	if (pci_load_and_free_saved_state(dev, &match->pci_saved_state))
		printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
		       __func__, dev_name(&dev->dev));
	list_del(&match->list);
	pci_release_regions(dev);
out_disable:
	pci_disable_device(dev);
out_put:
	pci_dev_put(dev);
out_free:
	kfree(match);
	srcu_read_unlock(&kvm->srcu, idx);
	mutex_unlock(&kvm->lock);
	return r;
}

static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
		struct kvm_assigned_pci_dev *assigned_dev)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *match;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_dev->assigned_dev_id);
	if (!match) {
		printk(KERN_INFO "%s: device hasn't been assigned before, "
		       "so cannot be deassigned\n", __func__);
		r = -EINVAL;
		goto out;
	}

	kvm_deassign_device(kvm, match);

	kvm_free_assigned_device(kvm, match);

out:
	mutex_unlock(&kvm->lock);
	return r;
}


#ifdef __KVM_HAVE_MSIX
static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
				    struct kvm_assigned_msix_nr *entry_nr)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *adev;

	mutex_lock(&kvm->lock);

	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				     entry_nr->assigned_dev_id);
	if (!adev) {
		r = -EINVAL;
		goto msix_nr_out;
	}

	if (adev->entries_nr == 0) {
		adev->entries_nr = entry_nr->entry_nr;
		if (adev->entries_nr == 0 ||
		    adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
			r = -EINVAL;
			goto msix_nr_out;
		}

		adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
						  entry_nr->entry_nr,
						  GFP_KERNEL);
		if (!adev->host_msix_entries) {
			r = -ENOMEM;
			goto msix_nr_out;
		}
		adev->guest_msix_entries =
			kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
				GFP_KERNEL);
		if (!adev->guest_msix_entries) {
			kfree(adev->host_msix_entries);
			r = -ENOMEM;
			goto msix_nr_out;
		}
	} else /* Not allowed to set the MSI-X entry count twice */
		r = -EINVAL;
msix_nr_out:
	mutex_unlock(&kvm->lock);
	return r;
}

static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
				       struct kvm_assigned_msix_entry *entry)
{
	int r = 0, i;
	struct kvm_assigned_dev_kernel *adev;

	mutex_lock(&kvm->lock);

	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				     entry->assigned_dev_id);

	if (!adev) {
		r = -EINVAL;
		goto msix_entry_out;
	}

	for (i = 0; i < adev->entries_nr; i++)
		if (adev->guest_msix_entries[i].vector == 0 ||
		    adev->guest_msix_entries[i].entry == entry->entry) {
			adev->guest_msix_entries[i].entry = entry->entry;
			adev->guest_msix_entries[i].vector = entry->gsi;
			adev->host_msix_entries[i].entry = entry->entry;
			break;
		}
	if (i == adev->entries_nr) {
		r = -ENOSPC;
		goto msix_entry_out;
	}

msix_entry_out:
	mutex_unlock(&kvm->lock);

	return r;
}
#endif

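/*
 * The two arrays set up above run in parallel: host_msix_entries[i].entry
 * holds the device's MSI-X table index and its .vector is filled in with
 * the host IRQ by pci_enable_msix(), while guest_msix_entries[i] maps the
 * same table index to the guest GSI supplied via KVM_ASSIGN_SET_MSIX_ENTRY.
 */
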
static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
		struct kvm_assigned_pci_dev *assigned_dev)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *match;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_dev->assigned_dev_id);
	if (!match) {
		r = -ENODEV;
		goto out;
	}

	spin_lock(&match->intx_mask_lock);

	match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
	match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;

	if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
		if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
			kvm_set_irq(match->kvm, match->irq_source_id,
				    match->guest_irq, 0);
			/*
			 * Masking at hardware-level is performed on demand,
			 * i.e. when an IRQ actually arrives at the host.
			 */
		} else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
			/*
			 * Unmask the IRQ line if required. Unmasking at
			 * device level will be performed by user space.
			 */
			spin_lock_irq(&match->intx_lock);
			if (match->host_irq_disabled) {
				enable_irq(match->host_irq);
				match->host_irq_disabled = false;
			}
			spin_unlock_irq(&match->intx_lock);
		}
	}

	spin_unlock(&match->intx_mask_lock);

out:
	mutex_unlock(&kvm->lock);
	return r;
}

long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
				  unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	int r;

	switch (ioctl) {
	case KVM_ASSIGN_PCI_DEVICE: {
		struct kvm_assigned_pci_dev assigned_dev;

		r = -EFAULT;
		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
			goto out;
		r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
		if (r)
			goto out;
		break;
	}
	case KVM_ASSIGN_IRQ: {
		r = -EOPNOTSUPP;
		break;
	}
	case KVM_ASSIGN_DEV_IRQ: {
		struct kvm_assigned_irq assigned_irq;

		r = -EFAULT;
		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
			goto out;
		r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
		if (r)
			goto out;
		break;
	}
	case KVM_DEASSIGN_DEV_IRQ: {
		struct kvm_assigned_irq assigned_irq;

		r = -EFAULT;
		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
			goto out;
		r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
		if (r)
			goto out;
		break;
	}
	case KVM_DEASSIGN_PCI_DEVICE: {
		struct kvm_assigned_pci_dev assigned_dev;

		r = -EFAULT;
		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
			goto out;
		r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
		if (r)
			goto out;
		break;
	}
#ifdef KVM_CAP_IRQ_ROUTING
	case KVM_SET_GSI_ROUTING: {
		struct kvm_irq_routing routing;
		struct kvm_irq_routing __user *urouting;
		struct kvm_irq_routing_entry *entries;

		r = -EFAULT;
		if (copy_from_user(&routing, argp, sizeof(routing)))
			goto out;
		r = -EINVAL;
		if (routing.nr >= KVM_MAX_IRQ_ROUTES)
			goto out;
		if (routing.flags)
			goto out;
		r = -ENOMEM;
		entries = vmalloc(routing.nr * sizeof(*entries));
		if (!entries)
			goto out;
		r = -EFAULT;
		urouting = argp;
		if (copy_from_user(entries, urouting->entries,
				   routing.nr * sizeof(*entries)))
			goto out_free_irq_routing;
		r = kvm_set_irq_routing(kvm, entries, routing.nr,
					routing.flags);
	out_free_irq_routing:
		vfree(entries);
		break;
	}
#endif /* KVM_CAP_IRQ_ROUTING */
#ifdef __KVM_HAVE_MSIX
	case KVM_ASSIGN_SET_MSIX_NR: {
		struct kvm_assigned_msix_nr entry_nr;
		r = -EFAULT;
		if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
			goto out;
		r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
		if (r)
			goto out;
		break;
	}
	case KVM_ASSIGN_SET_MSIX_ENTRY: {
		struct kvm_assigned_msix_entry entry;
		r = -EFAULT;
		if (copy_from_user(&entry, argp, sizeof entry))
			goto out;
		r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
		if (r)
			goto out;
		break;
	}
#endif
	case KVM_ASSIGN_SET_INTX_MASK: {
		struct kvm_assigned_pci_dev assigned_dev;

		r = -EFAULT;
		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
			goto out;
		r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
		break;
	}
	default:
		r = -ENOTTY;
		break;
	}
out:
	return r;
}
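
/*
 * Usage sketch (not part of this file): a minimal userspace sequence for
 * assigning a device through the ioctls above. The PCI address
 * (0000:00:19.0), the assigned_dev_id convention, and the guest GSI are
 * illustrative assumptions; vmfd is an already-created KVM VM file
 * descriptor with an in-kernel irqchip.
 */
#if 0
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int assign_example(int vmfd)
{
	struct kvm_assigned_pci_dev dev = {
		.assigned_dev_id = (0x00 << 8) | (0x19 << 3), /* bus/devfn, illustrative */
		.segnr = 0,			/* PCI domain 0000 */
		.busnr = 0x00,
		.devfn = (0x19 << 3) | 0x0,	/* i.e. PCI_DEVFN(0x19, 0) */
		.flags = KVM_DEV_ASSIGN_ENABLE_IOMMU,	/* mandatory, see above */
	};
	struct kvm_assigned_irq irq = {
		.assigned_dev_id = dev.assigned_dev_id,
		.guest_irq = 10,		/* guest GSI, illustrative */
		.flags = KVM_DEV_IRQ_HOST_INTX | KVM_DEV_IRQ_GUEST_INTX,
	};

	if (ioctl(vmfd, KVM_ASSIGN_PCI_DEVICE, &dev) < 0)
		return -1;
	/* One host type and one guest type at a time, as checked above. */
	if (ioctl(vmfd, KVM_ASSIGN_DEV_IRQ, &irq) < 0)
		return -1;
	return 0;
}
#endif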