linux/drivers/vfio/pci/vfio_pci_intrs.c
/*
 * VFIO PCI interrupt handling
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/eventfd.h>
#include <linux/pci.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/vfio.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#include <linux/slab.h>

#include "vfio_pci_private.h"

/*
 * IRQfd - generic
 */
struct virqfd {
        struct vfio_pci_device  *vdev;
        struct eventfd_ctx      *eventfd;
        int                     (*handler)(struct vfio_pci_device *, void *);
        void                    (*thread)(struct vfio_pci_device *, void *);
        void                    *data;
        struct work_struct      inject;
        wait_queue_t            wait;
        poll_table              pt;
        struct work_struct      shutdown;
        struct virqfd           **pvirqfd;
};

static struct workqueue_struct *vfio_irqfd_cleanup_wq;

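/*
 * A dedicated, single-threaded workqueue gives virqfd teardown a process
 * context: shutdown work queued from atomic context (wait-queue callbacks,
 * spinlocked sections) runs here, and virqfd_disable() flushes it to wait
 * for outstanding releases.
 */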
int __init vfio_pci_virqfd_init(void)
{
        vfio_irqfd_cleanup_wq =
                create_singlethread_workqueue("vfio-irqfd-cleanup");
        if (!vfio_irqfd_cleanup_wq)
                return -ENOMEM;

        return 0;
}

void vfio_pci_virqfd_exit(void)
{
        destroy_workqueue(vfio_irqfd_cleanup_wq);
}

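/*
 * Callers hold either the eventfd wait-queue lock (virqfd_wakeup) or
 * vdev->irqlock (virqfd_disable), so the actual release is deferred to
 * the cleanup workqueue rather than done inline.
 */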
static void virqfd_deactivate(struct virqfd *virqfd)
{
        queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown);
}

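/*
 * Wait-queue callback, run when the eventfd is signalled (POLLIN) or
 * closed (POLLHUP).  On POLLIN the optional handler runs here in atomic
 * context; if it asks for it (or is absent), the thread callback is
 * deferred to the inject work.  On POLLHUP the virqfd is unlinked under
 * irqlock and queued for shutdown.
 */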
static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
        struct virqfd *virqfd = container_of(wait, struct virqfd, wait);
        unsigned long flags = (unsigned long)key;

        if (flags & POLLIN) {
                /* An event has been signaled, call function */
                if ((!virqfd->handler ||
                     virqfd->handler(virqfd->vdev, virqfd->data)) &&
                    virqfd->thread)
                        schedule_work(&virqfd->inject);
        }

        if (flags & POLLHUP) {
                unsigned long flags;
                spin_lock_irqsave(&virqfd->vdev->irqlock, flags);

                /*
                 * The eventfd is closing.  If the virqfd has not yet been
                 * queued for release, as determined by testing whether the
                 * vdev pointer to it is still valid, queue it now.  As
                 * with kvm irqfds, we know we won't race against the virqfd
                 * going away because we hold wqh->lock to get here.
                 */
                if (*(virqfd->pvirqfd) == virqfd) {
                        *(virqfd->pvirqfd) = NULL;
                        virqfd_deactivate(virqfd);
                }

                spin_unlock_irqrestore(&virqfd->vdev->irqlock, flags);
        }

        return 0;
}

static void virqfd_ptable_queue_proc(struct file *file,
                                     wait_queue_head_t *wqh, poll_table *pt)
{
        struct virqfd *virqfd = container_of(pt, struct virqfd, pt);
        add_wait_queue(wqh, &virqfd->wait);
}

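/*
 * Runs on the cleanup workqueue.  eventfd_ctx_remove_wait_queue() detaches
 * the wait-queue entry (taking the eventfd's wait-queue lock, so it cannot
 * race virqfd_wakeup), any pending inject work is flushed, and only then
 * are the eventfd reference and the virqfd itself released.
 */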
static void virqfd_shutdown(struct work_struct *work)
{
        struct virqfd *virqfd = container_of(work, struct virqfd, shutdown);
        u64 cnt;

        eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt);
        flush_work(&virqfd->inject);
        eventfd_ctx_put(virqfd->eventfd);

        kfree(virqfd);
}

static void virqfd_inject(struct work_struct *work)
{
        struct virqfd *virqfd = container_of(work, struct virqfd, inject);
        if (virqfd->thread)
                virqfd->thread(virqfd->vdev, virqfd->data);
}

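/*
 * Wire an eventfd to a handler/thread pair: take a reference on the fd's
 * eventfd context, publish the virqfd under irqlock (only one may be
 * attached per slot), hook virqfd_wakeup() into the eventfd's wait queue
 * via the poll table, and replay any already-pending signal.  Used below,
 * e.g. to let userspace unmask INTx by writing an eventfd:
 *
 *        virqfd_enable(vdev, vfio_pci_intx_unmask_handler,
 *                      vfio_send_intx_eventfd, NULL,
 *                      &vdev->ctx[0].unmask, fd);
 */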
static int virqfd_enable(struct vfio_pci_device *vdev,
                         int (*handler)(struct vfio_pci_device *, void *),
                         void (*thread)(struct vfio_pci_device *, void *),
                         void *data, struct virqfd **pvirqfd, int fd)
{
        struct fd irqfd;
        struct eventfd_ctx *ctx;
        struct virqfd *virqfd;
        int ret = 0;
        unsigned int events;

        virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL);
        if (!virqfd)
                return -ENOMEM;

        virqfd->pvirqfd = pvirqfd;
        virqfd->vdev = vdev;
        virqfd->handler = handler;
        virqfd->thread = thread;
        virqfd->data = data;

        INIT_WORK(&virqfd->shutdown, virqfd_shutdown);
        INIT_WORK(&virqfd->inject, virqfd_inject);

        irqfd = fdget(fd);
        if (!irqfd.file) {
                ret = -EBADF;
                goto err_fd;
        }

        ctx = eventfd_ctx_fileget(irqfd.file);
        if (IS_ERR(ctx)) {
                ret = PTR_ERR(ctx);
                goto err_ctx;
        }

        virqfd->eventfd = ctx;

        /*
         * virqfds can be released by closing the eventfd or directly
         * through ioctl.  These are both done through a workqueue, so
         * we update the pointer to the virqfd under lock to avoid
         * pushing multiple jobs to release the same virqfd.
         */
        spin_lock_irq(&vdev->irqlock);

        if (*pvirqfd) {
                spin_unlock_irq(&vdev->irqlock);
                ret = -EBUSY;
                goto err_busy;
        }
        *pvirqfd = virqfd;

        spin_unlock_irq(&vdev->irqlock);

        /*
         * Install our own custom wake-up handling so we are notified via
         * a callback whenever someone signals the underlying eventfd.
         */
        init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup);
        init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc);

        events = irqfd.file->f_op->poll(irqfd.file, &virqfd->pt);

        /*
         * Check if there was an event already pending on the eventfd
         * before we registered and trigger it as if we didn't miss it.
         */
        if (events & POLLIN) {
                if ((!handler || handler(vdev, data)) && thread)
                        schedule_work(&virqfd->inject);
        }

        /*
         * Do not drop the file until the irqfd is fully initialized,
         * otherwise we might race against the POLLHUP.
         */
        fdput(irqfd);

        return 0;
err_busy:
        eventfd_ctx_put(ctx);
err_ctx:
        fdput(irqfd);
err_fd:
        kfree(virqfd);

        return ret;
}

static void virqfd_disable(struct vfio_pci_device *vdev,
                           struct virqfd **pvirqfd)
{
        unsigned long flags;

        spin_lock_irqsave(&vdev->irqlock, flags);

        if (*pvirqfd) {
                virqfd_deactivate(*pvirqfd);
                *pvirqfd = NULL;
        }

        spin_unlock_irqrestore(&vdev->irqlock, flags);

        /*
         * Block until we know all outstanding shutdown jobs have completed.
         * Even if we don't queue the job, flush the wq to be sure it's
         * been released.
         */
        flush_workqueue(vfio_irqfd_cleanup_wq);
}

/*
 * INTx
 */
static void vfio_send_intx_eventfd(struct vfio_pci_device *vdev, void *unused)
{
        if (likely(is_intx(vdev) && !vdev->virq_disabled))
                eventfd_signal(vdev->ctx[0].trigger, 1);
}

void vfio_pci_intx_mask(struct vfio_pci_device *vdev)
{
        struct pci_dev *pdev = vdev->pdev;
        unsigned long flags;

        spin_lock_irqsave(&vdev->irqlock, flags);

        /*
         * Masking can come from interrupt, ioctl, or config space
         * via INTx disable.  The latter means this can get called
         * even when not using intx delivery.  In this case, just
         * try to have the physical bit follow the virtual bit.
         */
        if (unlikely(!is_intx(vdev))) {
                if (vdev->pci_2_3)
                        pci_intx(pdev, 0);
        } else if (!vdev->ctx[0].masked) {
                /*
                 * Can't use check_and_mask here because we always want to
                 * mask, not just when something is pending.
                 */
                if (vdev->pci_2_3)
                        pci_intx(pdev, 0);
                else
                        disable_irq_nosync(pdev->irq);

                vdev->ctx[0].masked = true;
        }

        spin_unlock_irqrestore(&vdev->irqlock, flags);
}

/*
 * If this is triggered by an eventfd, we can't call eventfd_signal
 * or else we'll deadlock on the eventfd wait queue.  Return >0 when
 * a signal is necessary, which can then be handled via a work queue
 * or directly depending on the caller.
 */
static int vfio_pci_intx_unmask_handler(struct vfio_pci_device *vdev,
                                        void *unused)
{
        struct pci_dev *pdev = vdev->pdev;
        unsigned long flags;
        int ret = 0;

        spin_lock_irqsave(&vdev->irqlock, flags);

        /*
         * Unmasking comes from ioctl or config, so again, have the
         * physical bit follow the virtual even when not using INTx.
         */
        if (unlikely(!is_intx(vdev))) {
                if (vdev->pci_2_3)
                        pci_intx(pdev, 1);
        } else if (vdev->ctx[0].masked && !vdev->virq_disabled) {
                /*
                 * A pending interrupt here would immediately trigger,
                 * but we can avoid that overhead by just re-sending
                 * the interrupt to the user.
                 */
                if (vdev->pci_2_3) {
                        if (!pci_check_and_unmask_intx(pdev))
                                ret = 1;
                } else
                        enable_irq(pdev->irq);

                vdev->ctx[0].masked = (ret > 0);
        }

        spin_unlock_irqrestore(&vdev->irqlock, flags);

        return ret;
}

void vfio_pci_intx_unmask(struct vfio_pci_device *vdev)
{
        if (vfio_pci_intx_unmask_handler(vdev, NULL) > 0)
                vfio_send_intx_eventfd(vdev, NULL);
}

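/*
 * INTx hard irq handler.  Without DisINTx (!pci_2_3) the line is exclusive
 * to this device: disable it at the irq level and claim the interrupt.
 * With DisINTx the line may be shared, so claim it only if this device is
 * actually asserting INTx (pci_check_and_mask_intx()), masking at the
 * device.  In both cases the interrupt stays masked until the user unmasks
 * it, and the trigger eventfd signals the event to userspace.
 */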
static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
{
        struct vfio_pci_device *vdev = dev_id;
        unsigned long flags;
        int ret = IRQ_NONE;

        spin_lock_irqsave(&vdev->irqlock, flags);

        if (!vdev->pci_2_3) {
                disable_irq_nosync(vdev->pdev->irq);
                vdev->ctx[0].masked = true;
                ret = IRQ_HANDLED;
        } else if (!vdev->ctx[0].masked &&  /* may be shared */
                   pci_check_and_mask_intx(vdev->pdev)) {
                vdev->ctx[0].masked = true;
                ret = IRQ_HANDLED;
        }

        spin_unlock_irqrestore(&vdev->irqlock, flags);

        if (ret == IRQ_HANDLED)
                vfio_send_intx_eventfd(vdev, NULL);

        return ret;
}

static int vfio_intx_enable(struct vfio_pci_device *vdev)
{
        if (!is_irq_none(vdev))
                return -EINVAL;

        if (!vdev->pdev->irq)
                return -ENODEV;

        vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL);
        if (!vdev->ctx)
                return -ENOMEM;

        vdev->num_ctx = 1;

        /*
         * If the virtual interrupt is masked, restore it.  Devices
         * supporting DisINTx can be masked at the hardware level
         * here; non-PCI-2.3 devices will have to wait until the
         * interrupt is enabled.
         */
        vdev->ctx[0].masked = vdev->virq_disabled;
        if (vdev->pci_2_3)
                pci_intx(vdev->pdev, !vdev->ctx[0].masked);

        vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;

        return 0;
}

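/*
 * (Re)bind the INTx trigger eventfd.  Any existing trigger is torn down
 * first (free_irq() before dropping the eventfd, so the handler can never
 * see a stale context); fd < 0 just leaves the interrupt disabled.  The
 * irq is requested shared only when DisINTx is available, since that is
 * what lets vfio_intx_handler() tell whether this device asserted the line.
 */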
static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd)
{
        struct pci_dev *pdev = vdev->pdev;
        unsigned long irqflags = IRQF_SHARED;
        struct eventfd_ctx *trigger;
        unsigned long flags;
        int ret;

        if (vdev->ctx[0].trigger) {
                free_irq(pdev->irq, vdev);
                kfree(vdev->ctx[0].name);
                eventfd_ctx_put(vdev->ctx[0].trigger);
                vdev->ctx[0].trigger = NULL;
        }

        if (fd < 0) /* Disable only */
                return 0;

        vdev->ctx[0].name = kasprintf(GFP_KERNEL, "vfio-intx(%s)",
                                      pci_name(pdev));
        if (!vdev->ctx[0].name)
                return -ENOMEM;

        trigger = eventfd_ctx_fdget(fd);
        if (IS_ERR(trigger)) {
                kfree(vdev->ctx[0].name);
                return PTR_ERR(trigger);
        }

        vdev->ctx[0].trigger = trigger;

        if (!vdev->pci_2_3)
                irqflags = 0;

        ret = request_irq(pdev->irq, vfio_intx_handler,
                          irqflags, vdev->ctx[0].name, vdev);
        if (ret) {
                vdev->ctx[0].trigger = NULL;
                kfree(vdev->ctx[0].name);
                eventfd_ctx_put(trigger);
                return ret;
        }

        /*
         * INTx disable will stick across the new irq setup,
         * disable_irq won't.
         */
        spin_lock_irqsave(&vdev->irqlock, flags);
        if (!vdev->pci_2_3 && vdev->ctx[0].masked)
                disable_irq_nosync(pdev->irq);
        spin_unlock_irqrestore(&vdev->irqlock, flags);

        return 0;
}

static void vfio_intx_disable(struct vfio_pci_device *vdev)
{
        vfio_intx_set_signal(vdev, -1);
        virqfd_disable(vdev, &vdev->ctx[0].unmask);
        virqfd_disable(vdev, &vdev->ctx[0].mask);
        vdev->irq_type = VFIO_PCI_NUM_IRQS;
        vdev->num_ctx = 0;
        kfree(vdev->ctx);
}

/*
 * MSI/MSI-X
 */
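/*
 * MSI/MSI-X vectors are requested exclusively and need no masking dance:
 * the hard irq handler simply forwards the event to the vector's eventfd.
 */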
static irqreturn_t vfio_msihandler(int irq, void *arg)
{
        struct eventfd_ctx *trigger = arg;

        eventfd_signal(trigger, 1);
        return IRQ_HANDLED;
}

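/*
 * Allocate per-vector state and enable MSI or MSI-X on the device.  For
 * MSI, msi_qmax (the virtual max-vectors field) is derived from the vector
 * count: fls(nvec * 2 - 1) - 1 equals ceil(log2(nvec)), e.g. nvec = 3
 * gives fls(5) - 1 = 2, i.e. a power-of-two span of 4 vectors.
 */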
static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix)
{
        struct pci_dev *pdev = vdev->pdev;
        int ret;

        if (!is_irq_none(vdev))
                return -EINVAL;

        vdev->ctx = kzalloc(nvec * sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL);
        if (!vdev->ctx)
                return -ENOMEM;

        if (msix) {
                int i;

                vdev->msix = kzalloc(nvec * sizeof(struct msix_entry),
                                     GFP_KERNEL);
                if (!vdev->msix) {
                        kfree(vdev->ctx);
                        return -ENOMEM;
                }

                for (i = 0; i < nvec; i++)
                        vdev->msix[i].entry = i;

                ret = pci_enable_msix(pdev, vdev->msix, nvec);
                if (ret) {
                        kfree(vdev->msix);
                        kfree(vdev->ctx);
                        return ret;
                }
        } else {
                ret = pci_enable_msi_block(pdev, nvec);
                if (ret) {
                        kfree(vdev->ctx);
                        return ret;
                }
        }

        vdev->num_ctx = nvec;
        vdev->irq_type = msix ? VFIO_PCI_MSIX_IRQ_INDEX :
                                VFIO_PCI_MSI_IRQ_INDEX;

        if (!msix) {
                /*
                 * Compute the virtual hardware field for max msi vectors -
                 * it is the log base 2 of the number of vectors.
                 */
                vdev->msi_qmax = fls(nvec * 2 - 1) - 1;
        }

        return 0;
}

static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
                                      int vector, int fd, bool msix)
{
        struct pci_dev *pdev = vdev->pdev;
        int irq = msix ? vdev->msix[vector].vector : pdev->irq + vector;
        char *name = msix ? "vfio-msix" : "vfio-msi";
        struct eventfd_ctx *trigger;
        int ret;

        if (vector >= vdev->num_ctx)
                return -EINVAL;

        if (vdev->ctx[vector].trigger) {
                free_irq(irq, vdev->ctx[vector].trigger);
                kfree(vdev->ctx[vector].name);
                eventfd_ctx_put(vdev->ctx[vector].trigger);
                vdev->ctx[vector].trigger = NULL;
        }

        if (fd < 0)
                return 0;

        vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "%s[%d](%s)",
                                           name, vector, pci_name(pdev));
        if (!vdev->ctx[vector].name)
                return -ENOMEM;

        trigger = eventfd_ctx_fdget(fd);
        if (IS_ERR(trigger)) {
                kfree(vdev->ctx[vector].name);
                return PTR_ERR(trigger);
        }

        ret = request_irq(irq, vfio_msihandler, 0,
                          vdev->ctx[vector].name, trigger);
        if (ret) {
                kfree(vdev->ctx[vector].name);
                eventfd_ctx_put(trigger);
                return ret;
        }

        vdev->ctx[vector].trigger = trigger;

        return 0;
}

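/*
 * Wire (or, with fds == NULL, tear down) a contiguous range of vectors.
 * On failure, vectors set so far are unwound; start is cast to int so the
 * unwind loop terminates when j drops below an unsigned start of zero.
 */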
static int vfio_msi_set_block(struct vfio_pci_device *vdev, unsigned start,
                              unsigned count, int32_t *fds, bool msix)
{
        int i, j, ret = 0;

        if (start + count > vdev->num_ctx)
                return -EINVAL;

        for (i = 0, j = start; i < count && !ret; i++, j++) {
                int fd = fds ? fds[i] : -1;
                ret = vfio_msi_set_vector_signal(vdev, j, fd, msix);
        }

        if (ret) {
                for (--j; j >= (int)start; j--)
                        vfio_msi_set_vector_signal(vdev, j, -1, msix);
        }

        return ret;
}

static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix)
{
        struct pci_dev *pdev = vdev->pdev;
        int i;

        vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix);

        for (i = 0; i < vdev->num_ctx; i++) {
                virqfd_disable(vdev, &vdev->ctx[i].unmask);
                virqfd_disable(vdev, &vdev->ctx[i].mask);
        }

        if (msix) {
                pci_disable_msix(vdev->pdev);
                kfree(vdev->msix);
        } else
                pci_disable_msi(pdev);

        vdev->irq_type = VFIO_PCI_NUM_IRQS;
        vdev->num_ctx = 0;
        kfree(vdev->ctx);
}

/*
 * IOCTL support
 */
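/*
 * The handlers below implement VFIO_DEVICE_SET_IRQS.  Userspace selects an
 * irq index, an action (mask/unmask/trigger) and a data type (none, bool
 * array, or eventfd array) via struct vfio_irq_set.  As an illustrative
 * sketch only (not from this file; device_fd is the VFIO device file
 * descriptor obtained elsewhere), wiring an eventfd to MSI-X vector 0
 * might look like:
 *
 *        int fd = eventfd(0, EFD_CLOEXEC);
 *        char buf[sizeof(struct vfio_irq_set) + sizeof(int32_t)];
 *        struct vfio_irq_set *set = (void *)buf;
 *
 *        set->argsz = sizeof(buf);
 *        set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
 *                     VFIO_IRQ_SET_ACTION_TRIGGER;
 *        set->index = VFIO_PCI_MSIX_IRQ_INDEX;
 *        set->start = 0;
 *        set->count = 1;
 *        memcpy(set->data, &fd, sizeof(int32_t));
 *        ioctl(device_fd, VFIO_DEVICE_SET_IRQS, set);
 */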
static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev,
                                    unsigned index, unsigned start,
                                    unsigned count, uint32_t flags, void *data)
{
        if (!is_intx(vdev) || start != 0 || count != 1)
                return -EINVAL;

        if (flags & VFIO_IRQ_SET_DATA_NONE) {
                vfio_pci_intx_unmask(vdev);
        } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
                uint8_t unmask = *(uint8_t *)data;
                if (unmask)
                        vfio_pci_intx_unmask(vdev);
        } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
                int32_t fd = *(int32_t *)data;
                if (fd >= 0)
                        return virqfd_enable(vdev, vfio_pci_intx_unmask_handler,
                                             vfio_send_intx_eventfd, NULL,
                                             &vdev->ctx[0].unmask, fd);

                virqfd_disable(vdev, &vdev->ctx[0].unmask);
        }

        return 0;
}

static int vfio_pci_set_intx_mask(struct vfio_pci_device *vdev,
                                  unsigned index, unsigned start,
                                  unsigned count, uint32_t flags, void *data)
{
        if (!is_intx(vdev) || start != 0 || count != 1)
                return -EINVAL;

        if (flags & VFIO_IRQ_SET_DATA_NONE) {
                vfio_pci_intx_mask(vdev);
        } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
                uint8_t mask = *(uint8_t *)data;
                if (mask)
                        vfio_pci_intx_mask(vdev);
        } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
                return -ENOTTY; /* XXX implement me */
        }

        return 0;
}

static int vfio_pci_set_intx_trigger(struct vfio_pci_device *vdev,
                                     unsigned index, unsigned start,
                                     unsigned count, uint32_t flags, void *data)
{
        if (is_intx(vdev) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
                vfio_intx_disable(vdev);
                return 0;
        }

        if (!(is_intx(vdev) || is_irq_none(vdev)) || start != 0 || count != 1)
                return -EINVAL;

        if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
                int32_t fd = *(int32_t *)data;
                int ret;

                if (is_intx(vdev))
                        return vfio_intx_set_signal(vdev, fd);

                ret = vfio_intx_enable(vdev);
                if (ret)
                        return ret;

                ret = vfio_intx_set_signal(vdev, fd);
                if (ret)
                        vfio_intx_disable(vdev);

                return ret;
        }

        if (!is_intx(vdev))
                return -EINVAL;

        if (flags & VFIO_IRQ_SET_DATA_NONE) {
                vfio_send_intx_eventfd(vdev, NULL);
        } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
                uint8_t trigger = *(uint8_t *)data;
                if (trigger)
                        vfio_send_intx_eventfd(vdev, NULL);
        }
        return 0;
}

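/*
 * Trigger action for MSI/MSI-X.  count == 0 with DATA_NONE tears everything
 * down; DATA_EVENTFD (re)wires a block of vectors, enabling MSI or MSI-X
 * first if needed; DATA_NONE/DATA_BOOL on already-wired vectors simply
 * fire the corresponding eventfds (loopback testing).
 */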
static int vfio_pci_set_msi_trigger(struct vfio_pci_device *vdev,
                                    unsigned index, unsigned start,
                                    unsigned count, uint32_t flags, void *data)
{
        int i;
        bool msix = (index == VFIO_PCI_MSIX_IRQ_INDEX) ? true : false;

        if (irq_is(vdev, index) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
                vfio_msi_disable(vdev, msix);
                return 0;
        }

        if (!(irq_is(vdev, index) || is_irq_none(vdev)))
                return -EINVAL;

        if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
                int32_t *fds = data;
                int ret;

                if (vdev->irq_type == index)
                        return vfio_msi_set_block(vdev, start, count,
                                                  fds, msix);

                ret = vfio_msi_enable(vdev, start + count, msix);
                if (ret)
                        return ret;

                ret = vfio_msi_set_block(vdev, start, count, fds, msix);
                if (ret)
                        vfio_msi_disable(vdev, msix);

                return ret;
        }

        if (!irq_is(vdev, index) || start + count > vdev->num_ctx)
                return -EINVAL;

        for (i = start; i < start + count; i++) {
                if (!vdev->ctx[i].trigger)
                        continue;
                if (flags & VFIO_IRQ_SET_DATA_NONE) {
                        eventfd_signal(vdev->ctx[i].trigger, 1);
                } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
                        uint8_t *bools = data;
                        if (bools[i - start])
                                eventfd_signal(vdev->ctx[i].trigger, 1);
                }
        }
        return 0;
}

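/*
 * Error (AER) interrupt: userspace registers an eventfd that the PCI error
 * handling path signals when the device reports an error.  err_trigger is
 * only read and written under device_lock, matching the error path.
 */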
static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev,
                                    unsigned index, unsigned start,
                                    unsigned count, uint32_t flags, void *data)
{
        int32_t fd = *(int32_t *)data;
        struct pci_dev *pdev = vdev->pdev;

        if ((index != VFIO_PCI_ERR_IRQ_INDEX) ||
            !(flags & VFIO_IRQ_SET_DATA_TYPE_MASK))
                return -EINVAL;

        /*
         * device_lock synchronizes setting and checking of
         * err_trigger. The vfio_pci_aer_err_detected() is also
         * called with device_lock held.
         */

        /* DATA_NONE/DATA_BOOL enables loopback testing */

        if (flags & VFIO_IRQ_SET_DATA_NONE) {
                device_lock(&pdev->dev);
                if (vdev->err_trigger)
                        eventfd_signal(vdev->err_trigger, 1);
                device_unlock(&pdev->dev);
                return 0;
        } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
                uint8_t trigger = *(uint8_t *)data;
                device_lock(&pdev->dev);
                if (trigger && vdev->err_trigger)
                        eventfd_signal(vdev->err_trigger, 1);
                device_unlock(&pdev->dev);
                return 0;
        }

        /* Handle SET_DATA_EVENTFD */

        if (fd == -1) {
                device_lock(&pdev->dev);
                if (vdev->err_trigger)
                        eventfd_ctx_put(vdev->err_trigger);
                vdev->err_trigger = NULL;
                device_unlock(&pdev->dev);
                return 0;
        } else if (fd >= 0) {
                struct eventfd_ctx *efdctx;
                efdctx = eventfd_ctx_fdget(fd);
                if (IS_ERR(efdctx))
                        return PTR_ERR(efdctx);
                device_lock(&pdev->dev);
                if (vdev->err_trigger)
                        eventfd_ctx_put(vdev->err_trigger);
                vdev->err_trigger = efdctx;
                device_unlock(&pdev->dev);
                return 0;
        } else
                return -EINVAL;
}
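
/*
 * Entry point from the VFIO_DEVICE_SET_IRQS ioctl path: pick the handler
 * for the requested index/action pair and hand it the validated arguments.
 * Error interrupts are only offered on PCI Express devices.
 */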
int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
                            unsigned index, unsigned start, unsigned count,
                            void *data)
{
        int (*func)(struct vfio_pci_device *vdev, unsigned index,
                    unsigned start, unsigned count, uint32_t flags,
                    void *data) = NULL;

        switch (index) {
        case VFIO_PCI_INTX_IRQ_INDEX:
                switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
                case VFIO_IRQ_SET_ACTION_MASK:
                        func = vfio_pci_set_intx_mask;
                        break;
                case VFIO_IRQ_SET_ACTION_UNMASK:
                        func = vfio_pci_set_intx_unmask;
                        break;
                case VFIO_IRQ_SET_ACTION_TRIGGER:
                        func = vfio_pci_set_intx_trigger;
                        break;
                }
                break;
        case VFIO_PCI_MSI_IRQ_INDEX:
        case VFIO_PCI_MSIX_IRQ_INDEX:
                switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
                case VFIO_IRQ_SET_ACTION_MASK:
                case VFIO_IRQ_SET_ACTION_UNMASK:
                        /* XXX Need masking support exported */
                        break;
                case VFIO_IRQ_SET_ACTION_TRIGGER:
                        func = vfio_pci_set_msi_trigger;
                        break;
                }
                break;
        case VFIO_PCI_ERR_IRQ_INDEX:
                switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
                case VFIO_IRQ_SET_ACTION_TRIGGER:
                        if (pci_is_pcie(vdev->pdev))
                                func = vfio_pci_set_err_trigger;
                        break;
                }
        }

        if (!func)
                return -ENOTTY;

        return func(vdev, index, start, count, flags, data);
}