linux/drivers/vfio/pci/vfio_pci_intrs.c
/*
 * VFIO PCI interrupt handling
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/eventfd.h>
#include <linux/msi.h>
#include <linux/pci.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/vfio.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#include <linux/slab.h>

#include "vfio_pci_private.h"

/*
 * IRQfd - generic
 */
struct virqfd {
	struct vfio_pci_device	*vdev;
	struct eventfd_ctx	*eventfd;
	int			(*handler)(struct vfio_pci_device *, void *);
	void			(*thread)(struct vfio_pci_device *, void *);
	void			*data;
	struct work_struct	inject;
	wait_queue_t		wait;
	poll_table		pt;
	struct work_struct	shutdown;
	struct virqfd		**pvirqfd;
};

static struct workqueue_struct *vfio_irqfd_cleanup_wq;

int __init vfio_pci_virqfd_init(void)
{
	vfio_irqfd_cleanup_wq =
		create_singlethread_workqueue("vfio-irqfd-cleanup");
	if (!vfio_irqfd_cleanup_wq)
		return -ENOMEM;

	return 0;
}

void vfio_pci_virqfd_exit(void)
{
	destroy_workqueue(vfio_irqfd_cleanup_wq);
}

static void virqfd_deactivate(struct virqfd *virqfd)
{
	queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown);
}

static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
	struct virqfd *virqfd = container_of(wait, struct virqfd, wait);
	unsigned long flags = (unsigned long)key;

	if (flags & POLLIN) {
		/* An event has been signaled, call function */
		if ((!virqfd->handler ||
		     virqfd->handler(virqfd->vdev, virqfd->data)) &&
		    virqfd->thread)
			schedule_work(&virqfd->inject);
	}

	if (flags & POLLHUP) {
		unsigned long flags;
		spin_lock_irqsave(&virqfd->vdev->irqlock, flags);

		/*
		 * The eventfd is closing; if the virqfd has not yet been
		 * queued for release, as determined by testing whether the
		 * vdev pointer to it is still valid, queue it now.  As
		 * with kvm irqfds, we know we won't race against the virqfd
		 * going away because we hold wqh->lock to get here.
		 */
		if (*(virqfd->pvirqfd) == virqfd) {
			*(virqfd->pvirqfd) = NULL;
			virqfd_deactivate(virqfd);
		}

		spin_unlock_irqrestore(&virqfd->vdev->irqlock, flags);
	}

	return 0;
}

static void virqfd_ptable_queue_proc(struct file *file,
				     wait_queue_head_t *wqh, poll_table *pt)
{
	struct virqfd *virqfd = container_of(pt, struct virqfd, pt);
	add_wait_queue(wqh, &virqfd->wait);
}

static void virqfd_shutdown(struct work_struct *work)
{
	struct virqfd *virqfd = container_of(work, struct virqfd, shutdown);
	u64 cnt;

	eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt);
	flush_work(&virqfd->inject);
	eventfd_ctx_put(virqfd->eventfd);

	kfree(virqfd);
}

static void virqfd_inject(struct work_struct *work)
{
	struct virqfd *virqfd = container_of(work, struct virqfd, inject);
	if (virqfd->thread)
		virqfd->thread(virqfd->vdev, virqfd->data);
}

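/*
 * Wire an eventfd up to a pair of callbacks: "handler" runs from the
 * wait-queue wakeup (atomic) context and returns nonzero when further,
 * sleepable work is needed, in which case "thread" runs from a workqueue.
 * As a usage sketch, the INTx unmask eventfd below is enabled with:
 *
 *	virqfd_enable(vdev, vfio_pci_intx_unmask_handler,
 *		      vfio_send_intx_eventfd, NULL,
 *		      &vdev->ctx[0].unmask, fd);
 *
 * and torn down again with virqfd_disable(vdev, &vdev->ctx[0].unmask).
 */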
static int virqfd_enable(struct vfio_pci_device *vdev,
			 int (*handler)(struct vfio_pci_device *, void *),
			 void (*thread)(struct vfio_pci_device *, void *),
			 void *data, struct virqfd **pvirqfd, int fd)
{
	struct fd irqfd;
	struct eventfd_ctx *ctx;
	struct virqfd *virqfd;
	int ret = 0;
	unsigned int events;

	virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL);
	if (!virqfd)
		return -ENOMEM;

	virqfd->pvirqfd = pvirqfd;
	virqfd->vdev = vdev;
	virqfd->handler = handler;
	virqfd->thread = thread;
	virqfd->data = data;

	INIT_WORK(&virqfd->shutdown, virqfd_shutdown);
	INIT_WORK(&virqfd->inject, virqfd_inject);

	irqfd = fdget(fd);
	if (!irqfd.file) {
		ret = -EBADF;
		goto err_fd;
	}

	ctx = eventfd_ctx_fileget(irqfd.file);
	if (IS_ERR(ctx)) {
		ret = PTR_ERR(ctx);
		goto err_ctx;
	}

	virqfd->eventfd = ctx;

	/*
	 * virqfds can be released by closing the eventfd or directly
	 * through ioctl.  These are both done through a workqueue, so
	 * we update the pointer to the virqfd under lock to avoid
	 * pushing multiple jobs to release the same virqfd.
	 */
	spin_lock_irq(&vdev->irqlock);

	if (*pvirqfd) {
		spin_unlock_irq(&vdev->irqlock);
		ret = -EBUSY;
		goto err_busy;
	}
	*pvirqfd = virqfd;

	spin_unlock_irq(&vdev->irqlock);

	/*
	 * Install our own custom wake-up handling so we are notified via
	 * a callback whenever someone signals the underlying eventfd.
	 */
	init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup);
	init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc);

	events = irqfd.file->f_op->poll(irqfd.file, &virqfd->pt);

	/*
	 * Check if there was an event already pending on the eventfd
	 * before we registered and trigger it as if we didn't miss it.
	 */
	if (events & POLLIN) {
		if ((!handler || handler(vdev, data)) && thread)
			schedule_work(&virqfd->inject);
	}

	/*
	 * Do not drop the file until the irqfd is fully initialized,
	 * otherwise we might race against the POLLHUP.
	 */
	fdput(irqfd);

	return 0;
err_busy:
	eventfd_ctx_put(ctx);
err_ctx:
	fdput(irqfd);
err_fd:
	kfree(virqfd);

	return ret;
}

static void virqfd_disable(struct vfio_pci_device *vdev,
			   struct virqfd **pvirqfd)
{
	unsigned long flags;

	spin_lock_irqsave(&vdev->irqlock, flags);

	if (*pvirqfd) {
		virqfd_deactivate(*pvirqfd);
		*pvirqfd = NULL;
	}

	spin_unlock_irqrestore(&vdev->irqlock, flags);

	/*
	 * Block until we know all outstanding shutdown jobs have completed.
	 * Even if we don't queue the job, flush the wq to be sure it's
	 * been released.
	 */
	flush_workqueue(vfio_irqfd_cleanup_wq);
}

/*
 * INTx
 */
static void vfio_send_intx_eventfd(struct vfio_pci_device *vdev, void *unused)
{
	if (likely(is_intx(vdev) && !vdev->virq_disabled))
		eventfd_signal(vdev->ctx[0].trigger, 1);
}

void vfio_pci_intx_mask(struct vfio_pci_device *vdev)
{
	struct pci_dev *pdev = vdev->pdev;
	unsigned long flags;

	spin_lock_irqsave(&vdev->irqlock, flags);

	/*
	 * Masking can come from interrupt, ioctl, or config space
	 * via INTx disable.  The latter means this can get called
	 * even when not using intx delivery.  In this case, just
	 * try to have the physical bit follow the virtual bit.
	 */
	if (unlikely(!is_intx(vdev))) {
		if (vdev->pci_2_3)
			pci_intx(pdev, 0);
	} else if (!vdev->ctx[0].masked) {
		/*
		 * Can't use check_and_mask here because we always want to
		 * mask, not just when something is pending.
		 */
		if (vdev->pci_2_3)
			pci_intx(pdev, 0);
		else
			disable_irq_nosync(pdev->irq);

		vdev->ctx[0].masked = true;
	}

	spin_unlock_irqrestore(&vdev->irqlock, flags);
}

/*
 * If this is triggered by an eventfd, we can't call eventfd_signal
 * or else we'll deadlock on the eventfd wait queue.  Return >0 when
 * a signal is necessary, which can then be handled via a work queue
 * or directly depending on the caller.
 */
static int vfio_pci_intx_unmask_handler(struct vfio_pci_device *vdev,
					void *unused)
{
	struct pci_dev *pdev = vdev->pdev;
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&vdev->irqlock, flags);

	/*
	 * Unmasking comes from ioctl or config, so again, have the
	 * physical bit follow the virtual even when not using INTx.
	 */
	if (unlikely(!is_intx(vdev))) {
		if (vdev->pci_2_3)
			pci_intx(pdev, 1);
	} else if (vdev->ctx[0].masked && !vdev->virq_disabled) {
		/*
		 * A pending interrupt here would immediately trigger,
		 * but we can avoid that overhead by just re-sending
		 * the interrupt to the user.
		 */
		if (vdev->pci_2_3) {
			if (!pci_check_and_unmask_intx(pdev))
				ret = 1;
		} else
			enable_irq(pdev->irq);

		vdev->ctx[0].masked = (ret > 0);
	}

	spin_unlock_irqrestore(&vdev->irqlock, flags);

	return ret;
}

void vfio_pci_intx_unmask(struct vfio_pci_device *vdev)
{
	if (vfio_pci_intx_unmask_handler(vdev, NULL) > 0)
		vfio_send_intx_eventfd(vdev, NULL);
}

static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
{
	struct vfio_pci_device *vdev = dev_id;
	unsigned long flags;
	int ret = IRQ_NONE;

	spin_lock_irqsave(&vdev->irqlock, flags);

	if (!vdev->pci_2_3) {
		disable_irq_nosync(vdev->pdev->irq);
		vdev->ctx[0].masked = true;
		ret = IRQ_HANDLED;
	} else if (!vdev->ctx[0].masked &&  /* may be shared */
		   pci_check_and_mask_intx(vdev->pdev)) {
		vdev->ctx[0].masked = true;
		ret = IRQ_HANDLED;
	}

	spin_unlock_irqrestore(&vdev->irqlock, flags);

	if (ret == IRQ_HANDLED)
		vfio_send_intx_eventfd(vdev, NULL);

	return ret;
}

static int vfio_intx_enable(struct vfio_pci_device *vdev)
{
	if (!is_irq_none(vdev))
		return -EINVAL;

	if (!vdev->pdev->irq)
		return -ENODEV;

	vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL);
	if (!vdev->ctx)
		return -ENOMEM;

	vdev->num_ctx = 1;

	/*
	 * If the virtual interrupt is masked, restore it.  Devices
	 * supporting DisINTx can be masked at the hardware level
	 * here, non-PCI-2.3 devices will have to wait until the
	 * interrupt is enabled.
	 */
	vdev->ctx[0].masked = vdev->virq_disabled;
	if (vdev->pci_2_3)
		pci_intx(vdev->pdev, !vdev->ctx[0].masked);

	vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;

	return 0;
}

static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd)
{
	struct pci_dev *pdev = vdev->pdev;
	unsigned long irqflags = IRQF_SHARED;
	struct eventfd_ctx *trigger;
	unsigned long flags;
	int ret;

	if (vdev->ctx[0].trigger) {
		free_irq(pdev->irq, vdev);
		kfree(vdev->ctx[0].name);
		eventfd_ctx_put(vdev->ctx[0].trigger);
		vdev->ctx[0].trigger = NULL;
	}

	if (fd < 0) /* Disable only */
		return 0;

	vdev->ctx[0].name = kasprintf(GFP_KERNEL, "vfio-intx(%s)",
				      pci_name(pdev));
	if (!vdev->ctx[0].name)
		return -ENOMEM;

	trigger = eventfd_ctx_fdget(fd);
	if (IS_ERR(trigger)) {
		kfree(vdev->ctx[0].name);
		return PTR_ERR(trigger);
	}

	vdev->ctx[0].trigger = trigger;

	if (!vdev->pci_2_3)
		irqflags = 0;

	ret = request_irq(pdev->irq, vfio_intx_handler,
			  irqflags, vdev->ctx[0].name, vdev);
	if (ret) {
		vdev->ctx[0].trigger = NULL;
		kfree(vdev->ctx[0].name);
		eventfd_ctx_put(trigger);
		return ret;
	}

	/*
	 * INTx disable will stick across the new irq setup,
	 * disable_irq won't.
	 */
	spin_lock_irqsave(&vdev->irqlock, flags);
	if (!vdev->pci_2_3 && vdev->ctx[0].masked)
		disable_irq_nosync(pdev->irq);
	spin_unlock_irqrestore(&vdev->irqlock, flags);

	return 0;
}

static void vfio_intx_disable(struct vfio_pci_device *vdev)
{
	virqfd_disable(vdev, &vdev->ctx[0].unmask);
	virqfd_disable(vdev, &vdev->ctx[0].mask);
	vfio_intx_set_signal(vdev, -1);
	vdev->irq_type = VFIO_PCI_NUM_IRQS;
	vdev->num_ctx = 0;
	kfree(vdev->ctx);
}

/*
 * MSI/MSI-X
 */
static irqreturn_t vfio_msihandler(int irq, void *arg)
{
	struct eventfd_ctx *trigger = arg;

	eventfd_signal(trigger, 1);
	return IRQ_HANDLED;
}

static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix)
{
	struct pci_dev *pdev = vdev->pdev;
	int ret;

	if (!is_irq_none(vdev))
		return -EINVAL;

	vdev->ctx = kcalloc(nvec, sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL);
	if (!vdev->ctx)
		return -ENOMEM;

	if (msix) {
		int i;

		vdev->msix = kzalloc(nvec * sizeof(struct msix_entry),
				     GFP_KERNEL);
		if (!vdev->msix) {
			kfree(vdev->ctx);
			return -ENOMEM;
		}

		for (i = 0; i < nvec; i++)
			vdev->msix[i].entry = i;

		ret = pci_enable_msix_range(pdev, vdev->msix, 1, nvec);
		if (ret < nvec) {
			if (ret > 0)
				pci_disable_msix(pdev);
			kfree(vdev->msix);
			kfree(vdev->ctx);
			return ret;
		}
	} else {
		ret = pci_enable_msi_range(pdev, 1, nvec);
		if (ret < nvec) {
			if (ret > 0)
				pci_disable_msi(pdev);
			kfree(vdev->ctx);
			return ret;
		}
	}

	vdev->num_ctx = nvec;
	vdev->irq_type = msix ? VFIO_PCI_MSIX_IRQ_INDEX :
				VFIO_PCI_MSI_IRQ_INDEX;

	if (!msix) {
		/*
		 * Compute the virtual hardware field for max msi vectors -
		 * it is the log base 2 of the number of vectors.
		 */
		vdev->msi_qmax = fls(nvec * 2 - 1) - 1;
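		/*
		 * Worked example of the line above: nvec = 3 gives
		 * fls(5) - 1 = 2, i.e. a maximum of 2^2 = 4 vectors is
		 * reported, while nvec = 1 gives 0 (a single vector).
		 * In other words, this is the base-2 log of nvec rounded
		 * up to the next power of two.
		 */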
	}

	return 0;
}

static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
				      int vector, int fd, bool msix)
{
	struct pci_dev *pdev = vdev->pdev;
	struct eventfd_ctx *trigger;
	int irq, ret;

	if (vector < 0 || vector >= vdev->num_ctx)
		return -EINVAL;

	irq = msix ? vdev->msix[vector].vector : pdev->irq + vector;

	if (vdev->ctx[vector].trigger) {
		free_irq(irq, vdev->ctx[vector].trigger);
		irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
		kfree(vdev->ctx[vector].name);
		eventfd_ctx_put(vdev->ctx[vector].trigger);
		vdev->ctx[vector].trigger = NULL;
	}

	if (fd < 0)
		return 0;

	vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)",
					   msix ? "x" : "", vector,
					   pci_name(pdev));
	if (!vdev->ctx[vector].name)
		return -ENOMEM;

	trigger = eventfd_ctx_fdget(fd);
	if (IS_ERR(trigger)) {
		kfree(vdev->ctx[vector].name);
		return PTR_ERR(trigger);
	}

	/*
	 * The MSIx vector table resides in device memory which may be cleared
	 * via backdoor resets. We don't allow direct access to the vector
	 * table so even if a userspace driver attempts to save/restore around
	 * such a reset it would be unsuccessful. To avoid this, restore the
	 * cached value of the message prior to enabling.
	 */
	if (msix) {
		struct msi_msg msg;

		get_cached_msi_msg(irq, &msg);
		write_msi_msg(irq, &msg);
	}

	ret = request_irq(irq, vfio_msihandler, 0,
			  vdev->ctx[vector].name, trigger);
	if (ret) {
		kfree(vdev->ctx[vector].name);
		eventfd_ctx_put(trigger);
		return ret;
	}

	vdev->ctx[vector].producer.token = trigger;
	vdev->ctx[vector].producer.irq = irq;
	ret = irq_bypass_register_producer(&vdev->ctx[vector].producer);
	if (unlikely(ret))
		dev_info(&pdev->dev,
		"irq bypass producer (token %p) registration fails: %d\n",
		vdev->ctx[vector].producer.token, ret);

	vdev->ctx[vector].trigger = trigger;

	return 0;
}

static int vfio_msi_set_block(struct vfio_pci_device *vdev, unsigned start,
			      unsigned count, int32_t *fds, bool msix)
{
	int i, j, ret = 0;

	if (start >= vdev->num_ctx || start + count > vdev->num_ctx)
		return -EINVAL;

	for (i = 0, j = start; i < count && !ret; i++, j++) {
		int fd = fds ? fds[i] : -1;
		ret = vfio_msi_set_vector_signal(vdev, j, fd, msix);
	}

	if (ret) {
		for (--j; j >= (int)start; j--)
			vfio_msi_set_vector_signal(vdev, j, -1, msix);
	}

	return ret;
}

static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix)
{
	struct pci_dev *pdev = vdev->pdev;
	int i;

	for (i = 0; i < vdev->num_ctx; i++) {
		virqfd_disable(vdev, &vdev->ctx[i].unmask);
		virqfd_disable(vdev, &vdev->ctx[i].mask);
	}

	vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix);

	if (msix) {
		pci_disable_msix(vdev->pdev);
		kfree(vdev->msix);
	} else
		pci_disable_msi(pdev);

	/*
	 * Both disable paths above use pci_intx_for_msi() to clear DisINTx
	 * via their shutdown paths.  Restore for NoINTx devices.
	 */
	if (vdev->nointx)
		pci_intx(pdev, 0);

	vdev->irq_type = VFIO_PCI_NUM_IRQS;
	vdev->num_ctx = 0;
	kfree(vdev->ctx);
}

/*
 * IOCTL support
 */
static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev,
				    unsigned index, unsigned start,
				    unsigned count, uint32_t flags, void *data)
{
	if (!is_intx(vdev) || start != 0 || count != 1)
		return -EINVAL;

	if (flags & VFIO_IRQ_SET_DATA_NONE) {
		vfio_pci_intx_unmask(vdev);
	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
		uint8_t unmask = *(uint8_t *)data;
		if (unmask)
			vfio_pci_intx_unmask(vdev);
	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
		int32_t fd = *(int32_t *)data;
		if (fd >= 0)
			return virqfd_enable(vdev, vfio_pci_intx_unmask_handler,
					     vfio_send_intx_eventfd, NULL,
					     &vdev->ctx[0].unmask, fd);

		virqfd_disable(vdev, &vdev->ctx[0].unmask);
	}

	return 0;
}

static int vfio_pci_set_intx_mask(struct vfio_pci_device *vdev,
				  unsigned index, unsigned start,
				  unsigned count, uint32_t flags, void *data)
{
	if (!is_intx(vdev) || start != 0 || count != 1)
		return -EINVAL;

	if (flags & VFIO_IRQ_SET_DATA_NONE) {
		vfio_pci_intx_mask(vdev);
	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
		uint8_t mask = *(uint8_t *)data;
		if (mask)
			vfio_pci_intx_mask(vdev);
	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
		return -ENOTTY; /* XXX implement me */
	}

	return 0;
}

static int vfio_pci_set_intx_trigger(struct vfio_pci_device *vdev,
				     unsigned index, unsigned start,
				     unsigned count, uint32_t flags, void *data)
{
	if (is_intx(vdev) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
		vfio_intx_disable(vdev);
		return 0;
	}

	if (!(is_intx(vdev) || is_irq_none(vdev)) || start != 0 || count != 1)
		return -EINVAL;

	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
		int32_t fd = *(int32_t *)data;
		int ret;

		if (is_intx(vdev))
			return vfio_intx_set_signal(vdev, fd);

		ret = vfio_intx_enable(vdev);
		if (ret)
			return ret;

		ret = vfio_intx_set_signal(vdev, fd);
		if (ret)
			vfio_intx_disable(vdev);

		return ret;
	}

	if (!is_intx(vdev))
		return -EINVAL;

	if (flags & VFIO_IRQ_SET_DATA_NONE) {
		vfio_send_intx_eventfd(vdev, NULL);
	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
		uint8_t trigger = *(uint8_t *)data;
		if (trigger)
			vfio_send_intx_eventfd(vdev, NULL);
	}
	return 0;
}

static int vfio_pci_set_msi_trigger(struct vfio_pci_device *vdev,
				    unsigned index, unsigned start,
				    unsigned count, uint32_t flags, void *data)
{
	int i;
	bool msix = (index == VFIO_PCI_MSIX_IRQ_INDEX) ? true : false;

	if (irq_is(vdev, index) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
		vfio_msi_disable(vdev, msix);
		return 0;
	}

	if (!(irq_is(vdev, index) || is_irq_none(vdev)))
		return -EINVAL;

	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
		int32_t *fds = data;
		int ret;

		if (vdev->irq_type == index)
			return vfio_msi_set_block(vdev, start, count,
						  fds, msix);

		ret = vfio_msi_enable(vdev, start + count, msix);
		if (ret)
			return ret;

		ret = vfio_msi_set_block(vdev, start, count, fds, msix);
		if (ret)
			vfio_msi_disable(vdev, msix);

		return ret;
	}

	if (!irq_is(vdev, index) || start + count > vdev->num_ctx)
		return -EINVAL;

	for (i = start; i < start + count; i++) {
		if (!vdev->ctx[i].trigger)
			continue;
		if (flags & VFIO_IRQ_SET_DATA_NONE) {
			eventfd_signal(vdev->ctx[i].trigger, 1);
		} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
			uint8_t *bools = data;
			if (bools[i - start])
				eventfd_signal(vdev->ctx[i].trigger, 1);
		}
	}
	return 0;
}

static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx,
					   unsigned int count, uint32_t flags,
					   void *data)
{
	/* DATA_NONE/DATA_BOOL enables loopback testing */
	if (flags & VFIO_IRQ_SET_DATA_NONE) {
		if (*ctx) {
			if (count) {
				eventfd_signal(*ctx, 1);
			} else {
				eventfd_ctx_put(*ctx);
				*ctx = NULL;
			}
			return 0;
		}
	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
		uint8_t trigger;

		if (!count)
			return -EINVAL;

		trigger = *(uint8_t *)data;
		if (trigger && *ctx)
			eventfd_signal(*ctx, 1);

		return 0;
	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
		int32_t fd;

		if (!count)
			return -EINVAL;

		fd = *(int32_t *)data;
		if (fd == -1) {
			if (*ctx)
				eventfd_ctx_put(*ctx);
			*ctx = NULL;
		} else if (fd >= 0) {
			struct eventfd_ctx *efdctx;

			efdctx = eventfd_ctx_fdget(fd);
			if (IS_ERR(efdctx))
				return PTR_ERR(efdctx);

			if (*ctx)
				eventfd_ctx_put(*ctx);

			*ctx = efdctx;
		}
		return 0;
	}

	return -EINVAL;
}

static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev,
				    unsigned index, unsigned start,
				    unsigned count, uint32_t flags, void *data)
{
	if (index != VFIO_PCI_ERR_IRQ_INDEX || start != 0 || count > 1)
		return -EINVAL;

	return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger,
					       count, flags, data);
}

static int vfio_pci_set_req_trigger(struct vfio_pci_device *vdev,
				    unsigned index, unsigned start,
				    unsigned count, uint32_t flags, void *data)
{
	if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count > 1)
		return -EINVAL;

	return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger,
					       count, flags, data);
}

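/*
 * Dispatch for the VFIO_DEVICE_SET_IRQS ioctl.  By the time we get here the
 * ioctl handler has copied the trailing payload into a kernel buffer, so
 * "data" points at that copy.  A minimal userspace sketch of a typical call
 * (illustrative only; "device_fd" and error handling are assumed):
 *
 *	int efd = eventfd(0, EFD_CLOEXEC);
 *	struct vfio_irq_set *set = malloc(sizeof(*set) + sizeof(int32_t));
 *
 *	set->argsz = sizeof(*set) + sizeof(int32_t);
 *	set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
 *	set->index = VFIO_PCI_INTX_IRQ_INDEX;
 *	set->start = 0;
 *	set->count = 1;
 *	*(int32_t *)set->data = efd;
 *	ioctl(device_fd, VFIO_DEVICE_SET_IRQS, set);
 *
 * which would be routed below to vfio_pci_set_intx_trigger().
 */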
int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
			    unsigned index, unsigned start, unsigned count,
			    void *data)
{
	int (*func)(struct vfio_pci_device *vdev, unsigned index,
		    unsigned start, unsigned count, uint32_t flags,
		    void *data) = NULL;

	switch (index) {
	case VFIO_PCI_INTX_IRQ_INDEX:
		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
		case VFIO_IRQ_SET_ACTION_MASK:
			func = vfio_pci_set_intx_mask;
			break;
		case VFIO_IRQ_SET_ACTION_UNMASK:
			func = vfio_pci_set_intx_unmask;
			break;
		case VFIO_IRQ_SET_ACTION_TRIGGER:
			func = vfio_pci_set_intx_trigger;
			break;
		}
		break;
	case VFIO_PCI_MSI_IRQ_INDEX:
	case VFIO_PCI_MSIX_IRQ_INDEX:
		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
		case VFIO_IRQ_SET_ACTION_MASK:
		case VFIO_IRQ_SET_ACTION_UNMASK:
			/* XXX Need masking support exported */
			break;
		case VFIO_IRQ_SET_ACTION_TRIGGER:
			func = vfio_pci_set_msi_trigger;
			break;
		}
		break;
	case VFIO_PCI_ERR_IRQ_INDEX:
		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
		case VFIO_IRQ_SET_ACTION_TRIGGER:
			if (pci_is_pcie(vdev->pdev))
				func = vfio_pci_set_err_trigger;
			break;
		}
		break;
	case VFIO_PCI_REQ_IRQ_INDEX:
		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
		case VFIO_IRQ_SET_ACTION_TRIGGER:
			func = vfio_pci_set_req_trigger;
			break;
		}
		break;
	}

	if (!func)
		return -ENOTTY;

	return func(vdev, index, start, count, flags, data);
}