linux/drivers/vfio/pci/vfio_pci_rdwr.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * VFIO PCI I/O Port & MMIO access
   4 *
   5 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
   6 *     Author: Alex Williamson <alex.williamson@redhat.com>
   7 *
   8 * Derived from original vfio:
   9 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
  10 * Author: Tom Lyon, pugs@cisco.com
  11 */
  12
  13#include <linux/fs.h>
  14#include <linux/pci.h>
  15#include <linux/uaccess.h>
  16#include <linux/io.h>
  17#include <linux/vfio.h>
  18#include <linux/vgaarb.h>
  19
  20#include <linux/vfio_pci_core.h>
  21
  /*
   * Width-specific accessor aliases used by the helpers in this file.
   *
   * Single-byte accesses always map to ioread8()/iowrite8().  The 16-, 32-
   * and 64-bit names map to the plain io{read,write}N() helpers when
   * __LITTLE_ENDIAN is defined and to the io{read,write}Nbe() variants
   * otherwise.
   */
  #define vfio_ioread8    ioread8
  #define vfio_iowrite8   iowrite8

  #ifdef __LITTLE_ENDIAN
  #define vfio_ioread16   ioread16
  #define vfio_iowrite16  iowrite16
  #define vfio_ioread32   ioread32
  #define vfio_iowrite32  iowrite32
  #define vfio_ioread64   ioread64
  #define vfio_iowrite64  iowrite64
  #else
  #define vfio_ioread16   ioread16be
  #define vfio_iowrite16  iowrite16be
  #define vfio_ioread32   ioread32be
  #define vfio_iowrite32  iowrite32be
  #define vfio_ioread64   ioread64be
  #define vfio_iowrite64  iowrite64be
  #endif
  39
  40#define VFIO_IOWRITE(size) \
  41static int vfio_pci_iowrite##size(struct vfio_pci_core_device *vdev,            \
  42                        bool test_mem, u##size val, void __iomem *io)   \
  43{                                                                       \
  44        if (test_mem) {                                                 \
  45                down_read(&vdev->memory_lock);                          \
  46                if (!__vfio_pci_memory_enabled(vdev)) {                 \
  47                        up_read(&vdev->memory_lock);                    \
  48                        return -EIO;                                    \
  49                }                                                       \
  50        }                                                               \
  51                                                                        \
  52        vfio_iowrite##size(val, io);                                    \
  53                                                                        \
  54        if (test_mem)                                                   \
  55                up_read(&vdev->memory_lock);                            \
  56                                                                        \
  57        return 0;                                                       \
  58}
  59
  60VFIO_IOWRITE(8)
  61VFIO_IOWRITE(16)
  62VFIO_IOWRITE(32)
  63#ifdef iowrite64
  64VFIO_IOWRITE(64)
  65#endif
  66
  67#define VFIO_IOREAD(size) \
  68static int vfio_pci_ioread##size(struct vfio_pci_core_device *vdev,             \
  69                        bool test_mem, u##size *val, void __iomem *io)  \
  70{                                                                       \
  71        if (test_mem) {                                                 \
  72                down_read(&vdev->memory_lock);                          \
  73                if (!__vfio_pci_memory_enabled(vdev)) {                 \
  74                        up_read(&vdev->memory_lock);                    \
  75                        return -EIO;                                    \
  76                }                                                       \
  77        }                                                               \
  78                                                                        \
  79        *val = vfio_ioread##size(io);                                   \
  80                                                                        \
  81        if (test_mem)                                                   \
  82                up_read(&vdev->memory_lock);                            \
  83                                                                        \
  84        return 0;                                                       \
  85}
  86
  87VFIO_IOREAD(8)
  88VFIO_IOREAD(16)
  89VFIO_IOREAD(32)
  90
  91/*
  92 * Read or write from an __iomem region (MMIO or I/O port) with an excluded
  93 * range which is inaccessible.  The excluded range drops writes and fills
  94 * reads with -1.  This is intended for handling MSI-X vector tables and
  95 * leftover space for ROM BARs.
  96 */
  97static ssize_t do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
  98                        void __iomem *io, char __user *buf,
  99                        loff_t off, size_t count, size_t x_start,
 100                        size_t x_end, bool iswrite)
 101{
 102        ssize_t done = 0;
 103        int ret;
 104
 105        while (count) {
 106                size_t fillable, filled;
 107
 108                if (off < x_start)
 109                        fillable = min(count, (size_t)(x_start - off));
 110                else if (off >= x_end)
 111                        fillable = count;
 112                else
 113                        fillable = 0;
 114
 115                if (fillable >= 4 && !(off % 4)) {
 116                        u32 val;
 117
 118                        if (iswrite) {
 119                                if (copy_from_user(&val, buf, 4))
 120                                        return -EFAULT;
 121
 122                                ret = vfio_pci_iowrite32(vdev, test_mem,
 123                                                         val, io + off);
 124                                if (ret)
 125                                        return ret;
 126                        } else {
 127                                ret = vfio_pci_ioread32(vdev, test_mem,
 128                                                        &val, io + off);
 129                                if (ret)
 130                                        return ret;
 131
 132                                if (copy_to_user(buf, &val, 4))
 133                                        return -EFAULT;
 134                        }
 135
 136                        filled = 4;
 137                } else if (fillable >= 2 && !(off % 2)) {
 138                        u16 val;
 139
 140                        if (iswrite) {
 141                                if (copy_from_user(&val, buf, 2))
 142                                        return -EFAULT;
 143
 144                                ret = vfio_pci_iowrite16(vdev, test_mem,
 145                                                         val, io + off);
 146                                if (ret)
 147                                        return ret;
 148                        } else {
 149                                ret = vfio_pci_ioread16(vdev, test_mem,
 150                                                        &val, io + off);
 151                                if (ret)
 152                                        return ret;
 153
 154                                if (copy_to_user(buf, &val, 2))
 155                                        return -EFAULT;
 156                        }
 157
 158                        filled = 2;
 159                } else if (fillable) {
 160                        u8 val;
 161
 162                        if (iswrite) {
 163                                if (copy_from_user(&val, buf, 1))
 164                                        return -EFAULT;
 165
 166                                ret = vfio_pci_iowrite8(vdev, test_mem,
 167                                                        val, io + off);
 168                                if (ret)
 169                                        return ret;
 170                        } else {
 171                                ret = vfio_pci_ioread8(vdev, test_mem,
 172                                                       &val, io + off);
 173                                if (ret)
 174                                        return ret;
 175
 176                                if (copy_to_user(buf, &val, 1))
 177                                        return -EFAULT;
 178                        }
 179
 180                        filled = 1;
 181                } else {
 182                        /* Fill reads with -1, drop writes */
 183                        filled = min(count, (size_t)(x_end - off));
 184                        if (!iswrite) {
 185                                u8 val = 0xFF;
 186                                size_t i;
 187
 188                                for (i = 0; i < filled; i++)
 189                                        if (copy_to_user(buf + i, &val, 1))
 190                                                return -EFAULT;
 191                        }
 192                }
 193
 194                count -= filled;
 195                done += filled;
 196                off += filled;
 197                buf += filled;
 198        }
 199
 200        return done;
 201}
 202
 203static int vfio_pci_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
 204{
 205        struct pci_dev *pdev = vdev->pdev;
 206        int ret;
 207        void __iomem *io;
 208
 209        if (vdev->barmap[bar])
 210                return 0;
 211
 212        ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
 213        if (ret)
 214                return ret;
 215
 216        io = pci_iomap(pdev, bar, 0);
 217        if (!io) {
 218                pci_release_selected_regions(pdev, 1 << bar);
 219                return -ENOMEM;
 220        }
 221
 222        vdev->barmap[bar] = io;
 223
 224        return 0;
 225}
 226
 227ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
 228                        size_t count, loff_t *ppos, bool iswrite)
 229{
 230        struct pci_dev *pdev = vdev->pdev;
 231        loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
 232        int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
 233        size_t x_start = 0, x_end = 0;
 234        resource_size_t end;
 235        void __iomem *io;
 236        struct resource *res = &vdev->pdev->resource[bar];
 237        ssize_t done;
 238
 239        if (pci_resource_start(pdev, bar))
 240                end = pci_resource_len(pdev, bar);
 241        else if (bar == PCI_ROM_RESOURCE &&
 242                 pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW)
 243                end = 0x20000;
 244        else
 245                return -EINVAL;
 246
 247        if (pos >= end)
 248                return -EINVAL;
 249
 250        count = min(count, (size_t)(end - pos));
 251
 252        if (bar == PCI_ROM_RESOURCE) {
 253                /*
 254                 * The ROM can fill less space than the BAR, so we start the
 255                 * excluded range at the end of the actual ROM.  This makes
 256                 * filling large ROM BARs much faster.
 257                 */
 258                io = pci_map_rom(pdev, &x_start);
 259                if (!io) {
 260                        done = -ENOMEM;
 261                        goto out;
 262                }
 263                x_end = end;
 264        } else {
 265                int ret = vfio_pci_setup_barmap(vdev, bar);
 266                if (ret) {
 267                        done = ret;
 268                        goto out;
 269                }
 270
 271                io = vdev->barmap[bar];
 272        }
 273
 274        if (bar == vdev->msix_bar) {
 275                x_start = vdev->msix_offset;
 276                x_end = vdev->msix_offset + vdev->msix_size;
 277        }
 278
 279        done = do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
 280                        count, x_start, x_end, iswrite);
 281
 282        if (done >= 0)
 283                *ppos += done;
 284
 285        if (bar == PCI_ROM_RESOURCE)
 286                pci_unmap_rom(pdev, io);
 287out:
 288        return done;
 289}
 290
 291ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
 292                               size_t count, loff_t *ppos, bool iswrite)
 293{
 294        int ret;
 295        loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK;
 296        void __iomem *iomem = NULL;
 297        unsigned int rsrc;
 298        bool is_ioport;
 299        ssize_t done;
 300
 301        if (!vdev->has_vga)
 302                return -EINVAL;
 303
 304        if (pos > 0xbfffful)
 305                return -EINVAL;
 306
 307        switch ((u32)pos) {
 308        case 0xa0000 ... 0xbffff:
 309                count = min(count, (size_t)(0xc0000 - pos));
 310                iomem = ioremap(0xa0000, 0xbffff - 0xa0000 + 1);
 311                off = pos - 0xa0000;
 312                rsrc = VGA_RSRC_LEGACY_MEM;
 313                is_ioport = false;
 314                break;
 315        case 0x3b0 ... 0x3bb:
 316                count = min(count, (size_t)(0x3bc - pos));
 317                iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1);
 318                off = pos - 0x3b0;
 319                rsrc = VGA_RSRC_LEGACY_IO;
 320                is_ioport = true;
 321                break;
 322        case 0x3c0 ... 0x3df:
 323                count = min(count, (size_t)(0x3e0 - pos));
 324                iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1);
 325                off = pos - 0x3c0;
 326                rsrc = VGA_RSRC_LEGACY_IO;
 327                is_ioport = true;
 328                break;
 329        default:
 330                return -EINVAL;
 331        }
 332
 333        if (!iomem)
 334                return -ENOMEM;
 335
 336        ret = vga_get_interruptible(vdev->pdev, rsrc);
 337        if (ret) {
 338                is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
 339                return ret;
 340        }
 341
 342        /*
 343         * VGA MMIO is a legacy, non-BAR resource that hopefully allows
 344         * probing, so we don't currently worry about access in relation
 345         * to the memory enable bit in the command register.
 346         */
 347        done = do_io_rw(vdev, false, iomem, buf, off, count, 0, 0, iswrite);
 348
 349        vga_put(vdev->pdev, rsrc);
 350
 351        is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
 352
 353        if (done >= 0)
 354                *ppos += done;
 355
 356        return done;
 357}
 358
 359static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
 360                                        bool test_mem)
 361{
 362        switch (ioeventfd->count) {
 363        case 1:
 364                vfio_pci_iowrite8(ioeventfd->vdev, test_mem,
 365                                  ioeventfd->data, ioeventfd->addr);
 366                break;
 367        case 2:
 368                vfio_pci_iowrite16(ioeventfd->vdev, test_mem,
 369                                   ioeventfd->data, ioeventfd->addr);
 370                break;
 371        case 4:
 372                vfio_pci_iowrite32(ioeventfd->vdev, test_mem,
 373                                   ioeventfd->data, ioeventfd->addr);
 374                break;
 375#ifdef iowrite64
 376        case 8:
 377                vfio_pci_iowrite64(ioeventfd->vdev, test_mem,
 378                                   ioeventfd->data, ioeventfd->addr);
 379                break;
 380#endif
 381        }
 382}
 383
 384static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
 385{
 386        struct vfio_pci_ioeventfd *ioeventfd = opaque;
 387        struct vfio_pci_core_device *vdev = ioeventfd->vdev;
 388
 389        if (ioeventfd->test_mem) {
 390                if (!down_read_trylock(&vdev->memory_lock))
 391                        return 1; /* Lock contended, use thread */
 392                if (!__vfio_pci_memory_enabled(vdev)) {
 393                        up_read(&vdev->memory_lock);
 394                        return 0;
 395                }
 396        }
 397
 398        vfio_pci_ioeventfd_do_write(ioeventfd, false);
 399
 400        if (ioeventfd->test_mem)
 401                up_read(&vdev->memory_lock);
 402
 403        return 0;
 404}
 405
 406static void vfio_pci_ioeventfd_thread(void *opaque, void *unused)
 407{
 408        struct vfio_pci_ioeventfd *ioeventfd = opaque;
 409
 410        vfio_pci_ioeventfd_do_write(ioeventfd, ioeventfd->test_mem);
 411}
 412
 413long vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
 414                        uint64_t data, int count, int fd)
 415{
 416        struct pci_dev *pdev = vdev->pdev;
 417        loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
 418        int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
 419        struct vfio_pci_ioeventfd *ioeventfd;
 420
 421        /* Only support ioeventfds into BARs */
 422        if (bar > VFIO_PCI_BAR5_REGION_INDEX)
 423                return -EINVAL;
 424
 425        if (pos + count > pci_resource_len(pdev, bar))
 426                return -EINVAL;
 427
 428        /* Disallow ioeventfds working around MSI-X table writes */
 429        if (bar == vdev->msix_bar &&
 430            !(pos + count <= vdev->msix_offset ||
 431              pos >= vdev->msix_offset + vdev->msix_size))
 432                return -EINVAL;
 433
 434#ifndef iowrite64
 435        if (count == 8)
 436                return -EINVAL;
 437#endif
 438
 439        ret = vfio_pci_setup_barmap(vdev, bar);
 440        if (ret)
 441                return ret;
 442
 443        mutex_lock(&vdev->ioeventfds_lock);
 444
 445        list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) {
 446                if (ioeventfd->pos == pos && ioeventfd->bar == bar &&
 447                    ioeventfd->data == data && ioeventfd->count == count) {
 448                        if (fd == -1) {
 449                                vfio_virqfd_disable(&ioeventfd->virqfd);
 450                                list_del(&ioeventfd->next);
 451                                vdev->ioeventfds_nr--;
 452                                kfree(ioeventfd);
 453                                ret = 0;
 454                        } else
 455                                ret = -EEXIST;
 456
 457                        goto out_unlock;
 458                }
 459        }
 460
 461        if (fd < 0) {
 462                ret = -ENODEV;
 463                goto out_unlock;
 464        }
 465
 466        if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) {
 467                ret = -ENOSPC;
 468                goto out_unlock;
 469        }
 470
 471        ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL);
 472        if (!ioeventfd) {
 473                ret = -ENOMEM;
 474                goto out_unlock;
 475        }
 476
 477        ioeventfd->vdev = vdev;
 478        ioeventfd->addr = vdev->barmap[bar] + pos;
 479        ioeventfd->data = data;
 480        ioeventfd->pos = pos;
 481        ioeventfd->bar = bar;
 482        ioeventfd->count = count;
 483        ioeventfd->test_mem = vdev->pdev->resource[bar].flags & IORESOURCE_MEM;
 484
 485        ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
 486                                 vfio_pci_ioeventfd_thread, NULL,
 487                                 &ioeventfd->virqfd, fd);
 488        if (ret) {
 489                kfree(ioeventfd);
 490                goto out_unlock;
 491        }
 492
 493        list_add(&ioeventfd->next, &vdev->ioeventfds_list);
 494        vdev->ioeventfds_nr++;
 495
 496out_unlock:
 497        mutex_unlock(&vdev->ioeventfds_lock);
 498
 499        return ret;
 500}
 501