linux/drivers/vfio/pci/vfio_pci_rdwr.c
<<
>>
Prefs
// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO PCI I/O Port & MMIO access
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */
  12
  13#include <linux/fs.h>
  14#include <linux/pci.h>
  15#include <linux/uaccess.h>
  16#include <linux/io.h>
  17#include <linux/vfio.h>
  18#include <linux/vgaarb.h>
  19
  20#include "vfio_pci_private.h"
  21
  22#ifdef __LITTLE_ENDIAN
  23#define vfio_ioread64   ioread64
  24#define vfio_iowrite64  iowrite64
  25#define vfio_ioread32   ioread32
  26#define vfio_iowrite32  iowrite32
  27#define vfio_ioread16   ioread16
  28#define vfio_iowrite16  iowrite16
  29#else
  30#define vfio_ioread64   ioread64be
  31#define vfio_iowrite64  iowrite64be
  32#define vfio_ioread32   ioread32be
  33#define vfio_iowrite32  iowrite32be
  34#define vfio_ioread16   ioread16be
  35#define vfio_iowrite16  iowrite16be
  36#endif
  37#define vfio_ioread8    ioread8
  38#define vfio_iowrite8   iowrite8
  39
  40/*
  41 * Read or write from an __iomem region (MMIO or I/O port) with an excluded
  42 * range which is inaccessible.  The excluded range drops writes and fills
  43 * reads with -1.  This is intended for handling MSI-X vector tables and
  44 * leftover space for ROM BARs.
  45 */
  46static ssize_t do_io_rw(void __iomem *io, char __user *buf,
  47                        loff_t off, size_t count, size_t x_start,
  48                        size_t x_end, bool iswrite)
  49{
  50        ssize_t done = 0;
  51
  52        while (count) {
  53                size_t fillable, filled;
  54
  55                if (off < x_start)
  56                        fillable = min(count, (size_t)(x_start - off));
  57                else if (off >= x_end)
  58                        fillable = count;
  59                else
  60                        fillable = 0;
  61
  62                if (fillable >= 4 && !(off % 4)) {
  63                        u32 val;
  64
  65                        if (iswrite) {
  66                                if (copy_from_user(&val, buf, 4))
  67                                        return -EFAULT;
  68
  69                                vfio_iowrite32(val, io + off);
  70                        } else {
  71                                val = vfio_ioread32(io + off);
  72
  73                                if (copy_to_user(buf, &val, 4))
  74                                        return -EFAULT;
  75                        }
  76
  77                        filled = 4;
  78                } else if (fillable >= 2 && !(off % 2)) {
  79                        u16 val;
  80
  81                        if (iswrite) {
  82                                if (copy_from_user(&val, buf, 2))
  83                                        return -EFAULT;
  84
  85                                vfio_iowrite16(val, io + off);
  86                        } else {
  87                                val = vfio_ioread16(io + off);
  88
  89                                if (copy_to_user(buf, &val, 2))
  90                                        return -EFAULT;
  91                        }
  92
  93                        filled = 2;
  94                } else if (fillable) {
  95                        u8 val;
  96
  97                        if (iswrite) {
  98                                if (copy_from_user(&val, buf, 1))
  99                                        return -EFAULT;
 100
 101                                vfio_iowrite8(val, io + off);
 102                        } else {
 103                                val = vfio_ioread8(io + off);
 104
 105                                if (copy_to_user(buf, &val, 1))
 106                                        return -EFAULT;
 107                        }
 108
 109                        filled = 1;
 110                } else {
 111                        /* Fill reads with -1, drop writes */
 112                        filled = min(count, (size_t)(x_end - off));
 113                        if (!iswrite) {
 114                                u8 val = 0xFF;
 115                                size_t i;
 116
 117                                for (i = 0; i < filled; i++)
 118                                        if (copy_to_user(buf + i, &val, 1))
 119                                                return -EFAULT;
 120                        }
 121                }
 122
 123                count -= filled;
 124                done += filled;
 125                off += filled;
 126                buf += filled;
 127        }
 128
 129        return done;
 130}
 131
 132static int vfio_pci_setup_barmap(struct vfio_pci_device *vdev, int bar)
 133{
 134        struct pci_dev *pdev = vdev->pdev;
 135        int ret;
 136        void __iomem *io;
 137
 138        if (vdev->barmap[bar])
 139                return 0;
 140
 141        ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
 142        if (ret)
 143                return ret;
 144
 145        io = pci_iomap(pdev, bar, 0);
 146        if (!io) {
 147                pci_release_selected_regions(pdev, 1 << bar);
 148                return -ENOMEM;
 149        }
 150
 151        vdev->barmap[bar] = io;
 152
 153        return 0;
 154}
 155
 156ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
 157                        size_t count, loff_t *ppos, bool iswrite)
 158{
 159        struct pci_dev *pdev = vdev->pdev;
 160        loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
 161        int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
 162        size_t x_start = 0, x_end = 0;
 163        resource_size_t end;
 164        void __iomem *io;
 165        ssize_t done;
 166
 167        if (pci_resource_start(pdev, bar))
 168                end = pci_resource_len(pdev, bar);
 169        else if (bar == PCI_ROM_RESOURCE &&
 170                 pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW)
 171                end = 0x20000;
 172        else
 173                return -EINVAL;
 174
 175        if (pos >= end)
 176                return -EINVAL;
 177
 178        count = min(count, (size_t)(end - pos));
 179
 180        if (bar == PCI_ROM_RESOURCE) {
 181                /*
 182                 * The ROM can fill less space than the BAR, so we start the
 183                 * excluded range at the end of the actual ROM.  This makes
 184                 * filling large ROM BARs much faster.
 185                 */
 186                io = pci_map_rom(pdev, &x_start);
 187                if (!io)
 188                        return -ENOMEM;
 189                x_end = end;
 190        } else {
 191                int ret = vfio_pci_setup_barmap(vdev, bar);
 192                if (ret)
 193                        return ret;
 194
 195                io = vdev->barmap[bar];
 196        }
 197
 198        if (bar == vdev->msix_bar) {
 199                x_start = vdev->msix_offset;
 200                x_end = vdev->msix_offset + vdev->msix_size;
 201        }
 202
 203        done = do_io_rw(io, buf, pos, count, x_start, x_end, iswrite);
 204
 205        if (done >= 0)
 206                *ppos += done;
 207
 208        if (bar == PCI_ROM_RESOURCE)
 209                pci_unmap_rom(pdev, io);
 210
 211        return done;
 212}
 213
 214ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
 215                               size_t count, loff_t *ppos, bool iswrite)
 216{
 217        int ret;
 218        loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK;
 219        void __iomem *iomem = NULL;
 220        unsigned int rsrc;
 221        bool is_ioport;
 222        ssize_t done;
 223
 224        if (!vdev->has_vga)
 225                return -EINVAL;
 226
 227        if (pos > 0xbfffful)
 228                return -EINVAL;
 229
 230        switch ((u32)pos) {
 231        case 0xa0000 ... 0xbffff:
 232                count = min(count, (size_t)(0xc0000 - pos));
 233                iomem = ioremap_nocache(0xa0000, 0xbffff - 0xa0000 + 1);
 234                off = pos - 0xa0000;
 235                rsrc = VGA_RSRC_LEGACY_MEM;
 236                is_ioport = false;
 237                break;
 238        case 0x3b0 ... 0x3bb:
 239                count = min(count, (size_t)(0x3bc - pos));
 240                iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1);
 241                off = pos - 0x3b0;
 242                rsrc = VGA_RSRC_LEGACY_IO;
 243                is_ioport = true;
 244                break;
 245        case 0x3c0 ... 0x3df:
 246                count = min(count, (size_t)(0x3e0 - pos));
 247                iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1);
 248                off = pos - 0x3c0;
 249                rsrc = VGA_RSRC_LEGACY_IO;
 250                is_ioport = true;
 251                break;
 252        default:
 253                return -EINVAL;
 254        }
 255
 256        if (!iomem)
 257                return -ENOMEM;
 258
 259        ret = vga_get_interruptible(vdev->pdev, rsrc);
 260        if (ret) {
 261                is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
 262                return ret;
 263        }
 264
 265        done = do_io_rw(iomem, buf, off, count, 0, 0, iswrite);
 266
 267        vga_put(vdev->pdev, rsrc);
 268
 269        is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
 270
 271        if (done >= 0)
 272                *ppos += done;
 273
 274        return done;
 275}
 276
 277static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
 278{
 279        struct vfio_pci_ioeventfd *ioeventfd = opaque;
 280
 281        switch (ioeventfd->count) {
 282        case 1:
 283                vfio_iowrite8(ioeventfd->data, ioeventfd->addr);
 284                break;
 285        case 2:
 286                vfio_iowrite16(ioeventfd->data, ioeventfd->addr);
 287                break;
 288        case 4:
 289                vfio_iowrite32(ioeventfd->data, ioeventfd->addr);
 290                break;
 291#ifdef iowrite64
 292        case 8:
 293                vfio_iowrite64(ioeventfd->data, ioeventfd->addr);
 294                break;
 295#endif
 296        }
 297
 298        return 0;
 299}
 300
 301long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset,
 302                        uint64_t data, int count, int fd)
 303{
 304        struct pci_dev *pdev = vdev->pdev;
 305        loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
 306        int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
 307        struct vfio_pci_ioeventfd *ioeventfd;
 308
 309        /* Only support ioeventfds into BARs */
 310        if (bar > VFIO_PCI_BAR5_REGION_INDEX)
 311                return -EINVAL;
 312
 313        if (pos + count > pci_resource_len(pdev, bar))
 314                return -EINVAL;
 315
 316        /* Disallow ioeventfds working around MSI-X table writes */
 317        if (bar == vdev->msix_bar &&
 318            !(pos + count <= vdev->msix_offset ||
 319              pos >= vdev->msix_offset + vdev->msix_size))
 320                return -EINVAL;
 321
 322#ifndef iowrite64
 323        if (count == 8)
 324                return -EINVAL;
 325#endif
 326
 327        ret = vfio_pci_setup_barmap(vdev, bar);
 328        if (ret)
 329                return ret;
 330
 331        mutex_lock(&vdev->ioeventfds_lock);
 332
 333        list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) {
 334                if (ioeventfd->pos == pos && ioeventfd->bar == bar &&
 335                    ioeventfd->data == data && ioeventfd->count == count) {
 336                        if (fd == -1) {
 337                                vfio_virqfd_disable(&ioeventfd->virqfd);
 338                                list_del(&ioeventfd->next);
 339                                vdev->ioeventfds_nr--;
 340                                kfree(ioeventfd);
 341                                ret = 0;
 342                        } else
 343                                ret = -EEXIST;
 344
 345                        goto out_unlock;
 346                }
 347        }
 348
 349        if (fd < 0) {
 350                ret = -ENODEV;
 351                goto out_unlock;
 352        }
 353
 354        if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) {
 355                ret = -ENOSPC;
 356                goto out_unlock;
 357        }
 358
 359        ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL);
 360        if (!ioeventfd) {
 361                ret = -ENOMEM;
 362                goto out_unlock;
 363        }
 364
 365        ioeventfd->addr = vdev->barmap[bar] + pos;
 366        ioeventfd->data = data;
 367        ioeventfd->pos = pos;
 368        ioeventfd->bar = bar;
 369        ioeventfd->count = count;
 370
 371        ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
 372                                 NULL, NULL, &ioeventfd->virqfd, fd);
 373        if (ret) {
 374                kfree(ioeventfd);
 375                goto out_unlock;
 376        }
 377
 378        list_add(&ioeventfd->next, &vdev->ioeventfds_list);
 379        vdev->ioeventfds_nr++;
 380
 381out_unlock:
 382        mutex_unlock(&vdev->ioeventfds_lock);
 383
 384        return ret;
 385}
 386