linux/drivers/vfio/pci/vfio_pci_rdwr.c
<<
>>
Prefs
   1/*
   2 * VFIO PCI I/O Port & MMIO access
   3 *
   4 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
   5 *     Author: Alex Williamson <alex.williamson@redhat.com>
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License version 2 as
   9 * published by the Free Software Foundation.
  10 *
  11 * Derived from original vfio:
  12 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
  13 * Author: Tom Lyon, pugs@cisco.com
  14 */
  15
  16#include <linux/fs.h>
  17#include <linux/pci.h>
  18#include <linux/uaccess.h>
  19#include <linux/io.h>
  20#include <linux/vfio.h>
  21#include <linux/vgaarb.h>
  22
  23#include "vfio_pci_private.h"
  24
  25#ifdef __LITTLE_ENDIAN
  26#define vfio_ioread64   ioread64
  27#define vfio_iowrite64  iowrite64
  28#define vfio_ioread32   ioread32
  29#define vfio_iowrite32  iowrite32
  30#define vfio_ioread16   ioread16
  31#define vfio_iowrite16  iowrite16
  32#else
  33#define vfio_ioread64   ioread64be
  34#define vfio_iowrite64  iowrite64be
  35#define vfio_ioread32   ioread32be
  36#define vfio_iowrite32  iowrite32be
  37#define vfio_ioread16   ioread16be
  38#define vfio_iowrite16  iowrite16be
  39#endif
  40#define vfio_ioread8    ioread8
  41#define vfio_iowrite8   iowrite8
  42
  43/*
  44 * Read or write from an __iomem region (MMIO or I/O port) with an excluded
  45 * range which is inaccessible.  The excluded range drops writes and fills
  46 * reads with -1.  This is intended for handling MSI-X vector tables and
  47 * leftover space for ROM BARs.
  48 */
  49static ssize_t do_io_rw(void __iomem *io, char __user *buf,
  50                        loff_t off, size_t count, size_t x_start,
  51                        size_t x_end, bool iswrite)
  52{
  53        ssize_t done = 0;
  54
  55        while (count) {
  56                size_t fillable, filled;
  57
  58                if (off < x_start)
  59                        fillable = min(count, (size_t)(x_start - off));
  60                else if (off >= x_end)
  61                        fillable = count;
  62                else
  63                        fillable = 0;
  64
  65                if (fillable >= 4 && !(off % 4)) {
  66                        u32 val;
  67
  68                        if (iswrite) {
  69                                if (copy_from_user(&val, buf, 4))
  70                                        return -EFAULT;
  71
  72                                vfio_iowrite32(val, io + off);
  73                        } else {
  74                                val = vfio_ioread32(io + off);
  75
  76                                if (copy_to_user(buf, &val, 4))
  77                                        return -EFAULT;
  78                        }
  79
  80                        filled = 4;
  81                } else if (fillable >= 2 && !(off % 2)) {
  82                        u16 val;
  83
  84                        if (iswrite) {
  85                                if (copy_from_user(&val, buf, 2))
  86                                        return -EFAULT;
  87
  88                                vfio_iowrite16(val, io + off);
  89                        } else {
  90                                val = vfio_ioread16(io + off);
  91
  92                                if (copy_to_user(buf, &val, 2))
  93                                        return -EFAULT;
  94                        }
  95
  96                        filled = 2;
  97                } else if (fillable) {
  98                        u8 val;
  99
 100                        if (iswrite) {
 101                                if (copy_from_user(&val, buf, 1))
 102                                        return -EFAULT;
 103
 104                                vfio_iowrite8(val, io + off);
 105                        } else {
 106                                val = vfio_ioread8(io + off);
 107
 108                                if (copy_to_user(buf, &val, 1))
 109                                        return -EFAULT;
 110                        }
 111
 112                        filled = 1;
 113                } else {
 114                        /* Fill reads with -1, drop writes */
 115                        filled = min(count, (size_t)(x_end - off));
 116                        if (!iswrite) {
 117                                u8 val = 0xFF;
 118                                size_t i;
 119
 120                                for (i = 0; i < filled; i++)
 121                                        if (copy_to_user(buf + i, &val, 1))
 122                                                return -EFAULT;
 123                        }
 124                }
 125
 126                count -= filled;
 127                done += filled;
 128                off += filled;
 129                buf += filled;
 130        }
 131
 132        return done;
 133}
 134
 135static int vfio_pci_setup_barmap(struct vfio_pci_device *vdev, int bar)
 136{
 137        struct pci_dev *pdev = vdev->pdev;
 138        int ret;
 139        void __iomem *io;
 140
 141        if (vdev->barmap[bar])
 142                return 0;
 143
 144        ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
 145        if (ret)
 146                return ret;
 147
 148        io = pci_iomap(pdev, bar, 0);
 149        if (!io) {
 150                pci_release_selected_regions(pdev, 1 << bar);
 151                return -ENOMEM;
 152        }
 153
 154        vdev->barmap[bar] = io;
 155
 156        return 0;
 157}
 158
 159ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
 160                        size_t count, loff_t *ppos, bool iswrite)
 161{
 162        struct pci_dev *pdev = vdev->pdev;
 163        loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
 164        int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
 165        size_t x_start = 0, x_end = 0;
 166        resource_size_t end;
 167        void __iomem *io;
 168        ssize_t done;
 169
 170        if (pci_resource_start(pdev, bar))
 171                end = pci_resource_len(pdev, bar);
 172        else if (bar == PCI_ROM_RESOURCE &&
 173                 pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW)
 174                end = 0x20000;
 175        else
 176                return -EINVAL;
 177
 178        if (pos >= end)
 179                return -EINVAL;
 180
 181        count = min(count, (size_t)(end - pos));
 182
 183        if (bar == PCI_ROM_RESOURCE) {
 184                /*
 185                 * The ROM can fill less space than the BAR, so we start the
 186                 * excluded range at the end of the actual ROM.  This makes
 187                 * filling large ROM BARs much faster.
 188                 */
 189                io = pci_map_rom(pdev, &x_start);
 190                if (!io)
 191                        return -ENOMEM;
 192                x_end = end;
 193        } else {
 194                int ret = vfio_pci_setup_barmap(vdev, bar);
 195                if (ret)
 196                        return ret;
 197
 198                io = vdev->barmap[bar];
 199        }
 200
 201        if (bar == vdev->msix_bar) {
 202                x_start = vdev->msix_offset;
 203                x_end = vdev->msix_offset + vdev->msix_size;
 204        }
 205
 206        done = do_io_rw(io, buf, pos, count, x_start, x_end, iswrite);
 207
 208        if (done >= 0)
 209                *ppos += done;
 210
 211        if (bar == PCI_ROM_RESOURCE)
 212                pci_unmap_rom(pdev, io);
 213
 214        return done;
 215}
 216
 217ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
 218                               size_t count, loff_t *ppos, bool iswrite)
 219{
 220        int ret;
 221        loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK;
 222        void __iomem *iomem = NULL;
 223        unsigned int rsrc;
 224        bool is_ioport;
 225        ssize_t done;
 226
 227        if (!vdev->has_vga)
 228                return -EINVAL;
 229
 230        if (pos > 0xbfffful)
 231                return -EINVAL;
 232
 233        switch ((u32)pos) {
 234        case 0xa0000 ... 0xbffff:
 235                count = min(count, (size_t)(0xc0000 - pos));
 236                iomem = ioremap_nocache(0xa0000, 0xbffff - 0xa0000 + 1);
 237                off = pos - 0xa0000;
 238                rsrc = VGA_RSRC_LEGACY_MEM;
 239                is_ioport = false;
 240                break;
 241        case 0x3b0 ... 0x3bb:
 242                count = min(count, (size_t)(0x3bc - pos));
 243                iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1);
 244                off = pos - 0x3b0;
 245                rsrc = VGA_RSRC_LEGACY_IO;
 246                is_ioport = true;
 247                break;
 248        case 0x3c0 ... 0x3df:
 249                count = min(count, (size_t)(0x3e0 - pos));
 250                iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1);
 251                off = pos - 0x3c0;
 252                rsrc = VGA_RSRC_LEGACY_IO;
 253                is_ioport = true;
 254                break;
 255        default:
 256                return -EINVAL;
 257        }
 258
 259        if (!iomem)
 260                return -ENOMEM;
 261
 262        ret = vga_get_interruptible(vdev->pdev, rsrc);
 263        if (ret) {
 264                is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
 265                return ret;
 266        }
 267
 268        done = do_io_rw(iomem, buf, off, count, 0, 0, iswrite);
 269
 270        vga_put(vdev->pdev, rsrc);
 271
 272        is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
 273
 274        if (done >= 0)
 275                *ppos += done;
 276
 277        return done;
 278}
 279
 280static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
 281{
 282        struct vfio_pci_ioeventfd *ioeventfd = opaque;
 283
 284        switch (ioeventfd->count) {
 285        case 1:
 286                vfio_iowrite8(ioeventfd->data, ioeventfd->addr);
 287                break;
 288        case 2:
 289                vfio_iowrite16(ioeventfd->data, ioeventfd->addr);
 290                break;
 291        case 4:
 292                vfio_iowrite32(ioeventfd->data, ioeventfd->addr);
 293                break;
 294#ifdef iowrite64
 295        case 8:
 296                vfio_iowrite64(ioeventfd->data, ioeventfd->addr);
 297                break;
 298#endif
 299        }
 300
 301        return 0;
 302}
 303
 304long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset,
 305                        uint64_t data, int count, int fd)
 306{
 307        struct pci_dev *pdev = vdev->pdev;
 308        loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
 309        int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
 310        struct vfio_pci_ioeventfd *ioeventfd;
 311
 312        /* Only support ioeventfds into BARs */
 313        if (bar > VFIO_PCI_BAR5_REGION_INDEX)
 314                return -EINVAL;
 315
 316        if (pos + count > pci_resource_len(pdev, bar))
 317                return -EINVAL;
 318
 319        /* Disallow ioeventfds working around MSI-X table writes */
 320        if (bar == vdev->msix_bar &&
 321            !(pos + count <= vdev->msix_offset ||
 322              pos >= vdev->msix_offset + vdev->msix_size))
 323                return -EINVAL;
 324
 325#ifndef iowrite64
 326        if (count == 8)
 327                return -EINVAL;
 328#endif
 329
 330        ret = vfio_pci_setup_barmap(vdev, bar);
 331        if (ret)
 332                return ret;
 333
 334        mutex_lock(&vdev->ioeventfds_lock);
 335
 336        list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) {
 337                if (ioeventfd->pos == pos && ioeventfd->bar == bar &&
 338                    ioeventfd->data == data && ioeventfd->count == count) {
 339                        if (fd == -1) {
 340                                vfio_virqfd_disable(&ioeventfd->virqfd);
 341                                list_del(&ioeventfd->next);
 342                                vdev->ioeventfds_nr--;
 343                                kfree(ioeventfd);
 344                                ret = 0;
 345                        } else
 346                                ret = -EEXIST;
 347
 348                        goto out_unlock;
 349                }
 350        }
 351
 352        if (fd < 0) {
 353                ret = -ENODEV;
 354                goto out_unlock;
 355        }
 356
 357        if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) {
 358                ret = -ENOSPC;
 359                goto out_unlock;
 360        }
 361
 362        ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL);
 363        if (!ioeventfd) {
 364                ret = -ENOMEM;
 365                goto out_unlock;
 366        }
 367
 368        ioeventfd->addr = vdev->barmap[bar] + pos;
 369        ioeventfd->data = data;
 370        ioeventfd->pos = pos;
 371        ioeventfd->bar = bar;
 372        ioeventfd->count = count;
 373
 374        ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
 375                                 NULL, NULL, &ioeventfd->virqfd, fd);
 376        if (ret) {
 377                kfree(ioeventfd);
 378                goto out_unlock;
 379        }
 380
 381        list_add(&ioeventfd->next, &vdev->ioeventfds_list);
 382        vdev->ioeventfds_nr++;
 383
 384out_unlock:
 385        mutex_unlock(&vdev->ioeventfds_lock);
 386
 387        return ret;
 388}
 389