linux/include/uapi/linux/vfio.h
<<
>>
Prefs
   1/*
   2 * VFIO API definition
   3 *
   4 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
   5 *     Author: Alex Williamson <alex.williamson@redhat.com>
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License version 2 as
   9 * published by the Free Software Foundation.
  10 */
  11#ifndef _UAPIVFIO_H
  12#define _UAPIVFIO_H
  13
  14#include <linux/types.h>
  15#include <linux/ioctl.h>
  16
  17#define VFIO_API_VERSION        0       /* Value reported by VFIO_GET_API_VERSION */
  18
  19
  20/* Kernel & User level defines for VFIO IOCTLs. */
  21
  22/* Extensions: IDs queried with VFIO_CHECK_EXTENSION; IOMMU types are then set with VFIO_SET_IOMMU */
  23
  24#define VFIO_TYPE1_IOMMU                1       /* Type1 IOMMU (see "API for Type1 VFIO IOMMU") */
  25#define VFIO_SPAPR_TCE_IOMMU            2       /* SPAPR TCE (Server POWERPC) IOMMU */
  26
  27/*
  28 * The IOCTL interface is designed for extensibility by embedding the
  29 * structure length (argsz) and flags into structures passed between
  30 * kernel and userspace.  We therefore use the _IO() macro for these
  31 * defines to avoid implicitly embedding a size into the ioctl request.
  32 * As structure fields are added, argsz will increase to match and flag
  33 * bits will be defined to indicate additional fields with valid data.
  34 * It's *always* the caller's responsibility to indicate the size of
  35 * the structure passed by setting argsz appropriately.
  36 */
  37
  38#define VFIO_TYPE       (';')   /* ioctl type argument used by every _IO() in this file */
  39#define VFIO_BASE       100     /* First request number; each ioctl is VFIO_BASE + n */
  40
  41/* -------- IOCTLs for VFIO file descriptor (/dev/vfio/vfio) -------- */
  42
  43/**
  44 * VFIO_GET_API_VERSION - _IO(VFIO_TYPE, VFIO_BASE + 0)
  45 *
  46 * Report the version of the VFIO API.  This allows us to bump the entire
  47 * API version should we later need to add or change features in incompatible
  48 * ways.
  49 * Return: VFIO_API_VERSION
  50 * Availability: Always
  51 */
  52#define VFIO_GET_API_VERSION            _IO(VFIO_TYPE, VFIO_BASE + 0)  /* ioctl on the VFIO fd (/dev/vfio/vfio) */
  53
  54/**
  55 * VFIO_CHECK_EXTENSION - _IOW(VFIO_TYPE, VFIO_BASE + 1, __u32)
  56 *
  57 * Check whether an extension is supported.
  58 * Return: 0 if not supported, 1 (or some other positive integer) if supported.
  59 * Availability: Always
  60 */
  61#define VFIO_CHECK_EXTENSION            _IO(VFIO_TYPE, VFIO_BASE + 1)  /* _IO() by design: no argument size is encoded in the request */
  62
  63/**
  64 * VFIO_SET_IOMMU - _IOW(VFIO_TYPE, VFIO_BASE + 2, __s32)
  65 *
  66 * Set the iommu to the given type.  The type must be supported by an
  67 * iommu driver as verified by calling CHECK_EXTENSION using the same
  68 * type.  A group must be set to this file descriptor before this
  69 * ioctl is available.  The IOMMU interfaces enabled by this call are
  70 * specific to the value set.
  71 * Return: 0 on success, -errno on failure
  72 * Availability: When VFIO group attached
  73 */
  74#define VFIO_SET_IOMMU                  _IO(VFIO_TYPE, VFIO_BASE + 2)  /* _IO() by design: no argument size is encoded in the request */
  75
  76/* -------- IOCTLs for GROUP file descriptors (/dev/vfio/$GROUP) -------- */
  77
  78/**
  79 * VFIO_GROUP_GET_STATUS - _IOR(VFIO_TYPE, VFIO_BASE + 3,
  80 *                                              struct vfio_group_status)
  81 *
  82 * Retrieve information about the group.  Fills in provided
  83 * struct vfio_group_status.  Caller sets argsz.
  84 * Return: 0 on success, -errno on failure.
  85 * Availability: Always
  86 */
  87struct vfio_group_status {
  88        __u32   argsz;          /* Size of this struct; set by the caller */
  89        __u32   flags;          /* VFIO_GROUP_FLAGS_* bits, filled in by the ioctl */
  90#define VFIO_GROUP_FLAGS_VIABLE         (1 << 0) /* NOTE(review): "viable" is not defined in this header */
  91#define VFIO_GROUP_FLAGS_CONTAINER_SET  (1 << 1) /* presumably: a container is set (VFIO_GROUP_SET_CONTAINER) - confirm */
  92};
  93#define VFIO_GROUP_GET_STATUS           _IO(VFIO_TYPE, VFIO_BASE + 3)  /* ioctl on a group fd (/dev/vfio/$GROUP) */
  94
  95/**
  96 * VFIO_GROUP_SET_CONTAINER - _IOW(VFIO_TYPE, VFIO_BASE + 4, __s32)
  97 *
  98 * Set the container for the VFIO group to the open VFIO file
  99 * descriptor provided.  Groups may only belong to a single
 100 * container.  Containers may, at their discretion, support multiple
 101 * groups.  Only when a container is set are all of the interfaces
 102 * of the VFIO file descriptor and the VFIO group file descriptor
 103 * available to the user.
 104 * Return: 0 on success, -errno on failure.
 105 * Availability: Always
 106 */
 107#define VFIO_GROUP_SET_CONTAINER        _IO(VFIO_TYPE, VFIO_BASE + 4)  /* group fd ioctl; _IO() deliberately encodes no argument size */
 108
 109/**
 110 * VFIO_GROUP_UNSET_CONTAINER - _IO(VFIO_TYPE, VFIO_BASE + 5)
 111 *
 112 * Remove the group from the attached container.  This is the
 113 * opposite of the SET_CONTAINER call and returns the group to
 114 * an initial state.  All device file descriptors must be released
 115 * prior to calling this interface.  When removing the last group
 116 * from a container, the IOMMU will be disabled and all state lost,
 117 * effectively also returning the VFIO file descriptor to an initial
 118 * state.
 119 * Return: 0 on success, -errno on failure.
 120 * Availability: When attached to container
 121 */
 122#define VFIO_GROUP_UNSET_CONTAINER      _IO(VFIO_TYPE, VFIO_BASE + 5)  /* group fd ioctl; opposite of VFIO_GROUP_SET_CONTAINER */
 123
 124/**
 125 * VFIO_GROUP_GET_DEVICE_FD - _IOW(VFIO_TYPE, VFIO_BASE + 6, char)
 126 *
 127 * Return a new file descriptor for the device object described by
 128 * the provided string.  The string should match a device listed in
 129 * the devices subdirectory of the IOMMU group sysfs entry.  The
 130 * group containing the device must already be added to this context.
 131 * Return: new file descriptor on success, -errno on failure.
 132 * Availability: When attached to container
 133 */
 134#define VFIO_GROUP_GET_DEVICE_FD        _IO(VFIO_TYPE, VFIO_BASE + 6)  /* group fd ioctl; returns a new device fd on success */
 135
 136/* --------------- IOCTLs for DEVICE file descriptors --------------- */
 137
 138/**
 139 * VFIO_DEVICE_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 7,
 140 *                                              struct vfio_device_info)
 141 *
 142 * Retrieve information about the device.  Fills in provided
 143 * struct vfio_device_info.  Caller sets argsz.
 144 * Return: 0 on success, -errno on failure.
 145 */
 146struct vfio_device_info {
 147        __u32   argsz;          /* Size of this struct; set by the caller */
 148        __u32   flags;          /* VFIO_DEVICE_FLAGS_* bits */
 149#define VFIO_DEVICE_FLAGS_RESET (1 << 0)        /* Device supports reset (see VFIO_DEVICE_RESET) */
 150#define VFIO_DEVICE_FLAGS_PCI   (1 << 1)        /* vfio-pci device */
 151        __u32   num_regions;    /* Max region index + 1 */
 152        __u32   num_irqs;       /* Max IRQ index + 1 */
 153};
 154#define VFIO_DEVICE_GET_INFO            _IO(VFIO_TYPE, VFIO_BASE + 7)  /* ioctl on a device fd */
 155
 156/**
 157 * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
 158 *                                     struct vfio_region_info)
 159 *
 160 * Retrieve information about a device region.  Caller provides
 161 * struct vfio_region_info with index value set.  Caller sets argsz.
 162 * Implementation of region mapping is bus driver specific.  This is
 163 * intended to describe MMIO, I/O port, as well as bus specific
 164 * regions (ex. PCI config space).  Zero sized regions may be used
 165 * to describe unimplemented regions (ex. unimplemented PCI BARs).
 166 * Return: 0 on success, -errno on failure.
 167 */
 168struct vfio_region_info {
 169        __u32   argsz;          /* Size of this struct; set by the caller */
 170        __u32   flags;          /* VFIO_REGION_INFO_FLAG_* bits */
 171#define VFIO_REGION_INFO_FLAG_READ      (1 << 0) /* Region supports read */
 172#define VFIO_REGION_INFO_FLAG_WRITE     (1 << 1) /* Region supports write */
 173#define VFIO_REGION_INFO_FLAG_MMAP      (1 << 2) /* Region supports mmap */
 174        __u32   index;          /* Region index; set by the caller */
 175        __u32   resv;           /* Reserved for alignment */
 176        __u64   size;           /* Region size (bytes); zero may denote an unimplemented region */
 177        __u64   offset;         /* Region offset from start of device fd */
 178};
 179#define VFIO_DEVICE_GET_REGION_INFO     _IO(VFIO_TYPE, VFIO_BASE + 8)  /* ioctl on a device fd */
 180
 181/**
 182 * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
 183 *                                  struct vfio_irq_info)
 184 *
 185 * Retrieve information about a device IRQ.  Caller provides
 186 * struct vfio_irq_info with index value set.  Caller sets argsz.
 187 * Implementation of IRQ mapping is bus driver specific.  Indexes
 188 * using multiple IRQs are primarily intended to support MSI-like
 189 * interrupt blocks.  Zero count irq blocks may be used to describe
 190 * unimplemented interrupt types.
 191 *
 192 * The EVENTFD flag indicates the interrupt index supports eventfd based
 193 * signaling.
 194 *
 195 * The MASKABLE flag indicates the index supports MASK and UNMASK
 196 * actions described below.
 197 *
 198 * AUTOMASKED indicates that after signaling, the interrupt line is
 199 * automatically masked by VFIO and the user needs to unmask the line
 200 * to receive new interrupts.  This is primarily intended to distinguish
 201 * level triggered interrupts.
 202 *
 203 * The NORESIZE flag indicates that the interrupt lines within the index
 204 * are setup as a set and new subindexes cannot be enabled without first
 205 * disabling the entire index.  This is used for interrupts like PCI MSI
 206 * and MSI-X where the driver may only use a subset of the available
 207 * indexes, but VFIO needs to enable a specific number of vectors
 208 * upfront.  In the case of MSI-X, where the user can enable MSI-X and
 209 * then add and unmask vectors, it's up to userspace to make the decision
 210 * whether to allocate the maximum supported number of vectors or tear
 211 * down setup and incrementally increase the vectors as each is enabled.
 212 */
 213struct vfio_irq_info {
 214        __u32   argsz;          /* Size of this struct; set by the caller */
 215        __u32   flags;          /* VFIO_IRQ_INFO_* bits */
 216#define VFIO_IRQ_INFO_EVENTFD           (1 << 0) /* Index supports eventfd based signaling */
 217#define VFIO_IRQ_INFO_MASKABLE          (1 << 1) /* Index supports the MASK and UNMASK actions */
 218#define VFIO_IRQ_INFO_AUTOMASKED        (1 << 2) /* Masked by VFIO after signaling; user must unmask */
 219#define VFIO_IRQ_INFO_NORESIZE          (1 << 3) /* Disable the whole index before enabling new subindexes */
 220        __u32   index;          /* IRQ index; set by the caller */
 221        __u32   count;          /* Number of IRQs within this index; zero may denote an unimplemented type */
 222};
 223#define VFIO_DEVICE_GET_IRQ_INFO        _IO(VFIO_TYPE, VFIO_BASE + 9)  /* ioctl on a device fd */
 224
 225/**
 226 * VFIO_DEVICE_SET_IRQS - _IOW(VFIO_TYPE, VFIO_BASE + 10, struct vfio_irq_set)
 227 *
 228 * Set signaling, masking, and unmasking of interrupts.  Caller provides
 229 * struct vfio_irq_set with all fields set.  'start' and 'count' indicate
 230 * the range of subindexes being specified.
 231 *
 232 * The DATA flags specify the type of data provided.  If DATA_NONE, the
 233 * operation performs the specified action immediately on the specified
 234 * interrupt(s).  For example, to unmask AUTOMASKED interrupt [0,0]:
 235 * flags = (DATA_NONE|ACTION_UNMASK), index = 0, start = 0, count = 1.
 236 *
 237 * DATA_BOOL allows sparse support for the same on arrays of interrupts.
 238 * For example, to mask interrupts [0,1] and [0,3] (but not [0,2]):
 239 * flags = (DATA_BOOL|ACTION_MASK), index = 0, start = 1, count = 3,
 240 * data = {1,0,1}
 241 *
 242 * DATA_EVENTFD binds the specified ACTION to the provided __s32 eventfd.
 243 * A value of -1 can be used to either de-assign interrupts if already
 244 * assigned or skip un-assigned interrupts.  For example, to set an eventfd
 245 * to be triggered for interrupts [0,0] and [0,2]:
 246 * flags = (DATA_EVENTFD|ACTION_TRIGGER), index = 0, start = 0, count = 3,
 247 * data = {fd1, -1, fd2}
 248 * If index [0,1] is previously set, two count = 1 ioctl calls would be
 249 * required to set [0,0] and [0,2] without changing [0,1].
 250 *
 251 * Once a signaling mechanism is set, DATA_BOOL or DATA_NONE can be used
 252 * with ACTION_TRIGGER to perform kernel level interrupt loopback testing
 253 * from userspace (ie. simulate hardware triggering).
 254 *
 255 * Setting of an event triggering mechanism to userspace for ACTION_TRIGGER
 256 * enables the interrupt index for the device.  Individual subindex interrupts
 257 * can be disabled using the -1 value for DATA_EVENTFD or the index can be
 258 * disabled as a whole with: flags = (DATA_NONE|ACTION_TRIGGER), count = 0.
 259 *
 260 * Note that ACTION_[UN]MASK specify user->kernel signaling (irqfds) while
 261 * ACTION_TRIGGER specifies kernel->user signaling.
 262 */
 263struct vfio_irq_set {
 264        __u32   argsz;          /* Set by the caller; presumably includes the trailing data[] - confirm */
 265        __u32   flags;          /* One VFIO_IRQ_SET_DATA_* bit combined with one VFIO_IRQ_SET_ACTION_* bit */
 266#define VFIO_IRQ_SET_DATA_NONE          (1 << 0) /* Data not present */
 267#define VFIO_IRQ_SET_DATA_BOOL          (1 << 1) /* Data is bool (u8) */
 268#define VFIO_IRQ_SET_DATA_EVENTFD       (1 << 2) /* Data is eventfd (s32) */
 269#define VFIO_IRQ_SET_ACTION_MASK        (1 << 3) /* Mask interrupt */
 270#define VFIO_IRQ_SET_ACTION_UNMASK      (1 << 4) /* Unmask interrupt */
 271#define VFIO_IRQ_SET_ACTION_TRIGGER     (1 << 5) /* Trigger interrupt */
 272        __u32   index;          /* IRQ index the request applies to */
 273        __u32   start;          /* First subindex of the range */
 274        __u32   count;          /* Number of subindexes in the range */
 275        __u8    data[];         /* Payload; element type is selected by the DATA_* flag */
 276};
 277#define VFIO_DEVICE_SET_IRQS            _IO(VFIO_TYPE, VFIO_BASE + 10) /* ioctl on a device fd */
 278
 279#define VFIO_IRQ_SET_DATA_TYPE_MASK     (VFIO_IRQ_SET_DATA_NONE | \
 280                                         VFIO_IRQ_SET_DATA_BOOL | \
 281                                         VFIO_IRQ_SET_DATA_EVENTFD) /* All VFIO_IRQ_SET_DATA_* bits */
 282#define VFIO_IRQ_SET_ACTION_TYPE_MASK   (VFIO_IRQ_SET_ACTION_MASK | \
 283                                         VFIO_IRQ_SET_ACTION_UNMASK | \
 284                                         VFIO_IRQ_SET_ACTION_TRIGGER) /* All VFIO_IRQ_SET_ACTION_* bits */
 285/**
 286 * VFIO_DEVICE_RESET - _IO(VFIO_TYPE, VFIO_BASE + 11)
 287 *
 288 * Reset a device (see VFIO_DEVICE_FLAGS_RESET in struct vfio_device_info).
 289 */
 290#define VFIO_DEVICE_RESET               _IO(VFIO_TYPE, VFIO_BASE + 11) /* ioctl on a device fd */
 291
 292/*
 293 * The VFIO-PCI bus driver makes use of the following fixed region and
 294 * IRQ index mapping.  Unimplemented regions return a size of zero.
 295 * Unimplemented IRQ types return a count of zero.
 296 */
 297
 298enum {
 299        VFIO_PCI_BAR0_REGION_INDEX,
 300        VFIO_PCI_BAR1_REGION_INDEX,
 301        VFIO_PCI_BAR2_REGION_INDEX,
 302        VFIO_PCI_BAR3_REGION_INDEX,
 303        VFIO_PCI_BAR4_REGION_INDEX,
 304        VFIO_PCI_BAR5_REGION_INDEX,
 305        VFIO_PCI_ROM_REGION_INDEX,
 306        VFIO_PCI_CONFIG_REGION_INDEX,
 307        /*
 308         * Expose VGA regions defined for PCI base class 03, subclass 00.
 309         * This includes I/O port ranges 0x3b0 to 0x3bb and 0x3c0 to 0x3df
 310         * as well as the MMIO range 0xa0000 to 0xbffff.  Each implemented
 311         * range is found at its identity mapped offset from the region
 312         * offset, for example 0x3b0 is region_info.offset + 0x3b0.  Areas
 313         * between described ranges are unimplemented.
 314         */
 315        VFIO_PCI_VGA_REGION_INDEX,
 316        VFIO_PCI_NUM_REGIONS    /* Number of fixed region indexes above; not itself a region */
 317};
 318
 319enum {
 320        VFIO_PCI_INTX_IRQ_INDEX,
 321        VFIO_PCI_MSI_IRQ_INDEX,
 322        VFIO_PCI_MSIX_IRQ_INDEX,
 323        VFIO_PCI_ERR_IRQ_INDEX,
 324        VFIO_PCI_NUM_IRQS       /* Number of fixed IRQ indexes above; not itself an IRQ index */
 325};
 326
 327/**
 328 * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 12,
 329 *                                            struct vfio_pci_hot_reset_info)
 330 *
 331 * Return: 0 on success, -errno on failure:
 332 *      -ENOSPC = insufficient buffer, -ENODEV = unsupported for device.
 333 */
 334struct vfio_pci_dependent_device {
 335        __u32   group_id;       /* presumably the device's IOMMU group number - confirm */
 336        __u16   segment;        /* presumably the PCI segment (domain) - confirm */
 337        __u8    bus;            /* presumably the PCI bus number - confirm */
 338        __u8    devfn; /* Use PCI_SLOT/PCI_FUNC */
 339};
 340
 341struct vfio_pci_hot_reset_info {
 342        __u32   argsz;          /* Set by the caller; presumably includes room for devices[] - confirm */
 343        __u32   flags;          /* No flag bits are defined in this header */
 344        __u32   count;          /* presumably the number of entries in devices[] - confirm */
 345        struct vfio_pci_dependent_device        devices[];      /* Variable-length trailing array */
 346};
 347
 348#define VFIO_DEVICE_GET_PCI_HOT_RESET_INFO      _IO(VFIO_TYPE, VFIO_BASE + 12) /* ioctl on a device fd */
 349
 350/**
 351 * VFIO_DEVICE_PCI_HOT_RESET - _IOW(VFIO_TYPE, VFIO_BASE + 13,
 352 *                                  struct vfio_pci_hot_reset)
 353 *
 354 * Return: 0 on success, -errno on failure.
 355 */
 356struct vfio_pci_hot_reset {
 357        __u32   argsz;          /* Set by the caller; presumably includes the trailing group_fds[] - confirm */
 358        __u32   flags;          /* No flag bits are defined in this header */
 359        __u32   count;          /* presumably the number of entries in group_fds[] - confirm */
 360        __s32   group_fds[];    /* Variable-length trailing array of file descriptors */
 361};
 362
 363#define VFIO_DEVICE_PCI_HOT_RESET       _IO(VFIO_TYPE, VFIO_BASE + 13) /* ioctl on a device fd */
 364
 365/* -------- API for Type1 VFIO IOMMU -------- */
 366
 367/**
 368 * VFIO_IOMMU_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 12, struct vfio_iommu_type1_info)
 369 *
 370 * Retrieve information about the IOMMU object. Fills in provided
 371 * struct vfio_iommu_type1_info. Caller sets argsz.
 372 *
 373 * XXX Should we do these by CHECK_EXTENSION too?
 374 */
 375struct vfio_iommu_type1_info {
 376        __u32   argsz;          /* Size of this struct; set by the caller */
 377        __u32   flags;          /* VFIO_IOMMU_INFO_* bits */
 378#define VFIO_IOMMU_INFO_PGSIZES (1 << 0)        /* supported page sizes info */
 379        __u64   iova_pgsizes;           /* Bitmap of supported page sizes */
 380};
 381
 382#define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) /* Same number as VFIO_DEVICE_GET_PCI_HOT_RESET_INFO; presumably told apart by the fd type - confirm */
 383
 384/**
 385 * VFIO_IOMMU_MAP_DMA - _IOW(VFIO_TYPE, VFIO_BASE + 13, struct vfio_iommu_type1_dma_map)
 386 *
 387 * Map process virtual addresses to IO virtual addresses using the
 388 * provided struct vfio_iommu_type1_dma_map. Caller sets argsz. READ and/or WRITE flag required.
 389 */
 390struct vfio_iommu_type1_dma_map {
 391        __u32   argsz;          /* Size of this struct; set by the caller */
 392        __u32   flags;          /* VFIO_DMA_MAP_FLAG_* bits */
 393#define VFIO_DMA_MAP_FLAG_READ (1 << 0)         /* readable from device */
 394#define VFIO_DMA_MAP_FLAG_WRITE (1 << 1)        /* writable from device */
 395        __u64   vaddr;                          /* Process virtual address */
 396        __u64   iova;                           /* IO virtual address */
 397        __u64   size;                           /* Size of mapping (bytes) */
 398};
 399
 400#define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13) /* Same number as VFIO_DEVICE_PCI_HOT_RESET; presumably told apart by the fd type - confirm */
 401
 402/**
 403 * VFIO_IOMMU_UNMAP_DMA - _IOWR(VFIO_TYPE, VFIO_BASE + 14,
 404 *                                      struct vfio_iommu_type1_dma_unmap)
 405 *
 406 * Unmap IO virtual addresses using the provided struct vfio_iommu_type1_dma_unmap.
 407 * Caller sets argsz.  The actual unmapped size is returned in the size
 408 * field.  No guarantee is made to the user that arbitrary unmaps of iova
 409 * or size different from those used in the original mapping call will
 410 * succeed.
 411 */
 412struct vfio_iommu_type1_dma_unmap {
 413        __u32   argsz;          /* Size of this struct; set by the caller */
 414        __u32   flags;          /* No flag bits are defined in this header */
 415        __u64   iova;                           /* IO virtual address */
 416        __u64   size;                           /* In: size to unmap (bytes); out: size actually unmapped */
 417};
 418
 419#define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14) /* _IO() by design: no argument size is encoded */
 420
 421/*
 422 * IOCTLs to enable/disable IOMMU container usage.
 423 * No parameters are supported.
 424 */
 425#define VFIO_IOMMU_ENABLE       _IO(VFIO_TYPE, VFIO_BASE + 15)  /* Enable IOMMU container usage; no parameters */
 426#define VFIO_IOMMU_DISABLE      _IO(VFIO_TYPE, VFIO_BASE + 16)  /* Disable IOMMU container usage; no parameters */
 427
 428/* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
 429
 430/*
 431 * The SPAPR TCE info struct provides the information about the PCI bus
 432 * address ranges available for DMA, these values are programmed into
 433 * the hardware so the guest has to know that information.
 434 *
 435 * The DMA 32 bit window start is an absolute PCI bus address.
 436 * The IOVA address passed via map/unmap ioctls are absolute PCI bus
 437 * addresses too so the window works as a filter rather than an offset
 438 * for IOVA addresses.
 439 *
 440 * A flag will need to be added if other page sizes are supported,
 441 * so as defined here, it is always 4k.
 442 */
 443struct vfio_iommu_spapr_tce_info {
 444        __u32 argsz;                    /* Size of this struct; set by the caller */
 445        __u32 flags;                    /* reserved for future use */
 446        __u32 dma32_window_start;       /* 32 bit window start (bytes); an absolute PCI bus address */
 447        __u32 dma32_window_size;        /* 32 bit window size (bytes) */
 448};
 449
 450#define VFIO_IOMMU_SPAPR_TCE_GET_INFO   _IO(VFIO_TYPE, VFIO_BASE + 12) /* Same request number as VFIO_IOMMU_GET_INFO */
 451
 452/* ***************************************************************** */
 453
 454#endif /* _UAPIVFIO_H */
 455