qemu/include/block/block-common.h
<<
>>
Prefs
   1/*
   2 * QEMU System Emulator block driver
   3 *
   4 * Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24#ifndef BLOCK_COMMON_H
  25#define BLOCK_COMMON_H
  26
  27#include "block/aio.h"
  28#include "block/aio-wait.h"
  29#include "qemu/iov.h"
  30#include "qemu/coroutine.h"
  31#include "block/accounting.h"
  32#include "block/dirty-bitmap.h"
  33#include "block/blockjob.h"
  34#include "qemu/hbitmap.h"
  35#include "qemu/transactions.h"
  36
  37/*
  38 * generated_co_wrapper
  39 *
  40 * Function specifier, which does nothing but mark functions to be
  41 * generated by scripts/block-coroutine-wrapper.py
  42 *
  43 * Read more in docs/devel/block-coroutine-wrapper.rst
  44 */
  45#define generated_co_wrapper
  46
  47/* block.c */
  48typedef struct BlockDriver BlockDriver;
  49typedef struct BdrvChild BdrvChild;
  50typedef struct BdrvChildClass BdrvChildClass;
  51
  52typedef struct BlockDriverInfo {
  53    /* in bytes, 0 if irrelevant */
  54    int cluster_size;
  55    /* offset at which the VM state can be saved (0 if not possible) */
  56    int64_t vm_state_offset;
  57    bool is_dirty;
  58    /*
  59     * True if this block driver only supports compressed writes
  60     */
  61    bool needs_compressed_writes;
  62} BlockDriverInfo;
  63
  64typedef struct BlockFragInfo {
  65    uint64_t allocated_clusters;
  66    uint64_t total_clusters;
  67    uint64_t fragmented_clusters;
  68    uint64_t compressed_clusters;
  69} BlockFragInfo;
  70
  71typedef enum {
  72    BDRV_REQ_COPY_ON_READ       = 0x1,
  73    BDRV_REQ_ZERO_WRITE         = 0x2,
  74
  75    /*
  76     * The BDRV_REQ_MAY_UNMAP flag is used in write_zeroes requests to indicate
  77     * that the block driver should unmap (discard) blocks if it is guaranteed
  78     * that the result will read back as zeroes. The flag is only passed to the
  79     * driver if the block device is opened with BDRV_O_UNMAP.
  80     */
  81    BDRV_REQ_MAY_UNMAP          = 0x4,
  82
  83    BDRV_REQ_FUA                = 0x10,
  84    BDRV_REQ_WRITE_COMPRESSED   = 0x20,
  85
  86    /*
  87     * Signifies that this write request will not change the visible disk
  88     * content.
  89     */
  90    BDRV_REQ_WRITE_UNCHANGED    = 0x40,
  91
  92    /*
  93     * Forces request serialisation. Use only with write requests.
  94     */
  95    BDRV_REQ_SERIALISING        = 0x80,
  96
  97    /*
  98     * Execute the request only if the operation can be offloaded or otherwise
  99     * be executed efficiently, but return an error instead of using a slow
 100     * fallback.
 101     */
 102    BDRV_REQ_NO_FALLBACK        = 0x100,
 103
 104    /*
 105     * BDRV_REQ_PREFETCH makes sense only in the context of copy-on-read
 106     * (i.e., together with the BDRV_REQ_COPY_ON_READ flag or when a COR
 107     * filter is involved), in which case it signals that the COR operation
 108     * need not read the data into memory (qiov) but only ensure they are
 109     * copied to the top layer (i.e., that COR operation is done).
 110     */
 111    BDRV_REQ_PREFETCH  = 0x200,
 112
 113    /*
 114     * If we need to wait for other requests, just fail immediately. Used
 115     * only together with BDRV_REQ_SERIALISING. Used only with requests aligned
 116     * to request_alignment (corresponding assertions are in block/io.c).
 117     */
 118    BDRV_REQ_NO_WAIT = 0x400,
 119
 120    /* Mask of valid flags */
 121    BDRV_REQ_MASK               = 0x7ff,
 122} BdrvRequestFlags;
 123
 124#define BDRV_O_NO_SHARE    0x0001 /* don't share permissions */
 125#define BDRV_O_RDWR        0x0002
 126#define BDRV_O_RESIZE      0x0004 /* request permission for resizing the node */
 127#define BDRV_O_SNAPSHOT    0x0008 /* open the file read only and save
 128                                     writes in a snapshot */
 129#define BDRV_O_TEMPORARY   0x0010 /* delete the file after use */
 130#define BDRV_O_NOCACHE     0x0020 /* do not use the host page cache */
 131#define BDRV_O_NATIVE_AIO  0x0080 /* use native AIO instead of the
 132                                     thread pool */
 133#define BDRV_O_NO_BACKING  0x0100 /* don't open the backing file */
 134#define BDRV_O_NO_FLUSH    0x0200 /* disable flushing on this disk */
 135#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
 136#define BDRV_O_INACTIVE    0x0800  /* consistency hint for migration handoff */
 137#define BDRV_O_CHECK       0x1000  /* open solely for consistency check */
 138#define BDRV_O_ALLOW_RDWR  0x2000  /* allow reopen to change from r/o to r/w */
 139#define BDRV_O_UNMAP       0x4000  /* execute guest UNMAP/TRIM operations */
 140#define BDRV_O_PROTOCOL    0x8000  /* if no block driver is explicitly given:
 141                                      select an appropriate protocol driver,
 142                                      ignoring the format layer */
 143#define BDRV_O_NO_IO       0x10000 /* don't initialize for I/O */
 144#define BDRV_O_AUTO_RDONLY 0x20000 /* degrade to read-only if opening
 145                                      read-write fails */
 146#define BDRV_O_IO_URING    0x40000 /* use io_uring instead of the thread pool */
 147
 148#define BDRV_O_CACHE_MASK  (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)
 149
 150
 151/* Option names of options parsed by the block layer */
 152
 153#define BDRV_OPT_CACHE_WB       "cache.writeback"
 154#define BDRV_OPT_CACHE_DIRECT   "cache.direct"
 155#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush"
 156#define BDRV_OPT_READ_ONLY      "read-only"
 157#define BDRV_OPT_AUTO_READ_ONLY "auto-read-only"
 158#define BDRV_OPT_DISCARD        "discard"
 159#define BDRV_OPT_FORCE_SHARE    "force-share"
 160
 161
 162#define BDRV_SECTOR_BITS   9
 163#define BDRV_SECTOR_SIZE   (1ULL << BDRV_SECTOR_BITS)
 164
 165#define BDRV_REQUEST_MAX_SECTORS MIN_CONST(SIZE_MAX >> BDRV_SECTOR_BITS, \
 166                                           INT_MAX >> BDRV_SECTOR_BITS)
 167#define BDRV_REQUEST_MAX_BYTES (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS)
 168
 169/*
 170 * We want allow aligning requests and disk length up to any 32bit alignment
 171 * and don't afraid of overflow.
 172 * To achieve it, and in the same time use some pretty number as maximum disk
 173 * size, let's define maximum "length" (a limit for any offset/bytes request and
 174 * for disk size) to be the greatest power of 2 less than INT64_MAX.
 175 */
 176#define BDRV_MAX_ALIGNMENT (1L << 30)
 177#define BDRV_MAX_LENGTH (QEMU_ALIGN_DOWN(INT64_MAX, BDRV_MAX_ALIGNMENT))
 178
 179/*
 180 * Allocation status flags for bdrv_block_status() and friends.
 181 *
 182 * Public flags:
 183 * BDRV_BLOCK_DATA: allocation for data at offset is tied to this layer
 184 * BDRV_BLOCK_ZERO: offset reads as zero
 185 * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data
 186 * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this
 187 *                       layer rather than any backing, set by block layer
 188 * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this
 189 *                 layer, set by block layer
 190 *
 191 * Internal flags:
 192 * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request
 193 *                 that the block layer recompute the answer from the returned
 194 *                 BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID.
 195 * BDRV_BLOCK_RECURSE: request that the block layer will recursively search for
 196 *                     zeroes in file child of current block node inside
 197 *                     returned region. Only valid together with both
 198 *                     BDRV_BLOCK_DATA and BDRV_BLOCK_OFFSET_VALID. Should not
 199 *                     appear with BDRV_BLOCK_ZERO.
 200 *
 201 * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the
 202 * host offset within the returned BDS that is allocated for the
 203 * corresponding raw guest data.  However, whether that offset
 204 * actually contains data also depends on BDRV_BLOCK_DATA, as follows:
 205 *
 206 * DATA ZERO OFFSET_VALID
 207 *  t    t        t       sectors read as zero, returned file is zero at offset
 208 *  t    f        t       sectors read as valid from file at offset
 209 *  f    t        t       sectors preallocated, read as zero, returned file not
 210 *                        necessarily zero at offset
 211 *  f    f        t       sectors preallocated but read from backing_hd,
 212 *                        returned file contains garbage at offset
 213 *  t    t        f       sectors preallocated, read as zero, unknown offset
 214 *  t    f        f       sectors read from unknown file or offset
 215 *  f    t        f       not allocated or unknown offset, read as zero
 216 *  f    f        f       not allocated or unknown offset, read from backing_hd
 217 */
 218#define BDRV_BLOCK_DATA         0x01
 219#define BDRV_BLOCK_ZERO         0x02
 220#define BDRV_BLOCK_OFFSET_VALID 0x04
 221#define BDRV_BLOCK_RAW          0x08
 222#define BDRV_BLOCK_ALLOCATED    0x10
 223#define BDRV_BLOCK_EOF          0x20
 224#define BDRV_BLOCK_RECURSE      0x40
 225
 226typedef QTAILQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
 227
 228typedef struct BDRVReopenState {
 229    BlockDriverState *bs;
 230    int flags;
 231    BlockdevDetectZeroesOptions detect_zeroes;
 232    bool backing_missing;
 233    BlockDriverState *old_backing_bs; /* keep pointer for permissions update */
 234    BlockDriverState *old_file_bs; /* keep pointer for permissions update */
 235    QDict *options;
 236    QDict *explicit_options;
 237    void *opaque;
 238} BDRVReopenState;
 239
 240/*
 241 * Block operation types
 242 */
 243typedef enum BlockOpType {
 244    BLOCK_OP_TYPE_BACKUP_SOURCE,
 245    BLOCK_OP_TYPE_BACKUP_TARGET,
 246    BLOCK_OP_TYPE_CHANGE,
 247    BLOCK_OP_TYPE_COMMIT_SOURCE,
 248    BLOCK_OP_TYPE_COMMIT_TARGET,
 249    BLOCK_OP_TYPE_DATAPLANE,
 250    BLOCK_OP_TYPE_DRIVE_DEL,
 251    BLOCK_OP_TYPE_EJECT,
 252    BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT,
 253    BLOCK_OP_TYPE_INTERNAL_SNAPSHOT,
 254    BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE,
 255    BLOCK_OP_TYPE_MIRROR_SOURCE,
 256    BLOCK_OP_TYPE_MIRROR_TARGET,
 257    BLOCK_OP_TYPE_RESIZE,
 258    BLOCK_OP_TYPE_STREAM,
 259    BLOCK_OP_TYPE_REPLACE,
 260    BLOCK_OP_TYPE_MAX,
 261} BlockOpType;
 262
 263/* Block node permission constants */
 264enum {
 265    /**
 266     * A user that has the "permission" of consistent reads is guaranteed that
 267     * their view of the contents of the block device is complete and
 268     * self-consistent, representing the contents of a disk at a specific
 269     * point.
 270     *
 271     * For most block devices (including their backing files) this is true, but
 272     * the property cannot be maintained in a few situations like for
 273     * intermediate nodes of a commit block job.
 274     */
 275    BLK_PERM_CONSISTENT_READ    = 0x01,
 276
 277    /** This permission is required to change the visible disk contents. */
 278    BLK_PERM_WRITE              = 0x02,
 279
 280    /**
 281     * This permission (which is weaker than BLK_PERM_WRITE) is both enough and
 282     * required for writes to the block node when the caller promises that
 283     * the visible disk content doesn't change.
 284     *
 285     * As the BLK_PERM_WRITE permission is strictly stronger, either is
 286     * sufficient to perform an unchanging write.
 287     */
 288    BLK_PERM_WRITE_UNCHANGED    = 0x04,
 289
 290    /** This permission is required to change the size of a block node. */
 291    BLK_PERM_RESIZE             = 0x08,
 292
 293    /**
 294     * There was a now-removed bit BLK_PERM_GRAPH_MOD, with value of 0x10. QEMU
 295     * 6.1 and earlier may still lock the corresponding byte in block/file-posix
 296     * locking.  So, implementing some new permission should be very careful to
 297     * not interfere with this old unused thing.
 298     */
 299
 300    BLK_PERM_ALL                = 0x0f,
 301
 302    DEFAULT_PERM_PASSTHROUGH    = BLK_PERM_CONSISTENT_READ
 303                                 | BLK_PERM_WRITE
 304                                 | BLK_PERM_WRITE_UNCHANGED
 305                                 | BLK_PERM_RESIZE,
 306
 307    DEFAULT_PERM_UNCHANGED      = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH,
 308};
 309
 310/*
 311 * Flags that parent nodes assign to child nodes to specify what kind of
 312 * role(s) they take.
 313 *
 314 * At least one of DATA, METADATA, FILTERED, or COW must be set for
 315 * every child.
 316 */
 317enum BdrvChildRoleBits {
 318    /*
 319     * This child stores data.
 320     * Any node may have an arbitrary number of such children.
 321     */
 322    BDRV_CHILD_DATA         = (1 << 0),
 323
 324    /*
 325     * This child stores metadata.
 326     * Any node may have an arbitrary number of metadata-storing
 327     * children.
 328     */
 329    BDRV_CHILD_METADATA     = (1 << 1),
 330
 331    /*
 332     * A child that always presents exactly the same visible data as
 333     * the parent, e.g. by virtue of the parent forwarding all reads
 334     * and writes.
 335     * This flag is mutually exclusive with DATA, METADATA, and COW.
 336     * Any node may have at most one filtered child at a time.
 337     */
 338    BDRV_CHILD_FILTERED     = (1 << 2),
 339
 340    /*
 341     * Child from which to read all data that isn't allocated in the
 342     * parent (i.e., the backing child); such data is copied to the
 343     * parent through COW (and optionally COR).
 344     * This field is mutually exclusive with DATA, METADATA, and
 345     * FILTERED.
 346     * Any node may have at most one such backing child at a time.
 347     */
 348    BDRV_CHILD_COW          = (1 << 3),
 349
 350    /*
 351     * The primary child.  For most drivers, this is the child whose
 352     * filename applies best to the parent node.
 353     * Any node may have at most one primary child at a time.
 354     */
 355    BDRV_CHILD_PRIMARY      = (1 << 4),
 356
 357    /* Useful combination of flags */
 358    BDRV_CHILD_IMAGE        = BDRV_CHILD_DATA
 359                              | BDRV_CHILD_METADATA
 360                              | BDRV_CHILD_PRIMARY,
 361};
 362
 363/* Mask of BdrvChildRoleBits values */
 364typedef unsigned int BdrvChildRole;
 365
 366typedef struct BdrvCheckResult {
 367    int corruptions;
 368    int leaks;
 369    int check_errors;
 370    int corruptions_fixed;
 371    int leaks_fixed;
 372    int64_t image_end_offset;
 373    BlockFragInfo bfi;
 374} BdrvCheckResult;
 375
 376typedef enum {
 377    BDRV_FIX_LEAKS    = 1,
 378    BDRV_FIX_ERRORS   = 2,
 379} BdrvCheckMode;
 380
 381typedef struct BlockSizes {
 382    uint32_t phys;
 383    uint32_t log;
 384} BlockSizes;
 385
 386typedef struct HDGeometry {
 387    uint32_t heads;
 388    uint32_t sectors;
 389    uint32_t cylinders;
 390} HDGeometry;
 391
 392/*
 393 * Common functions that are neither I/O nor Global State.
 394 *
 395 * These functions must never call any function from other categories
 396 * (I/O, "I/O or GS", Global State) except this one, but can be invoked by
 397 * all of them.
 398 */
 399
 400char *bdrv_perm_names(uint64_t perm);
 401uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm);
 402
 403void bdrv_init_with_whitelist(void);
 404bool bdrv_uses_whitelist(void);
 405int bdrv_is_whitelisted(BlockDriver *drv, bool read_only);
 406
 407int bdrv_parse_aio(const char *mode, int *flags);
 408int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough);
 409int bdrv_parse_discard_flags(const char *mode, int *flags);
 410
 411int path_has_protocol(const char *path);
 412int path_is_absolute(const char *path);
 413char *path_combine(const char *base_path, const char *filename);
 414
 415char *bdrv_get_full_backing_filename_from_filename(const char *backed,
 416                                                   const char *backing,
 417                                                   Error **errp);
 418
 419#endif /* BLOCK_COMMON_H */
 420