qemu/include/block/block_int.h
<<
>>
Prefs
   1/*
   2 * QEMU System Emulator block driver
   3 *
   4 * Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24#ifndef BLOCK_INT_H
  25#define BLOCK_INT_H
  26
  27#include "block/accounting.h"
  28#include "block/block.h"
  29#include "qemu/option.h"
  30#include "qemu/queue.h"
  31#include "qemu/coroutine.h"
  32#include "qemu/timer.h"
  33#include "qapi-types.h"
  34#include "qemu/hbitmap.h"
  35#include "block/snapshot.h"
  36#include "qemu/main-loop.h"
  37#include "qemu/throttle.h"
  38
  39#define BLOCK_FLAG_ENCRYPT          1
  40#define BLOCK_FLAG_LAZY_REFCOUNTS   8
  41
  42#define BLOCK_OPT_SIZE              "size"
  43#define BLOCK_OPT_ENCRYPT           "encryption"
  44#define BLOCK_OPT_COMPAT6           "compat6"
  45#define BLOCK_OPT_HWVERSION         "hwversion"
  46#define BLOCK_OPT_BACKING_FILE      "backing_file"
  47#define BLOCK_OPT_BACKING_FMT       "backing_fmt"
  48#define BLOCK_OPT_CLUSTER_SIZE      "cluster_size"
  49#define BLOCK_OPT_TABLE_SIZE        "table_size"
  50#define BLOCK_OPT_PREALLOC          "preallocation"
  51#define BLOCK_OPT_SUBFMT            "subformat"
  52#define BLOCK_OPT_COMPAT_LEVEL      "compat"
  53#define BLOCK_OPT_LAZY_REFCOUNTS    "lazy_refcounts"
  54#define BLOCK_OPT_ADAPTER_TYPE      "adapter_type"
  55#define BLOCK_OPT_REDUNDANCY        "redundancy"
  56#define BLOCK_OPT_NOCOW             "nocow"
  57#define BLOCK_OPT_OBJECT_SIZE       "object_size"
  58#define BLOCK_OPT_REFCOUNT_BITS     "refcount_bits"
  59
  60#define BLOCK_PROBE_BUF_SIZE        512
  61
  62enum BdrvTrackedRequestType {
        /* Kind of operation recorded in a BdrvTrackedRequest (stored in its
         * 'type' member). */
  63    BDRV_TRACKED_READ,
  64    BDRV_TRACKED_WRITE,
  65    BDRV_TRACKED_DISCARD,
  66};
  67
  68typedef struct BdrvTrackedRequest {
        /* One request tracked on a BlockDriverState, which keeps a list of
         * these in its tracked_requests member. */
  69    BlockDriverState *bs;
  70    int64_t offset;
  71    unsigned int bytes;
  72    enum BdrvTrackedRequestType type;
  73
        /* NOTE(review): overlap_offset/overlap_bytes presumably give the range
         * compared against other requests when serialising -- confirm with the
         * code that fills them in. */
  74    bool serialising;
  75    int64_t overlap_offset;
  76    unsigned int overlap_bytes;
  77
  78    QLIST_ENTRY(BdrvTrackedRequest) list;
  79    Coroutine *co; /* owner, used for deadlock detection */
  80    CoQueue wait_queue; /* coroutines blocked on this request */
  81
        /* NOTE(review): presumably the request this one is currently blocked
         * on, or NULL when it is not waiting -- confirm. */
  82    struct BdrvTrackedRequest *waiting_for;
  83} BdrvTrackedRequest;
  84
  85struct BlockDriver {
        /* Per-driver table of callbacks and static properties.
         * NOTE(review): callbacks a driver does not implement are presumably
         * left NULL; the comment on bdrv_co_pwrite_zeroes states this
         * explicitly for that member only. */
  86    const char *format_name;
  87    int instance_size;
  88
  89    /* set to true if the BlockDriver is a block filter */
  90    bool is_filter;
  91    /* For snapshots, a block filter like Quorum can implement the
  92     * following recursive callback.
  93     * Its purpose is to recurse on the filter children while calling
  94     * bdrv_recurse_is_first_non_filter on them.
  95     * For a sample implementation look in the future Quorum block filter.
  96     */
  97    bool (*bdrv_recurse_is_first_non_filter)(BlockDriverState *bs,
  98                                             BlockDriverState *candidate);
  99
 100    int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
 101    int (*bdrv_probe_device)(const char *filename);
 102
 103    /* Any driver implementing this callback is expected to be able to handle
 104     * NULL file names in its .bdrv_open() implementation */
 105    void (*bdrv_parse_filename)(const char *filename, QDict *options, Error **errp);
 106    /* Drivers not implementing bdrv_parse_filename nor bdrv_open should have
 107     * this field set to true, except ones that are defined only by their
 108     * child's bs.
 109     * An example of the last type will be the quorum block driver.
 110     */
 111    bool bdrv_needs_filename;
 112
 113    /* Set if a driver can support backing files */
 114    bool supports_backing;
 115
 116    /* For handling image reopen for split or non-split files */
 117    int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
 118                               BlockReopenQueue *queue, Error **errp);
 119    void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
 120    void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
 121    void (*bdrv_join_options)(QDict *options, QDict *old_options);
 122
 123    int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags,
 124                     Error **errp);
 125    int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags,
 126                          Error **errp);
 127    void (*bdrv_close)(BlockDriverState *bs);
 128    int (*bdrv_create)(const char *filename, QemuOpts *opts, Error **errp);
 129    int (*bdrv_set_key)(BlockDriverState *bs, const char *key);
 130    int (*bdrv_make_empty)(BlockDriverState *bs);
 131
 132    void (*bdrv_refresh_filename)(BlockDriverState *bs, QDict *options);
 133
 134    /* aio */
 135    BlockAIOCB *(*bdrv_aio_readv)(BlockDriverState *bs,
 136        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
 137        BlockCompletionFunc *cb, void *opaque);
 138    BlockAIOCB *(*bdrv_aio_writev)(BlockDriverState *bs,
 139        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
 140        BlockCompletionFunc *cb, void *opaque);
 141    BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
 142        BlockCompletionFunc *cb, void *opaque);
 143    BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs,
 144        int64_t offset, int count,
 145        BlockCompletionFunc *cb, void *opaque);
 146
        /* Coroutine variants: the readv/writev forms take a sector number and
         * sector count, the preadv/pwritev forms take an offset and a length
         * in 'bytes'. */
 147    int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
 148        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
 149    int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs,
 150        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags);
 151    int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
 152        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
 153    int coroutine_fn (*bdrv_co_writev_flags)(BlockDriverState *bs,
 154        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags);
 155    int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs,
 156        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags);
 157
 158    /*
 159     * Efficiently zero a region of the disk image.  Typically an image format
 160     * would use a compact metadata representation to implement this.  This
 161     * function pointer may be NULL or return -ENOTSUP and .bdrv_co_writev()
 162     * will be called instead.
 163     */
 164    int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs,
 165        int64_t offset, int count, BdrvRequestFlags flags);
 166    int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs,
 167        int64_t offset, int count);
 168    int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs,
 169        int64_t sector_num, int nb_sectors, int *pnum,
 170        BlockDriverState **file);
 171
 172    /*
 173     * Invalidate any cached meta-data.
 174     */
 175    void (*bdrv_invalidate_cache)(BlockDriverState *bs, Error **errp);
 176    int (*bdrv_inactivate)(BlockDriverState *bs);
 177
 178    /*
 179     * Flushes all data for all layers by calling bdrv_co_flush for underlying
 180     * layers, if needed. This function is needed for deterministic
 181     * synchronization of the flush finishing callback.
 182     */
 183    int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs);
 184
 185    /*
 186     * Flushes all data that was already written to the OS all the way down to
 187     * the disk (for example raw-posix calls fsync()).
 188     */
 189    int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);
 190
 191    /*
 192     * Flushes all internal caches to the OS. The data may still sit in a
 193     * writeback cache of the host OS, but it will survive a crash of the qemu
 194     * process.
 195     */
 196    int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs);
 197
 198    const char *protocol_name;
 199    int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset);
 200
 201    int64_t (*bdrv_getlength)(BlockDriverState *bs);
 202    bool has_variable_length;
 203    int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs);
 204
 205    int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs,
 206        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov);
 207
 208    int (*bdrv_snapshot_create)(BlockDriverState *bs,
 209                                QEMUSnapshotInfo *sn_info);
 210    int (*bdrv_snapshot_goto)(BlockDriverState *bs,
 211                              const char *snapshot_id);
 212    int (*bdrv_snapshot_delete)(BlockDriverState *bs,
 213                                const char *snapshot_id,
 214                                const char *name,
 215                                Error **errp);
 216    int (*bdrv_snapshot_list)(BlockDriverState *bs,
 217                              QEMUSnapshotInfo **psn_info);
 218    int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs,
 219                                  const char *snapshot_id,
 220                                  const char *name,
 221                                  Error **errp);
 222    int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
 223    ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs);
 224
 225    int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs,
 226                                          QEMUIOVector *qiov,
 227                                          int64_t pos);
 228    int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs,
 229                                          QEMUIOVector *qiov,
 230                                          int64_t pos);
 231
 232    int (*bdrv_change_backing_file)(BlockDriverState *bs,
 233        const char *backing_file, const char *backing_fmt);
 234
 235    /* removable device specific */
 236    bool (*bdrv_is_inserted)(BlockDriverState *bs);
 237    int (*bdrv_media_changed)(BlockDriverState *bs);
 238    void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
 239    void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked);
 240
 241    /* to control generic scsi devices */
 242    BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs,
 243        unsigned long int req, void *buf,
 244        BlockCompletionFunc *cb, void *opaque);
 245    int coroutine_fn (*bdrv_co_ioctl)(BlockDriverState *bs,
 246                                      unsigned long int req, void *buf);
 247
 248    /* List of options for creating images, terminated by name == NULL */
 249    QemuOptsList *create_opts;
 250
 251    /*
 252     * Returns 0 for completed check, -errno for internal errors.
 253     * The check results are stored in result.
 254     */
 255    int (*bdrv_check)(BlockDriverState* bs, BdrvCheckResult *result,
 256        BdrvCheckMode fix);
 257
 258    int (*bdrv_amend_options)(BlockDriverState *bs, QemuOpts *opts,
 259                              BlockDriverAmendStatusCB *status_cb,
 260                              void *cb_opaque);
 261
 262    void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event);
 263
 264    /* TODO Better pass an option string/QDict/QemuOpts to add any rule? */
 265    int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event,
 266        const char *tag);
 267    int (*bdrv_debug_remove_breakpoint)(BlockDriverState *bs,
 268        const char *tag);
 269    int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag);
 270    bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag);
 271
 272    void (*bdrv_refresh_limits)(BlockDriverState *bs, Error **errp);
 273
 274    /*
 275     * Returns 1 if newly created images are guaranteed to contain only
 276     * zeros, 0 otherwise.
 277     */
 278    int (*bdrv_has_zero_init)(BlockDriverState *bs);
 279
 280    /* Remove fd handlers, timers, and other event loop callbacks so the event
 281     * loop is no longer in use.  Called with no in-flight requests and in
 282     * depth-first traversal order with parents before child nodes.
 283     */
 284    void (*bdrv_detach_aio_context)(BlockDriverState *bs);
 285
 286    /* Add fd handlers, timers, and other event loop callbacks so I/O requests
 287     * can be processed again.  Called with no in-flight requests and in
 288     * depth-first traversal order with child nodes before parent nodes.
 289     */
 290    void (*bdrv_attach_aio_context)(BlockDriverState *bs,
 291                                    AioContext *new_context);
 292
 293    /* io queue for linux-aio */
 294    void (*bdrv_io_plug)(BlockDriverState *bs);
 295    void (*bdrv_io_unplug)(BlockDriverState *bs);
 296
 297    /**
 298     * Try to get @bs's logical and physical block size.
 299     * On success, store them in @bsz and return zero.
 300     * On failure, return negative errno.
 301     */
 302    int (*bdrv_probe_blocksizes)(BlockDriverState *bs, BlockSizes *bsz);
 303    /**
 304     * Try to get @bs's geometry (cyls, heads, sectors)
 305     * On success, store them in @geo and return 0.
 306     * On failure return -errno.
 307     * Only drivers that want to override guest geometry implement this
 308     * callback; see hd_geometry_guess().
 309     */
 310    int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo);
 311
 312    /**
 313     * Drain and stop any internal sources of requests in the driver, and
 314     * remain so until next I/O callback (e.g. bdrv_co_writev) is called.
 315     */
 316    void (*bdrv_drain)(BlockDriverState *bs);
 317
 318    void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child,
 319                           Error **errp);
 320    void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child,
 321                           Error **errp);
 322
 323    QLIST_ENTRY(BlockDriver) list;
 324};
 325
 326typedef struct BlockLimits {
        /* I/O size and alignment limits of a node; BlockDriverState embeds one
         * of these as its 'bl' member.  For most of the fields, 0 means "no
         * limit" or "no preference", as spelled out per field. */
 327    /* Alignment requirement, in bytes, for offset/length of I/O
 328     * requests. Must be a power of 2 less than INT_MAX; defaults to
 329     * 1 for drivers with modern byte interfaces, and to 512
 330     * otherwise. */
 331    uint32_t request_alignment;
 332
 333    /* Maximum number of bytes that can be discarded at once (since it
 334     * is signed, it must be < 2G, if set). Must be multiple of
 335     * pdiscard_alignment, but need not be power of 2. May be 0 if no
 336     * inherent 32-bit limit */
 337    int32_t max_pdiscard;
 338
 339    /* Optimal alignment for discard requests in bytes. A power of 2
 340     * is best but not mandatory.  Must be a multiple of
 341     * bl.request_alignment, and must be less than max_pdiscard if
 342     * that is set. May be 0 if bl.request_alignment is good enough */
 343    uint32_t pdiscard_alignment;
 344
 345    /* Maximum number of bytes that can be zeroized at once (since it is
 346     * signed, it must be < 2G, if set). Must be multiple of
 347     * pwrite_zeroes_alignment. May be 0 if no inherent 32-bit limit */
 348    int32_t max_pwrite_zeroes;
 349
 350    /* Optimal alignment for write zeroes requests in bytes. A power
 351     * of 2 is best but not mandatory.  Must be a multiple of
 352     * bl.request_alignment, and must be less than max_pwrite_zeroes
 353     * if that is set. May be 0 if bl.request_alignment is good
 354     * enough */
 355    uint32_t pwrite_zeroes_alignment;
 356
 357    /* Optimal transfer length in bytes.  A power of 2 is best but not
 358     * mandatory.  Must be a multiple of bl.request_alignment, or 0 if
 359     * no preferred size */
 360    uint32_t opt_transfer;
 361
 362    /* Maximal transfer length in bytes.  Need not be power of 2, but
 363     * must be multiple of opt_transfer and bl.request_alignment, or 0
 364     * for no 32-bit limit.  For now, anything larger than INT_MAX is
 365     * clamped down. */
 366    uint32_t max_transfer;
 367
 368    /* memory alignment, in bytes so that no bounce buffer is needed */
 369    size_t min_mem_alignment;
 370
 371    /* memory alignment, in bytes, for bounce buffer */
 372    size_t opt_mem_alignment;
 373
 374    /* maximum number of iovec elements */
 375    int max_iov;
 376} BlockLimits;
 377
 378typedef struct BdrvOpBlocker BdrvOpBlocker;
    /* Forward declaration only: the struct body is not defined in this
     * header section.  BlockDriverState keeps one list of BdrvOpBlocker
     * entries per operation type in its op_blockers[] member. */
 379
 380typedef struct BdrvAioNotifier {
        /* One subscriber to AioContext changes of a BlockDriverState; entries
         * are linked into that node's aio_notifiers list.  The two callbacks
         * and 'opaque' have the same types as the arguments of
         * bdrv_add_aio_context_notifier(). */
 381    void (*attached_aio_context)(AioContext *new_context, void *opaque);
 382    void (*detach_aio_context)(void *opaque);
 383
 384    void *opaque;
        /* NOTE(review): presumably marks an entry removed while the list is
         * being walked (cf. BlockDriverState.walking_aio_notifiers) --
         * confirm. */
 385    bool deleted;
 386
 387    QLIST_ENTRY(BdrvAioNotifier) list;
 388} BdrvAioNotifier;
 389
 390struct BdrvChildRole {
        /* Callbacks associated with a BdrvChild link; every BdrvChild points
         * to one of these through its 'role' member. */

        /* NOTE(review): presumably derives the child's flags and options from
         * the parent's -- confirm against the implementations. */
 391    void (*inherit_options)(int *child_flags, QDict *child_options,
 392                            int parent_flags, QDict *parent_options);
 393
        /* NOTE(review): presumably notifications to the parent about media
         * load/eject and about a size change of the child -- confirm. */
 394    void (*change_media)(BdrvChild *child, bool load);
 395    void (*resize)(BdrvChild *child);
 396
 397    /* Returns a name that is supposedly more useful for human users than the
 398     * node name for identifying the node in question (in particular, a BB
 399     * name), or NULL if the parent can't provide a better name. */
 400    const char* (*get_name)(BdrvChild *child);
 401
 402    /*
 403     * If this pair of functions is implemented, the parent doesn't issue new
 404     * requests after returning from .drained_begin() until .drained_end() is
 405     * called.
 406     *
 407     * Note that this can be nested. If drained_begin() was called twice, new
 408     * I/O is allowed only after drained_end() was called twice, too.
 409     */
 410    void (*drained_begin)(BdrvChild *child);
 411    void (*drained_end)(BdrvChild *child);
 412};
 413
 414extern const BdrvChildRole child_file;
 415extern const BdrvChildRole child_format;
    /* child_file and child_format are ready-made BdrvChildRole instances
     * defined outside this header.
     * NOTE(review): the situations in which each role applies are not
     * visible here -- see their definitions. */
 416
 417struct BdrvChild {
        /* A link to a node in the block graph.  'bs' is the node reached
         * through the link (backing_bs() returns bs->backing->bs).
         * NOTE(review): 'next' and 'next_parent' are presumably the entries
         * in the parent's 'children' list and in the child's 'parents' list
         * (both declared in BlockDriverState) -- confirm. */
 418    BlockDriverState *bs;
 419    char *name;
 420    const BdrvChildRole *role;
 421    void *opaque;
 422    QLIST_ENTRY(BdrvChild) next;
 423    QLIST_ENTRY(BdrvChild) next_parent;
 424};
 425
 426/*
 427 * Note: the function bdrv_append() copies and swaps contents of
 428 * BlockDriverStates, so if you add new fields to this struct, please
 429 * inspect bdrv_append() to determine if the new fields need to be
 430 * copied as well.
 431 */
 432struct BlockDriverState {
        /* State of one node in the block graph.  See the warning directly
         * above this struct before adding fields: bdrv_append() copies and
         * swaps contents. */
 433    int64_t total_sectors; /* if we are reading a disk image, give its
 434                              size in sectors */
 435    int open_flags; /* flags used to open the file, re-used for re-open */
 436    bool read_only; /* if true, the media is read only */
 437    bool encrypted; /* if true, the media is encrypted */
 438    bool valid_key; /* if true, a valid encryption key has been set */
 439    bool sg;        /* if true, the device is a /dev/sg* */
 440    bool probed;    /* if true, format was probed rather than specified */
 441
 442    int copy_on_read; /* if nonzero, copy read backing sectors into image.
 443                         note this is a reference count */
 444
 445    CoQueue flush_queue;            /* Serializing flush queue */
 446    bool active_flush_req;          /* Flush request in flight? */
 447    unsigned int write_gen;         /* Current data generation */
 448    unsigned int flushed_gen;       /* Flushed write generation */
 449
 450    BlockDriver *drv; /* NULL means no media */
        /* NOTE(review): presumably the driver's private state (cf.
         * BlockDriver.instance_size) -- confirm. */
 451    void *opaque;
 452
 453    AioContext *aio_context; /* event loop used for fd handlers, timers, etc */
 454    /* long-running tasks intended to always use the same AioContext as this
 455     * BDS may register themselves in this list to be notified of changes
 456     * regarding this BDS's context */
 457    QLIST_HEAD(, BdrvAioNotifier) aio_notifiers;
 458    bool walking_aio_notifiers; /* to make removal during iteration safe */
 459
 460    char filename[PATH_MAX];
 461    char backing_file[PATH_MAX]; /* if non zero, the image is a diff of
 462                                    this file image */
 463    char backing_format[16]; /* if non-zero and backing_file exists */
 464
 465    QDict *full_open_options;
 466    char exact_filename[PATH_MAX];
 467
        /* Links to the backing node and the underlying file node; use
         * backing_bs() to get the backing node itself (NULL if none). */
 468    BdrvChild *backing;
 469    BdrvChild *file;
 470
 471    /* Callback before write request is processed */
 472    NotifierWithReturnList before_write_notifiers;
 473
 474    /* number of in-flight requests; overall and serialising */
 475    unsigned int in_flight;
 476    unsigned int serialising_in_flight;
 477
        /* NOTE(review): presumably tells bdrv_wakeup() that the main thread
         * is waiting in BDRV_POLL_WHILE -- confirm. */
 478    bool wakeup;
 479
 480    /* Offset after the highest byte written to */
 481    uint64_t wr_highest_offset;
 482
 483    /* I/O Limits */
 484    BlockLimits bl;
 485
 486    /* Flags honored during pwrite (so far: BDRV_REQ_FUA) */
 487    unsigned int supported_write_flags;
 488    /* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA,
 489     * BDRV_REQ_MAY_UNMAP) */
 490    unsigned int supported_zero_flags;
 491
 492    /* the following member gives a name to every node on the bs graph. */
 493    char node_name[32];
 494    /* element of the list of named nodes building the graph */
 495    QTAILQ_ENTRY(BlockDriverState) node_list;
 496    /* element of the list of all BlockDriverStates (all_bdrv_states) */
 497    QTAILQ_ENTRY(BlockDriverState) bs_list;
 498    /* element of the list of monitor-owned BDS */
 499    QTAILQ_ENTRY(BlockDriverState) monitor_list;
 500    QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
        /* NOTE(review): presumably the node's reference count -- confirm. */
 501    int refcnt;
 502
 503    QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
 504
 505    /* operation blockers */
 506    QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX];
 507
 508    /* long-running background operation */
 509    BlockJob *job;
 510
 511    /* The node that this node inherited default options from (and a reopen on
 512     * which can affect this node by changing these defaults). This is always a
 513     * parent node of this node. */
 514    BlockDriverState *inherits_from;
 515    QLIST_HEAD(, BdrvChild) children;
 516    QLIST_HEAD(, BdrvChild) parents;
 517
 518    QDict *options;
 519    QDict *explicit_options;
 520    BlockdevDetectZeroesOptions detect_zeroes;
 521
 522    /* The error object in use for blocking operations on backing_hd */
 523    Error *backing_blocker;
 524
 525    /* threshold limit for writes, in bytes. "High water mark". */
 526    uint64_t write_threshold_offset;
 527    NotifierWithReturn write_threshold_notifier;
 528
 529    /* counters for nested bdrv_io_plug and bdrv_io_unplugged_begin */
 530    unsigned io_plugged;
 531    unsigned io_plug_disabled;
 532
        /* NOTE(review): presumably a nesting counter for drained/quiesced
         * sections -- confirm. */
 533    int quiesce_counter;
 534};
 535
 536struct BlockBackendRootState {
        /* The three members have the same names and types as the
         * corresponding members of BlockDriverState.
         * NOTE(review): presumably a saved copy of a root node's settings --
         * confirm against the BlockBackend code. */
 537    int open_flags;
 538    bool read_only;
 539    BlockdevDetectZeroesOptions detect_zeroes;
 540};
 541
 542typedef enum BlockMirrorBackingMode {
        /* Selects how the mirror target's backing chain is established after
         * completion; passed to mirror_start() as @backing_mode. */

 543    /* Reuse the existing backing chain from the source for the target.
 544     * - sync=full: Set backing BDS to NULL.
 545     * - sync=top:  Use source's backing BDS.
 546     * - sync=none: Use source as the backing BDS. */
 547    MIRROR_SOURCE_BACKING_CHAIN,
 548
 549    /* Open the target's backing chain completely anew */
 550    MIRROR_OPEN_BACKING_CHAIN,
 551
 552    /* Do not change the target's backing BDS after job completion */
 553    MIRROR_LEAVE_BACKING_CHAIN,
 554} BlockMirrorBackingMode;
 555
 556static inline BlockDriverState *backing_bs(BlockDriverState *bs)
 557{
        /* Resolve @bs's backing link to the node behind it; a node without a
         * backing link yields NULL.  @bs itself must not be NULL. */
 558    return (bs->backing == NULL) ? NULL : bs->backing->bs;
 559}
 560
 561
 562/* Essential block drivers which must always be statically linked into qemu, and
 563 * which therefore can be accessed without using bdrv_find_format().  These are
     * declarations only; the BlockDriver objects are defined elsewhere. */
 564extern BlockDriver bdrv_file;
 565extern BlockDriver bdrv_raw;
 566extern BlockDriver bdrv_qcow2;
 567
 568int coroutine_fn bdrv_co_preadv(BdrvChild *child,
 569    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
 570    BdrvRequestFlags flags);
 571int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
 572    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
 573    BdrvRequestFlags flags);
    /* bdrv_co_preadv()/bdrv_co_pwritev(): coroutine_fn read and write entry
     * points addressed through a BdrvChild, taking an offset, a length in
     * @bytes and an I/O vector.
     * NOTE(review): return value is presumably 0 or -errno -- confirm. */
 574
 575int get_tmp_filename(char *filename, int size);
 576BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
 577                            const char *filename);
    /* NOTE(review): get_tmp_filename() presumably writes a temporary file
     * name of at most @size bytes into @filename, and bdrv_probe_all()
     * presumably picks a driver by running the drivers' .bdrv_probe()
     * callbacks (same buf/buf_size/filename arguments) -- confirm both
     * against their definitions. */
 578
 579
 580/**
 581 * bdrv_add_before_write_notifier:
     * @bs: The node whose write requests should trigger the callback.
     * @notifier: The callback to register.
 582 *
 583 * Register a callback that is invoked before write requests are processed but
 584 * after any throttling or waiting for overlapping requests.
     *
     * NOTE(review): presumably appends @notifier to
     * @bs->before_write_notifiers -- confirm.
 585 */
 586void bdrv_add_before_write_notifier(BlockDriverState *bs,
 587                                    NotifierWithReturn *notifier);
 588
 589/**
 590 * bdrv_detach_aio_context:
     * @bs: The child node to detach.
 591 *
 592 * May be called from .bdrv_detach_aio_context() to detach children from the
 593 * current #AioContext.  This is only needed by block drivers that manage their
 594 * own children.  Both ->file and ->backing are automatically handled and
 595 * block drivers should not call this function on them explicitly.
 596 */
 597void bdrv_detach_aio_context(BlockDriverState *bs);
 598
 599/**
 600 * bdrv_attach_aio_context:
     * @bs: The child node to attach.
     * @new_context: The #AioContext to attach @bs to.
 601 *
 602 * May be called from .bdrv_attach_aio_context() to attach children to the new
 603 * #AioContext.  This is only needed by block drivers that manage their own
 604 * children.  Both ->file and ->backing are automatically handled and block
 605 * drivers should not call this function on them explicitly.
 606 */
 607void bdrv_attach_aio_context(BlockDriverState *bs,
 608                             AioContext *new_context);
 609
 610/**
 611 * bdrv_add_aio_context_notifier:
 612 *
 613 * If a long-running job intends to be always run in the same AioContext as a
 614 * certain BDS, it may use this function to be notified of changes regarding the
 615 * association of the BDS to an AioContext.
 616 *
 617 * attached_aio_context() is called after the target BDS has been attached to a
 618 * new AioContext; detach_aio_context() is called before the target BDS is being
 619 * detached from its old AioContext.
 620 */
 621void bdrv_add_aio_context_notifier(BlockDriverState *bs,
 622        void (*attached_aio_context)(AioContext *new_context, void *opaque),
 623        void (*detach_aio_context)(void *opaque), void *opaque);
 624
 625/**
 626 * bdrv_remove_aio_context_notifier:
 627 *
 628 * Unsubscribe from change notifications regarding the BDS's AioContext. The
 629 * parameters given here have to be the same as those given to
 630 * bdrv_add_aio_context_notifier().
 631 */
 632void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
 633                                      void (*aio_context_attached)(AioContext *,
 634                                                                   void *),
 635                                      void (*aio_context_detached)(void *),
 636                                      void *opaque);
 637
 638/**
 639 * bdrv_wakeup:
 640 * @bs: The BlockDriverState for which an I/O operation has been completed.
 641 *
 642 * Wake up the main thread if it is waiting on BDRV_POLL_WHILE.  During
 643 * synchronous I/O on a BlockDriverState that is attached to another
 644 * I/O thread, the main thread lets the I/O thread's event loop run,
 645 * waiting for the I/O operation to complete.  A bdrv_wakeup will wake
 646 * up the main thread if necessary.
 647 *
 648 * Manual calls to bdrv_wakeup are rarely necessary, because
 649 * bdrv_dec_in_flight already calls it.
     *
     * NOTE(review): presumably keyed off the 'wakeup' member of @bs --
     * confirm against the definition.
 650 */
 651void bdrv_wakeup(BlockDriverState *bs);
 652
 653#ifdef _WIN32
    /* Declared for Windows builds only.
     * NOTE(review): presumably returns non-zero when @filename names a
     * Windows drive -- confirm against the definition. */
 654int is_windows_drive(const char *filename);
 655#endif
 656
 657/**
 658 * stream_start:
 659 * @job_id: The id of the newly-created job, or %NULL to use the
 660 * device name of @bs.
 661 * @bs: Block device to operate on.
 662 * @base: Block device that will become the new base, or %NULL to
 663 * flatten the whole backing file chain onto @bs.
 664 * @backing_file_str: The file name that will be written to @bs as the
 665 * new backing file if the job completes. Ignored if @base is %NULL.
 666 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
 667 * @on_error: The action to take upon error.
 668 * @errp: Error object.
 669 *
 670 * Start a streaming operation on @bs.  Clusters that are unallocated
 671 * in @bs, but allocated in any image between @base and @bs (both
 672 * exclusive) will be written to @bs.  At the end of a successful
 673 * streaming job, the backing file of @bs will be changed to
 674 * @backing_file_str in the written image and to @base in the live
 675 * BlockDriverState.
 676 */
 677void stream_start(const char *job_id, BlockDriverState *bs,
 678                  BlockDriverState *base, const char *backing_file_str,
 679                  int64_t speed, BlockdevOnError on_error, Error **errp);
 680
 681/**
 682 * commit_start:
 683 * @job_id: The id of the newly-created job, or %NULL to use the
 684 * device name of @bs.
 685 * @bs: Active block device.
 686 * @base: Block device that will be written into, and become the new top.
 687 * @top: Top block device to be committed.
 688 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
 689 * @on_error: The action to take upon error.
 690 * @backing_file_str: String to use as the backing file in @top's overlay
 691 * @errp: Error object.
 692 *
     * Note that @base comes before @top in the argument list.
 693 */
 694void commit_start(const char *job_id, BlockDriverState *bs,
 695                  BlockDriverState *base, BlockDriverState *top, int64_t speed,
 696                  BlockdevOnError on_error, const char *backing_file_str,
 697                  Error **errp);
 698/**
 699 * commit_active_start:
 700 * @job_id: The id of the newly-created job, or %NULL to use the
 701 * device name of @bs.
 702 * @bs: Active block device to be committed.
 703 * @base: Block device that will be written into, and become the new top.
 704 * @creation_flags: Flags that control the behavior of the Job lifetime.
 705 *                  See @BlockJobCreateFlags
 706 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
 707 * @on_error: The action to take upon error.
 708 * @cb: Completion function for the job.
 709 * @opaque: Opaque pointer value passed to @cb.
 710 * @errp: Error object.
 711 * @auto_complete: Auto complete the job.
 712 *
     * Start a job that commits the active block device @bs into @base.
 713 */
 714void commit_active_start(const char *job_id, BlockDriverState *bs,
 715                         BlockDriverState *base, int creation_flags,
 716                         int64_t speed, BlockdevOnError on_error,
 717                         BlockCompletionFunc *cb,
 718                         void *opaque, Error **errp, bool auto_complete);
 719/**
 720 * mirror_start:
 721 * @job_id: The id of the newly-created job, or %NULL to use the
 722 * device name of @bs.
 723 * @bs: Block device to operate on.
 724 * @target: Block device to write to.
 725 * @replaces: Block graph node name to replace once the mirror is done. Can
 726 *            only be used when full mirroring is selected.
 727 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
 728 * @granularity: The chosen granularity for the dirty bitmap.
 729 * @buf_size: The amount of data that can be in flight at one time.
 730 * @mode: Whether to collapse all images in the chain to the target.
 731 * @backing_mode: How to establish the target's backing chain after completion
     *                (see #BlockMirrorBackingMode).
 732 * @on_source_error: The action to take upon error reading from the source.
 733 * @on_target_error: The action to take upon error writing to the target.
 734 * @unmap: Whether to unmap target where source sectors only contain zeroes.
 735 * @errp: Error object.
 736 *
 737 * Start a mirroring operation on @bs.  Clusters that are allocated
 738 * in @bs will be written to @target until the job is cancelled or
 739 * manually completed.  At the end of a successful mirroring job,
 740 * @bs will be switched to read from @target.
 741 */
 742void mirror_start(const char *job_id, BlockDriverState *bs,
 743                  BlockDriverState *target, const char *replaces,
 744                  int64_t speed, uint32_t granularity, int64_t buf_size,
 745                  MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
 746                  BlockdevOnError on_source_error,
 747                  BlockdevOnError on_target_error,
 748                  bool unmap, Error **errp);
 749
 750/**
 751 * backup_job_create:
 752 * @job_id: The id of the newly-created job, or %NULL to use the
 753 * device name of @bs.
 754 * @bs: Block device to operate on.
 755 * @target: Block device to write to.
 756 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
 757 * @sync_mode: What parts of the disk image should be copied to the destination.
 758 * @sync_bitmap: The dirty bitmap if sync_mode is MIRROR_SYNC_MODE_INCREMENTAL.
     * @compress: NOTE(review): not described in the original comment; presumably
     *            selects whether data written to @target is compressed --
     *            confirm against the implementation.
 759 * @on_source_error: The action to take upon error reading from the source.
 760 * @on_target_error: The action to take upon error writing to the target.
 761 * @creation_flags: Flags that control the behavior of the Job lifetime.
 762 *                  See @BlockJobCreateFlags
 763 * @cb: Completion function for the job.
 764 * @opaque: Opaque pointer value passed to @cb.
 765 * @txn: Transaction that this job is part of (may be NULL).
     * @errp: Error object.
 766 *
 767 * Create a backup operation on @bs.  Clusters in @bs are written to @target
 768 * until the job is cancelled or manually completed.
     *
     * Returns a pointer to a BlockJob.  NOTE(review): presumably the newly
     * created job, or NULL with @errp set on failure -- confirm.
 769 */
 770BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
 771                            BlockDriverState *target, int64_t speed,
 772                            MirrorSyncMode sync_mode,
 773                            BdrvDirtyBitmap *sync_bitmap,
 774                            bool compress,
 775                            BlockdevOnError on_source_error,
 776                            BlockdevOnError on_target_error,
 777                            int creation_flags,
 778                            BlockCompletionFunc *cb, void *opaque,
 779                            BlockJobTxn *txn, Error **errp);
 780
 /*
  * hmp_drive_add_node:
  * Takes the monitor @mon and an option string @optstr; returns no value.
  * NOTE(review): judging by the name, this is the HMP path that adds a block
  * node described by @optstr -- confirm against the implementation.
  */
 781void hmp_drive_add_node(Monitor *mon, const char *optstr);
 782
 /*
  * bdrv_root_attach_child() takes a node (@child_bs), a child name, a child
  * role and an opaque pointer, and returns a BdrvChild pointer.
  * bdrv_root_unref_child() takes a BdrvChild pointer and returns no value.
  * NOTE(review): presumably the second call releases what the first one
  * returned -- confirm the ownership and reference-counting rules in the
  * implementation.
  */
 783BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
 784                                  const char *child_name,
 785                                  const BdrvChildRole *child_role,
 786                                  void *opaque);
 787void bdrv_root_unref_child(BdrvChild *child);
 788
 /*
  * bdrv_get_parent_name() returns a string for @bs; the blk_dev_*() functions
  * below each take a BlockBackend.
  * NOTE(review): going by their names, the blk_dev_*() calls relay media
  * change, eject requests and removable-media/tray/lock queries to the device
  * attached to @blk -- confirm against the implementation.
  */
 789const char *bdrv_get_parent_name(const BlockDriverState *bs);
 790void blk_dev_change_media_cb(BlockBackend *blk, bool load);
 791bool blk_dev_has_removable_media(BlockBackend *blk);
 792bool blk_dev_has_tray(BlockBackend *blk);
 793void blk_dev_eject_request(BlockBackend *blk, bool force);
 794bool blk_dev_is_tray_open(BlockBackend *blk);
 795bool blk_dev_is_medium_locked(BlockBackend *blk);
 796
 /*
  * NOTE(review): bdrv_set_dirty() presumably marks @nr_sect sectors starting
  * at @cur_sector as dirty for @bs, and bdrv_requests_pending() presumably
  * reports whether @bs still has outstanding requests -- confirm both against
  * the implementation.
  */
 797void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int64_t nr_sect);
 798bool bdrv_requests_pending(BlockDriverState *bs);
 799
 /*
  * Clearing a dirty bitmap and the matching undo operation.
  * NOTE(review): @out presumably receives the bitmap's previous contents so
  * that they can later be handed back through @in to undo the clear --
  * confirm, including who owns the HBitmap afterwards.
  */
 800void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
 801void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in);
 802
 /*
  * NOTE(review): presumably increment / decrement the count of in-flight
  * operations on @bs, with calls expected to be paired -- confirm against the
  * implementation.
  */
 803void bdrv_inc_in_flight(BlockDriverState *bs);
 804void bdrv_dec_in_flight(BlockDriverState *bs);
 805
 /*
  * Takes no arguments and returns no value.
  * NOTE(review): by its name, closes every BlockDriverState that blockdev
  * keeps track of -- confirm against the implementation.
  */
 806void blockdev_close_all_bdrv_states(void);
 807
 808#endif /* BLOCK_INT_H */
 809