qemu/include/block/block_int.h
<<
>>
Prefs
   1/*
   2 * QEMU System Emulator block driver
   3 *
   4 * Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24#ifndef BLOCK_INT_H
  25#define BLOCK_INT_H
  26
  27#include "block/accounting.h"
  28#include "block/block.h"
  29#include "block/aio-wait.h"
  30#include "qemu/queue.h"
  31#include "qemu/coroutine.h"
  32#include "qemu/stats64.h"
  33#include "qemu/timer.h"
  34#include "qemu/hbitmap.h"
  35#include "block/snapshot.h"
  36#include "qemu/main-loop.h"
  37#include "qemu/throttle.h"
  38
  39#define BLOCK_FLAG_LAZY_REFCOUNTS   8
  40
    /* Option-name strings.
     * NOTE(review): presumably the keys drivers look up in the QemuOpts/QDict
     * they receive for image creation and opening -- confirm against the
     * drivers that use them. */
  41#define BLOCK_OPT_SIZE              "size"
  42#define BLOCK_OPT_ENCRYPT           "encryption"
  43#define BLOCK_OPT_ENCRYPT_FORMAT    "encrypt.format"
  44#define BLOCK_OPT_COMPAT6           "compat6"
  45#define BLOCK_OPT_HWVERSION         "hwversion"
  46#define BLOCK_OPT_BACKING_FILE      "backing_file"
  47#define BLOCK_OPT_BACKING_FMT       "backing_fmt"
  48#define BLOCK_OPT_CLUSTER_SIZE      "cluster_size"
  49#define BLOCK_OPT_TABLE_SIZE        "table_size"
  50#define BLOCK_OPT_PREALLOC          "preallocation"
  51#define BLOCK_OPT_SUBFMT            "subformat"
  52#define BLOCK_OPT_COMPAT_LEVEL      "compat"
  53#define BLOCK_OPT_LAZY_REFCOUNTS    "lazy_refcounts"
  54#define BLOCK_OPT_ADAPTER_TYPE      "adapter_type"
  55#define BLOCK_OPT_REDUNDANCY        "redundancy"
  56#define BLOCK_OPT_NOCOW             "nocow"
  57#define BLOCK_OPT_OBJECT_SIZE       "object_size"
  58#define BLOCK_OPT_REFCOUNT_BITS     "refcount_bits"
  59
    /* NOTE(review): presumably the size in bytes of the buffer handed to
     * BlockDriver.bdrv_probe() -- confirm against the probing code. */
  60#define BLOCK_PROBE_BUF_SIZE        512
  61
  62enum BdrvTrackedRequestType {
        /* Kind of operation a BdrvTrackedRequest stands for; stored in that
         * struct's 'type' member. */
  63    BDRV_TRACKED_READ,
  64    BDRV_TRACKED_WRITE,
  65    BDRV_TRACKED_DISCARD,
  66    BDRV_TRACKED_TRUNCATE,
  67};
  68
  69typedef struct BdrvTrackedRequest {
        /* Record of one request on the node @bs: the range it covers, its
         * type, and the state needed to make other coroutines wait for it.
         * Records are chained together through 'list'. */
  70    BlockDriverState *bs;
        /* Start and length of the request (presumably in bytes, as the name
         * 'bytes' suggests -- confirm). */
  71    int64_t offset;
  72    uint64_t bytes;
  73    enum BdrvTrackedRequestType type;
  74
        /* NOTE(review): overlap_offset/overlap_bytes look like the (possibly
         * widened) range used for overlap detection when 'serialising' is
         * set -- confirm in the code that maintains tracked requests. */
  75    bool serialising;
  76    int64_t overlap_offset;
  77    uint64_t overlap_bytes;
  78
  79    QLIST_ENTRY(BdrvTrackedRequest) list;
  80    Coroutine *co; /* owner, used for deadlock detection */
  81    CoQueue wait_queue; /* coroutines blocked on this request */
  82
        /* NOTE(review): presumably the request this one is currently blocked
         * on, used with 'co' for the deadlock detection mentioned above --
         * confirm. */
  83    struct BdrvTrackedRequest *waiting_for;
  84} BdrvTrackedRequest;
  85
  86struct BlockDriver {
        /* Description of one block driver: its name and properties plus the
         * table of callbacks that operate on a BlockDriverState of that
         * driver.  Drivers are chained together through 'list' (last member).
         * NOTE(review): several comments below describe what happens when a
         * callback is not implemented, so callbacks appear to be optional --
         * confirm per callback before relying on a NULL pointer.
         */
  87    const char *format_name;
        /* NOTE(review): presumably the size of the driver-private state
         * allocated for BlockDriverState.opaque -- confirm in block.c. */
  88    int instance_size;
  89
  90    /* set to true if the BlockDriver is a block filter. Block filters pass
  91     * certain callbacks that refer to data (see block.c) to their bs->file if
  92     * the driver doesn't implement them. Drivers that do not wish to forward
  93     * must implement them and return -ENOTSUP.
  94     */
  95    bool is_filter;
  96    /* for snapshots block filter like Quorum can implement the
  97     * following recursive callback.
  98     * Its purpose is to recurse on the filter children while calling
  99     * bdrv_recurse_is_first_non_filter on them.
 100     * For a sample implementation look in the future Quorum block filter.
 101     */
 102    bool (*bdrv_recurse_is_first_non_filter)(BlockDriverState *bs,
 103                                             BlockDriverState *candidate);
 104
 105    int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
 106    int (*bdrv_probe_device)(const char *filename);
 107
 108    /* Any driver implementing this callback is expected to be able to handle
 109     * NULL file names in its .bdrv_open() implementation */
 110    void (*bdrv_parse_filename)(const char *filename, QDict *options, Error **errp);
 111    /* Drivers not implementing bdrv_parse_filename nor bdrv_open should have
 112     * this field set to true, except ones that are defined only by their
 113     * child's bs.
 114     * An example of the last type will be the quorum block driver.
 115     */
 116    bool bdrv_needs_filename;
 117
 118    /* Set if a driver can support backing files */
 119    bool supports_backing;
 120
 121    /* For handling image reopen for split or non-split files */
 122    int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
 123                               BlockReopenQueue *queue, Error **errp);
 124    void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
 125    void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
 126    void (*bdrv_join_options)(QDict *options, QDict *old_options);
 127
 128    int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags,
 129                     Error **errp);
 130
 131    /* Protocol drivers should implement this instead of bdrv_open */
 132    int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags,
 133                          Error **errp);
 134    void (*bdrv_close)(BlockDriverState *bs);
 135    int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts,
 136                                       Error **errp);
 137    int coroutine_fn (*bdrv_co_create_opts)(const char *filename,
 138                                            QemuOpts *opts,
 139                                            Error **errp);
 140    int (*bdrv_make_empty)(BlockDriverState *bs);
 141
 142    void (*bdrv_refresh_filename)(BlockDriverState *bs, QDict *options);
 143
 144    /* aio */
 145    BlockAIOCB *(*bdrv_aio_preadv)(BlockDriverState *bs,
 146        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags,
 147        BlockCompletionFunc *cb, void *opaque);
 148    BlockAIOCB *(*bdrv_aio_pwritev)(BlockDriverState *bs,
 149        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags,
 150        BlockCompletionFunc *cb, void *opaque);
 151    BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
 152        BlockCompletionFunc *cb, void *opaque);
 153    BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs,
 154        int64_t offset, int bytes,
 155        BlockCompletionFunc *cb, void *opaque);
 156
 157    int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
 158        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
 159
 160    /**
 161     * @offset: position in bytes to read at
 162     * @bytes: number of bytes to read
 163     * @qiov: the buffers to fill with read data
 164     * @flags: currently unused, always 0
 165     *
 166     * @offset and @bytes will be a multiple of 'request_alignment',
 167     * but the length of individual @qiov elements does not have to
 168     * be a multiple.
 169     *
 170     * @bytes will always equal the total size of @qiov, and will be
 171     * no larger than 'max_transfer'.
 172     *
 173     * The buffer in @qiov may point directly to guest memory.
 174     */
 175    int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs,
 176        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags);
 177    int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
 178        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags);
 179    /**
 180     * @offset: position in bytes to write at
 181     * @bytes: number of bytes to write
 182     * @qiov: the buffers containing data to write
 183     * @flags: zero or more bits allowed by 'supported_write_flags'
 184     *
 185     * @offset and @bytes will be a multiple of 'request_alignment',
 186     * but the length of individual @qiov elements does not have to
 187     * be a multiple.
 188     *
 189     * @bytes will always equal the total size of @qiov, and will be
 190     * no larger than 'max_transfer'.
 191     *
 192     * The buffer in @qiov may point directly to guest memory.
 193     */
 194    int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs,
 195        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags);
 196
 197    /*
 198     * Efficiently zero a region of the disk image.  Typically an image format
 199     * would use a compact metadata representation to implement this.  This
 200     * function pointer may be NULL or return -ENOTSUP and .bdrv_co_writev()
 201     * will be called instead.
 202     */
 203    int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs,
 204        int64_t offset, int bytes, BdrvRequestFlags flags);
 205    int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs,
 206        int64_t offset, int bytes);
 207
 208    /* Map [offset, offset + bytes) range onto a child of @bs to copy from,
 209     * and invoke bdrv_co_copy_range_from(child, ...), or invoke
 210     * bdrv_co_copy_range_to() if @bs is the leaf child to copy data from.
 211     *
 212     * See the comment of bdrv_co_copy_range for the parameter and return value
 213     * semantics.
 214     */
 215    int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs,
 216                                                BdrvChild *src,
 217                                                uint64_t offset,
 218                                                BdrvChild *dst,
 219                                                uint64_t dst_offset,
 220                                                uint64_t bytes,
 221                                                BdrvRequestFlags read_flags,
 222                                                BdrvRequestFlags write_flags);
 223
 224    /* Map [dst_offset, dst_offset + bytes) range onto a child of @bs to copy
 225     * data to, and invoke bdrv_co_copy_range_to(child, src, ...), or perform
 226     * the copy operation if @bs is the leaf and @src has the same BlockDriver.
 227     * Return -ENOTSUP if @bs is the leaf but @src has a different BlockDriver.
 228     *
 229     * See the comment of bdrv_co_copy_range for the parameter and return value
 230     * semantics.
 231     */
 232    int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs,
 233                                              BdrvChild *src,
 234                                              uint64_t src_offset,
 235                                              BdrvChild *dst,
 236                                              uint64_t dst_offset,
 237                                              uint64_t bytes,
 238                                              BdrvRequestFlags read_flags,
 239                                              BdrvRequestFlags write_flags);
 240
 241    /*
 242     * Building block for bdrv_block_status[_above] and
 243     * bdrv_is_allocated[_above].  The driver should answer only
 244     * according to the current layer, and should only need to set
 245     * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID,
 246     * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing
 247     * layer, the result should be 0 (and not BDRV_BLOCK_ZERO).  See
 248     * block.h for the overall meaning of the bits.  As a hint, the
 249     * flag want_zero is true if the caller cares more about precise
 250     * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for
 251     * overall allocation (favor larger *pnum, perhaps by reporting
 252     * _DATA instead of _ZERO).  The block layer guarantees input
 253     * clamped to bdrv_getlength() and aligned to request_alignment,
 254     * as well as non-NULL pnum, map, and file; in turn, the driver
 255     * must return an error or set pnum to an aligned non-zero value.
 256     */
 257    int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs,
 258        bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
 259        int64_t *map, BlockDriverState **file);
 260
 261    /*
 262     * Invalidate any cached meta-data.
 263     */
 264    void coroutine_fn (*bdrv_co_invalidate_cache)(BlockDriverState *bs,
 265                                                  Error **errp);
 266    int (*bdrv_inactivate)(BlockDriverState *bs);
 267
 268    /*
 269     * Flushes all data for all layers by calling bdrv_co_flush for underlying
 270     * layers, if needed. This function is needed for deterministic
 271     * synchronization of the flush finishing callback.
 272     */
 273    int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs);
 274
 275    /*
 276     * Flushes all data that was already written to the OS all the way down to
 277     * the disk (for example file-posix.c calls fsync()).
 278     */
 279    int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);
 280
 281    /*
 282     * Flushes all internal caches to the OS. The data may still sit in a
 283     * writeback cache of the host OS, but it will survive a crash of the qemu
 284     * process.
 285     */
 286    int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs);
 287
 288    /*
 289     * Drivers setting this field must be able to work with just a plain
 290     * filename with '<protocol_name>:' as a prefix, and no other options.
 291     * Options may be extracted from the filename by implementing
 292     * bdrv_parse_filename.
 293     */
 294    const char *protocol_name;
 295    int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset,
 296                                         PreallocMode prealloc, Error **errp);
 297
 298    int64_t (*bdrv_getlength)(BlockDriverState *bs);
 299    bool has_variable_length;
 300    int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs);
 301    BlockMeasureInfo *(*bdrv_measure)(QemuOpts *opts, BlockDriverState *in_bs,
 302                                      Error **errp);
 303
 304    int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs,
 305        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov);
 306
 307    int (*bdrv_snapshot_create)(BlockDriverState *bs,
 308                                QEMUSnapshotInfo *sn_info);
 309    int (*bdrv_snapshot_goto)(BlockDriverState *bs,
 310                              const char *snapshot_id);
 311    int (*bdrv_snapshot_delete)(BlockDriverState *bs,
 312                                const char *snapshot_id,
 313                                const char *name,
 314                                Error **errp);
 315    int (*bdrv_snapshot_list)(BlockDriverState *bs,
 316                              QEMUSnapshotInfo **psn_info);
 317    int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs,
 318                                  const char *snapshot_id,
 319                                  const char *name,
 320                                  Error **errp);
 321    int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
 322    ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs);
 323
 324    int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs,
 325                                          QEMUIOVector *qiov,
 326                                          int64_t pos);
 327    int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs,
 328                                          QEMUIOVector *qiov,
 329                                          int64_t pos);
 330
 331    int (*bdrv_change_backing_file)(BlockDriverState *bs,
 332        const char *backing_file, const char *backing_fmt);
 333
 334    /* removable device specific */
 335    bool (*bdrv_is_inserted)(BlockDriverState *bs);
 336    void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
 337    void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked);
 338
 339    /* to control generic scsi devices */
 340    BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs,
 341        unsigned long int req, void *buf,
 342        BlockCompletionFunc *cb, void *opaque);
 343    int coroutine_fn (*bdrv_co_ioctl)(BlockDriverState *bs,
 344                                      unsigned long int req, void *buf);
 345
 346    /* List of options for creating images, terminated by name == NULL */
 347    QemuOptsList *create_opts;
 348
 349    /*
 350     * Returns 0 for completed check, -errno for internal errors.
 351     * The check results are stored in result.
 352     */
 353    int coroutine_fn (*bdrv_co_check)(BlockDriverState *bs,
 354                                      BdrvCheckResult *result,
 355                                      BdrvCheckMode fix);
 356
 357    int (*bdrv_amend_options)(BlockDriverState *bs, QemuOpts *opts,
 358                              BlockDriverAmendStatusCB *status_cb,
 359                              void *cb_opaque,
 360                              Error **errp);
 361
 362    void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event);
 363
 364    /* TODO Better pass a option string/QDict/QemuOpts to add any rule? */
 365    int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event,
 366        const char *tag);
 367    int (*bdrv_debug_remove_breakpoint)(BlockDriverState *bs,
 368        const char *tag);
 369    int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag);
 370    bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag);
 371
 372    void (*bdrv_refresh_limits)(BlockDriverState *bs, Error **errp);
 373
 374    /*
 375     * Returns 1 if newly created images are guaranteed to contain only
 376     * zeros, 0 otherwise.
 377     */
 378    int (*bdrv_has_zero_init)(BlockDriverState *bs);
 379
 380    /* Remove fd handlers, timers, and other event loop callbacks so the event
 381     * loop is no longer in use.  Called with no in-flight requests and in
 382     * depth-first traversal order with parents before child nodes.
 383     */
 384    void (*bdrv_detach_aio_context)(BlockDriverState *bs);
 385
 386    /* Add fd handlers, timers, and other event loop callbacks so I/O requests
 387     * can be processed again.  Called with no in-flight requests and in
 388     * depth-first traversal order with child nodes before parent nodes.
 389     */
 390    void (*bdrv_attach_aio_context)(BlockDriverState *bs,
 391                                    AioContext *new_context);
 392
 393    /* io queue for linux-aio */
 394    void (*bdrv_io_plug)(BlockDriverState *bs);
 395    void (*bdrv_io_unplug)(BlockDriverState *bs);
 396
 397    /**
 398     * Try to get @bs's logical and physical block size.
 399     * On success, store them in @bsz and return zero.
 400     * On failure, return negative errno.
 401     */
 402    int (*bdrv_probe_blocksizes)(BlockDriverState *bs, BlockSizes *bsz);
 403    /**
 404     * Try to get @bs's geometry (cyls, heads, sectors)
 405     * On success, store them in @geo and return 0.
 406     * On failure return -errno.
 407     * Only drivers that want to override guest geometry implement this
 408     * callback; see hd_geometry_guess().
 409     */
 410    int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo);
 411
 412    /**
 413     * bdrv_co_drain_begin is called if implemented in the beginning of a
 414     * drain operation to drain and stop any internal sources of requests in
 415     * the driver.
 416     * bdrv_co_drain_end is called if implemented at the end of the drain.
 417     *
 418     * They should be used by the driver to e.g. manage scheduled I/O
 419     * requests, or toggle an internal state. After the end of the drain new
 420     * requests will continue normally.
 421     */
 422    void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs);
 423    void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs);
 424
 425    void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child,
 426                           Error **errp);
 427    void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child,
 428                           Error **errp);
 429
 430    /**
 431     * Informs the block driver that a permission change is intended. The
 432     * driver checks whether the change is permissible and may take other
 433     * preparations for the change (e.g. get file system locks). This operation
 434     * is always followed by a call to either .bdrv_set_perm or
 435     * .bdrv_abort_perm_update.
 436     *
 437     * Checks whether the requested set of cumulative permissions in @perm
 438     * can be granted for accessing @bs and whether no other users are using
 439     * permissions other than those given in @shared (both arguments take
 440     * BLK_PERM_* bitmasks).
 441     *
 442     * If both conditions are met, 0 is returned. Otherwise, -errno is returned
 443     * and errp is set to an error describing the conflict.
 444     */
 445    int (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm,
 446                           uint64_t shared, Error **errp);
 447
 448    /**
 449     * Called to inform the driver that the cumulative set of used
 450     * permissions for @bs has changed to @perm, and the set of sharable
 451     * permission to @shared. The driver can use this to propagate changes to
 452     * its children (i.e. request permissions only if a parent actually needs
 453     * them).
 454     *
 455     * This function is only invoked after bdrv_check_perm(), so block drivers
 456     * may rely on preparations made in their .bdrv_check_perm implementation.
 457     */
 458    void (*bdrv_set_perm)(BlockDriverState *bs, uint64_t perm, uint64_t shared);
 459
 460    /*
 461     * Called to inform the driver that after a previous bdrv_check_perm()
 462     * call, the permission update is not performed and any preparations made
 463     * for it (e.g. taken file locks) need to be undone.
 464     *
 465     * This function can be called even for nodes that never saw a
 466     * bdrv_check_perm() call. It is a no-op then.
 467     */
 468    void (*bdrv_abort_perm_update)(BlockDriverState *bs);
 469
 470    /**
 471     * Returns in @nperm and @nshared the permissions that the driver for @bs
 472     * needs on its child @c, based on the cumulative permissions requested by
 473     * the parents in @parent_perm and @parent_shared.
 474     *
 475     * If @c is NULL, return the permissions for attaching a new child for the
 476     * given @role.
 477     *
 478     * If @reopen_queue is non-NULL, don't return the currently needed
 479     * permissions, but those that will be needed after applying the
 480     * @reopen_queue.
 481     */
 482     void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c,
 483                             const BdrvChildRole *role,
 484                             BlockReopenQueue *reopen_queue,
 485                             uint64_t parent_perm, uint64_t parent_shared,
 486                             uint64_t *nperm, uint64_t *nshared);
 487
 488    /**
 489     * Bitmaps should be marked as 'IN_USE' in the image on reopening image
 490     * as rw. This handler should implement that. It also should unset readonly
 491     * field of BlockDirtyBitmap's in case of success.
 492     */
 493    int (*bdrv_reopen_bitmaps_rw)(BlockDriverState *bs, Error **errp);
 494    bool (*bdrv_can_store_new_dirty_bitmap)(BlockDriverState *bs,
 495                                            const char *name,
 496                                            uint32_t granularity,
 497                                            Error **errp);
 498    void (*bdrv_remove_persistent_dirty_bitmap)(BlockDriverState *bs,
 499                                                const char *name,
 500                                                Error **errp);
 501
 502    /**
 503     * Register/unregister a buffer for I/O. For example, when the driver is
 504     * interested to know the memory areas that will later be used in iovs, so
 505     * that it can do IOMMU mapping with VFIO etc., in order to get better
 506     * performance. In the case of VFIO drivers, this callback is used to do
 507     * DMA mapping for hot buffers.
 508     */
 509    void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size);
 510    void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host);
 511    QLIST_ENTRY(BlockDriver) list;
 512};
 513
 514typedef struct BlockLimits {
        /* I/O limits and alignment requirements.  A BlockDriverState embeds
         * one of these as its 'bl' member. */

 515    /* Alignment requirement, in bytes, for offset/length of I/O
 516     * requests. Must be a power of 2 less than INT_MAX; defaults to
 517     * 1 for drivers with modern byte interfaces, and to 512
 518     * otherwise. */
 519    uint32_t request_alignment;
 520
 521    /* Maximum number of bytes that can be discarded at once (since it
 522     * is signed, it must be < 2G, if set). Must be multiple of
 523     * pdiscard_alignment, but need not be power of 2. May be 0 if no
 524     * inherent 32-bit limit */
 525    int32_t max_pdiscard;
 526
 527    /* Optimal alignment for discard requests in bytes. A power of 2
 528     * is best but not mandatory.  Must be a multiple of
 529     * bl.request_alignment, and must be less than max_pdiscard if
 530     * that is set. May be 0 if bl.request_alignment is good enough */
 531    uint32_t pdiscard_alignment;
 532
 533    /* Maximum number of bytes that can be zeroized at once (since it is
 534     * signed, it must be < 2G, if set). Must be multiple of
 535     * pwrite_zeroes_alignment. May be 0 if no inherent 32-bit limit */
 536    int32_t max_pwrite_zeroes;
 537
 538    /* Optimal alignment for write zeroes requests in bytes. A power
 539     * of 2 is best but not mandatory.  Must be a multiple of
 540     * bl.request_alignment, and must be less than max_pwrite_zeroes
 541     * if that is set. May be 0 if bl.request_alignment is good
 542     * enough */
 543    uint32_t pwrite_zeroes_alignment;
 544
 545    /* Optimal transfer length in bytes.  A power of 2 is best but not
 546     * mandatory.  Must be a multiple of bl.request_alignment, or 0 if
 547     * no preferred size */
 548    uint32_t opt_transfer;
 549
 550    /* Maximal transfer length in bytes.  Need not be power of 2, but
 551     * must be multiple of opt_transfer and bl.request_alignment, or 0
 552     * for no 32-bit limit.  For now, anything larger than INT_MAX is
 553     * clamped down. */
 554    uint32_t max_transfer;
 555
 556    /* memory alignment, in bytes so that no bounce buffer is needed */
 557    size_t min_mem_alignment;
 558
 559    /* memory alignment, in bytes, for bounce buffer */
 560    size_t opt_mem_alignment;
 561
 562    /* maximum number of iovec elements */
 563    int max_iov;
 564} BlockLimits;
 565
 566typedef struct BdrvOpBlocker BdrvOpBlocker; /* forward declaration only */
 567
 568typedef struct BdrvAioNotifier {
        /* One registered pair of AioContext-change callbacks together with an
         * opaque pointer (both callbacks take a void *opaque argument).
         * Entries are chained through 'list'; BlockDriverState.aio_notifiers
         * is a list of these. */
 569    void (*attached_aio_context)(AioContext *new_context, void *opaque);
 570    void (*detach_aio_context)(void *opaque);
 571
 572    void *opaque;
        /* NOTE(review): presumably set instead of unlinking the entry while
         * the list is being walked (cf. the 'walking_aio_notifiers' flag in
         * BlockDriverState) -- confirm in the code that removes notifiers. */
 573    bool deleted;
 574
 575    QLIST_ENTRY(BdrvAioNotifier) list;
 576} BdrvAioNotifier;
 577
 578struct BdrvChildRole {
        /* Properties and callbacks describing the parent side of a BdrvChild
         * link; every BdrvChild points to one through its 'role' member.  Most
         * callbacks receive the BdrvChild concerned.
         * NOTE(review): no contract is documented here for inherit_options,
         * change_media, resize, attach and detach; see the users of these
         * callbacks and the child_file/child_format/child_backing instances.
         */

 579    /* If true, bdrv_replace_node() doesn't change the node this BdrvChild
 580     * points to. */
 581    bool stay_at_node;
 582
 583    /* If true, the parent is a BlockDriverState and bdrv_next_all_states()
 584     * will return it. This information is used for drain_all, where every node
 585     * will be drained separately, so the drain only needs to be propagated to
 586     * non-BDS parents. */
 587    bool parent_is_bds;
 588
 589    void (*inherit_options)(int *child_flags, QDict *child_options,
 590                            int parent_flags, QDict *parent_options);
 591
 592    void (*change_media)(BdrvChild *child, bool load);
 593    void (*resize)(BdrvChild *child);
 594
 595    /* Returns a name that is supposedly more useful for human users than the
 596     * node name for identifying the node in question (in particular, a BB
 597     * name), or NULL if the parent can't provide a better name. */
 598    const char *(*get_name)(BdrvChild *child);
 599
 600    /* Returns a malloced string that describes the parent of the child for a
 601     * human reader. This could be a node-name, BlockBackend name, qdev ID or
 602     * QOM path of the device owning the BlockBackend, job type and ID etc. The
 603     * caller is responsible for freeing the memory. */
 604    char *(*get_parent_desc)(BdrvChild *child);
 605
 606    /*
 607     * If this pair of functions is implemented, the parent doesn't issue new
 608     * requests after returning from .drained_begin() until .drained_end() is
 609     * called.
 610     *
 611     * These functions must not change the graph (and therefore also must not
 612     * call aio_poll(), which could change the graph indirectly).
 613     *
 614     * Note that this can be nested. If drained_begin() was called twice, new
 615     * I/O is allowed only after drained_end() was called twice, too.
 616     */
 617    void (*drained_begin)(BdrvChild *child);
 618    void (*drained_end)(BdrvChild *child);
 619
 620    /*
 621     * Returns whether the parent has pending requests for the child. This
 622     * callback is polled after .drained_begin() has been called until all
 623     * activity on the child has stopped.
 624     */
 625    bool (*drained_poll)(BdrvChild *child);
 626
 627    /* Notifies the parent that the child has been activated/inactivated (e.g.
 628     * when migration is completing) and it can start/stop requesting
 629     * permissions and doing I/O on it. */
 630    void (*activate)(BdrvChild *child, Error **errp);
 631    int (*inactivate)(BdrvChild *child);
 632
 633    void (*attach)(BdrvChild *child);
 634    void (*detach)(BdrvChild *child);
 635
 636    /* Notifies the parent that the filename of its child has changed (e.g.
 637     * because the direct child was removed from the backing chain), so that it
 638     * can update its reference. */
 639    int (*update_filename)(BdrvChild *child, BlockDriverState *new_base,
 640                           const char *filename, Error **errp);
 641};
 642
 643extern const BdrvChildRole child_file;
 644extern const BdrvChildRole child_format;
 645extern const BdrvChildRole child_backing;
    /* The three roles above are only declared here; their definitions live in
     * another translation unit.
     * NOTE(review): going by the names, child_file and child_backing are
     * presumably the roles used for BlockDriverState.file and
     * BlockDriverState.backing -- confirm where the children are attached. */
 646
 647struct BdrvChild {
        /* One parent-to-child link in the node graph: the child node @bs, a
         * name for the link, the role describing the parent side, and the
         * permissions associated with the link.
         * NOTE(review): 'opaque' is presumably owned by the parent and
         * interpreted by the role callbacks -- confirm against the role
         * implementations. */
 648    BlockDriverState *bs;
 649    char *name;
 650    const BdrvChildRole *role;
 651    void *opaque;
 652
 653    /**
 654     * Granted permissions for operating on this BdrvChild (BLK_PERM_* bitmask)
 655     */
 656    uint64_t perm;
 657
 658    /**
 659     * Permissions that can still be granted to other users of @bs while this
 660     * BdrvChild is still attached to it. (BLK_PERM_* bitmask)
 661     */
 662    uint64_t shared_perm;
 663
        /* NOTE(review): presumably 'next' chains the children of one parent
         * and 'next_parent' chains the parents of one child node -- confirm
         * against the list heads these entries are inserted into. */
 664    QLIST_ENTRY(BdrvChild) next;
 665    QLIST_ENTRY(BdrvChild) next_parent;
 666};
 667
 668/*
     * One node of the block graph.  The fields are grouped by the locking rule
     * that protects them; each group is introduced by a comment naming it.
     *
 669 * Note: the function bdrv_append() copies and swaps contents of
 670 * BlockDriverStates, so if you add new fields to this struct, please
 671 * inspect bdrv_append() to determine if the new fields need to be
 672 * copied as well.
 673 */
 674struct BlockDriverState {
 675    /* Protected by big QEMU lock or read-only after opening.  No special
 676     * locking needed during I/O...
 677     */
 678    int open_flags; /* flags used to open the file, re-used for re-open */
 679    bool read_only; /* if true, the media is read only */
 680    bool encrypted; /* if true, the media is encrypted */
 681    bool sg;        /* if true, the device is a /dev/sg* */
 682    bool probed;    /* if true, format was probed rather than specified */
 683    bool force_share; /* if true, always allow all shared permissions */
 684    bool implicit;  /* if true, this filter node was automatically inserted */
 685
 686    BlockDriver *drv; /* NULL means no media */
        /* NOTE(review): presumably the private state of @drv -- confirm */
 687    void *opaque;
 688
 689    AioContext *aio_context; /* event loop used for fd handlers, timers, etc */
 690    /* long-running tasks intended to always use the same AioContext as this
 691     * BDS may register themselves in this list to be notified of changes
 692     * regarding this BDS's context */
 693    QLIST_HEAD(, BdrvAioNotifier) aio_notifiers;
 694    bool walking_aio_notifiers; /* to make removal during iteration safe */
 695
 696    char filename[PATH_MAX];
 697    char backing_file[PATH_MAX]; /* if non zero, the image is a diff of
 698                                    this file image */
 699    char backing_format[16]; /* if non-zero and backing_file exists */
 700
 701    QDict *full_open_options;
 702    char exact_filename[PATH_MAX];
 703
        /* Child references; backing_bs() returns backing->bs, or NULL when
         * @backing is NULL. */
 704    BdrvChild *backing;
 705    BdrvChild *file;
 706
 707    /* I/O Limits */
 708    BlockLimits bl;
 709
 710    /* Flags honored during pwrite (so far: BDRV_REQ_FUA,
 711     * BDRV_REQ_WRITE_UNCHANGED).
 712     * If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those
 713     * writes will be issued as normal writes without the flag set.
 714     * This is important to note for drivers that do not explicitly
 715     * request a WRITE permission for their children and instead take
 716     * the same permissions as their parent did (this is commonly what
 717     * block filters do).  Such drivers have to be aware that the
 718     * parent may have taken a WRITE_UNCHANGED permission only and is
 719     * issuing such requests.  Drivers either must make sure that
 720     * these requests do not result in plain WRITE accesses (usually
 721     * by supporting BDRV_REQ_WRITE_UNCHANGED, and then forwarding
 722     * every incoming write request as-is, including potentially that
 723     * flag), or they have to explicitly take the WRITE permission for
 724     * their children. */
 725    unsigned int supported_write_flags;
 726    /* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA,
 727     * BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED) */
 728    unsigned int supported_zero_flags;
 729
 730    /* the following member gives a name to every node on the bs graph. */
 731    char node_name[32];
 732    /* element of the list of named nodes building the graph */
 733    QTAILQ_ENTRY(BlockDriverState) node_list;
 734    /* element of the list of all BlockDriverStates (all_bdrv_states) */
 735    QTAILQ_ENTRY(BlockDriverState) bs_list;
 736    /* element of the list of monitor-owned BDS */
 737    QTAILQ_ENTRY(BlockDriverState) monitor_list;
        /* NOTE(review): reference count of this node; the functions that
         * change it are not visible in this header -- confirm in block.c */
 738    int refcnt;
 739
 740    /* operation blockers */
 741    QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX];
 742
 743    /* long-running background operation */
 744    BlockJob *job;
 745
 746    /* The node that this node inherited default options from (and a reopen on
 747     * which can affect this node by changing these defaults). This is always a
 748     * parent node of this node. */
 749    BlockDriverState *inherits_from;
        /* Lists of BdrvChild entries (element type taken from the QLIST_HEAD
         * declarations) */
 750    QLIST_HEAD(, BdrvChild) children;
 751    QLIST_HEAD(, BdrvChild) parents;
 752
 753    QDict *options;
 754    QDict *explicit_options;
 755    BlockdevDetectZeroesOptions detect_zeroes;
 756
 757    /* The error object in use for blocking operations on backing_hd */
 758    Error *backing_blocker;
 759
 760    /* Protected by AioContext lock */
 761
 762    /* If we are reading a disk image, give its size in sectors.
 763     * Generally read-only; it is written to by load_snapshot and
 764     * save_snapshot, but the block layer is quiescent during those.
 765     */
 766    int64_t total_sectors;
 767
 768    /* Callback before write request is processed */
 769    NotifierWithReturnList before_write_notifiers;
 770
 771    /* threshold limit for writes, in bytes. "High water mark". */
 772    uint64_t write_threshold_offset;
 773    NotifierWithReturn write_threshold_notifier;
 774
 775    /* Writing to the list requires the BQL _and_ the dirty_bitmap_mutex.
 776     * Reading from the list can be done with either the BQL or the
 777     * dirty_bitmap_mutex.  Modifying a bitmap only requires
 778     * dirty_bitmap_mutex.  */
 779    QemuMutex dirty_bitmap_mutex;
 780    QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
 781
 782    /* Offset after the highest byte written to */
 783    Stat64 wr_highest_offset;
 784
 785    /* If true, copy read backing sectors into image.  Can be >1 if more
 786     * than one client has requested copy-on-read.  Accessed with atomic
 787     * ops.
 788     */
 789    int copy_on_read;
 790
 791    /* number of in-flight requests; overall and serialising.
 792     * Accessed with atomic ops.
 793     */
 794    unsigned int in_flight;
 795    unsigned int serialising_in_flight;
 796
 797    /* counter for nested bdrv_io_plug.
 798     * Accessed with atomic ops.
 799     */
 800    unsigned io_plugged;
 801
 802    /* do we need to tell the guest if we have a volatile write cache? */
 803    int enable_write_cache;
 804
 805    /* Accessed with atomic ops.  */
 806    int quiesce_counter;
 807    int recursive_quiesce_counter;
 808
 809    unsigned int write_gen;               /* Current data generation */
 810
 811    /* Protected by reqs_lock.  */
 812    CoMutex reqs_lock;
 813    QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
 814    CoQueue flush_queue;                  /* Serializing flush queue */
 815    bool active_flush_req;                /* Flush request in flight? */
 816
 817    /* Only read/written by whoever has set active_flush_req to true.  */
 818    unsigned int flushed_gen;             /* Flushed write generation */
 819};
 820
    /*
     * Holds an open_flags / read_only / detect_zeroes triple, i.e. the same
     * three settings that also exist as fields of BlockDriverState.
     * NOTE(review): presumably kept by a BlockBackend for its root node --
     * confirm in block/block-backend.c.
     */
 821struct BlockBackendRootState {
 822    int open_flags;
 823    bool read_only;
 824    BlockdevDetectZeroesOptions detect_zeroes;
 825};
 826
    /*
     * Selects how the target's backing chain is established once a mirror job
     * has completed; this is the type of mirror_start()'s @backing_mode.
     */
 827typedef enum BlockMirrorBackingMode {
 828    /* Reuse the existing backing chain from the source for the target.
 829     * - sync=full: Set backing BDS to NULL.
 830     * - sync=top:  Use source's backing BDS.
 831     * - sync=none: Use source as the backing BDS. */
 832    MIRROR_SOURCE_BACKING_CHAIN,
 833
 834    /* Open the target's backing chain completely anew */
 835    MIRROR_OPEN_BACKING_CHAIN,
 836
 837    /* Do not change the target's backing BDS after job completion */
 838    MIRROR_LEAVE_BACKING_CHAIN,
 839} BlockMirrorBackingMode;
 840
    /*
     * Return the node referenced by @bs->backing, or NULL if @bs has no backing
     * child.  @bs itself must not be NULL: it is dereferenced unconditionally.
     */
 841static inline BlockDriverState *backing_bs(BlockDriverState *bs)
 842{
 843    return bs->backing ? bs->backing->bs : NULL;
 844}
 845
 846
 847/* Essential block drivers which must always be statically linked into QEMU, and
 848 * which therefore can be accessed directly, without going through
     * bdrv_find_format() */
 849extern BlockDriver bdrv_file;
 850extern BlockDriver bdrv_raw;
 851extern BlockDriver bdrv_qcow2;
 852
    /*
     * Vectored read / write on @child, marked coroutine_fn.
     * NOTE(review): presumably transfers @bytes bytes starting at byte @offset
     * using the buffers in @qiov, and returns 0 or a negative errno; neither
     * the units nor the return convention are visible in this header --
     * confirm in block/io.c.
     */
 853int coroutine_fn bdrv_co_preadv(BdrvChild *child,
 854    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
 855    BdrvRequestFlags flags);
 856int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
 857    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
 858    BdrvRequestFlags flags);
 859
    /*
     * Drain bookkeeping.  NOTE(review): this header does not say how these are
     * used; judging by the names, bdrv_drain_all_count counts active drain-all
     * sections and the two helpers apply / undo a subtree drain of the given
     * parent on @child -- confirm in block/io.c.
     */
 860extern unsigned int bdrv_drain_all_count;
 861void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
 862void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
 863
    /*
     * NOTE(review): semantics below are inferred from the names only.
     * get_tmp_filename() presumably writes a temporary file name of at most
     * @size bytes into @filename; bdrv_probe_all() presumably selects a driver
     * from the first @buf_size bytes in @buf and/or from @filename.  Confirm in
     * block.c before relying on either.
     */
 864int get_tmp_filename(char *filename, int size);
 865BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
 866                            const char *filename);
 867
    /* NOTE(review): presumably stores @filename, minus a leading @prefix, in
     * @options -- confirm in block.c. */
 868void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
 869                                      QDict *options);
 870
 871
 872/**
 873 * bdrv_add_before_write_notifier:
     * @bs: The BlockDriverState whose write requests are to be observed.
     * @notifier: The notifier to register.
 874 *
 875 * Register a callback that is invoked before write requests are processed but
 876 * after any throttling or waiting for overlapping requests.
 877 */
 878void bdrv_add_before_write_notifier(BlockDriverState *bs,
 879                                    NotifierWithReturn *notifier);
 880
 881/**
 882 * bdrv_detach_aio_context:
     * @bs: The BlockDriverState to operate on.
 883 *
 884 * May be called from .bdrv_detach_aio_context() to detach children from the
 885 * current #AioContext.  This is only needed by block drivers that manage their
 886 * own children.  Both ->file and ->backing are automatically handled and
 887 * block drivers should not call this function on them explicitly.
 888 */
 889void bdrv_detach_aio_context(BlockDriverState *bs);
 890
 891/**
 892 * bdrv_attach_aio_context:
     * @bs: The BlockDriverState to operate on.
     * @new_context: The #AioContext to attach to.
 893 *
 894 * May be called from .bdrv_attach_aio_context() to attach children to the new
 895 * #AioContext.  This is only needed by block drivers that manage their own
 896 * children.  Both ->file and ->backing are automatically handled and block
 897 * drivers should not call this function on them explicitly.
 898 */
 899void bdrv_attach_aio_context(BlockDriverState *bs,
 900                             AioContext *new_context);
 901
 902/**
 903 * bdrv_add_aio_context_notifier:
 904 *
 905 * If a long-running job intends to be always run in the same AioContext as a
 906 * certain BDS, it may use this function to be notified of changes regarding the
 907 * association of the BDS to an AioContext.
 908 *
 909 * attached_aio_context() is called after the target BDS has been attached to a
 910 * new AioContext; detach_aio_context() is called before the target BDS is being
 911 * detached from its old AioContext.
     *
     * Both callbacks take a void *opaque argument.  NOTE(review): presumably
     * the @opaque given here is what gets passed to them -- confirm in block.c.
 912 */
 913void bdrv_add_aio_context_notifier(BlockDriverState *bs,
 914        void (*attached_aio_context)(AioContext *new_context, void *opaque),
 915        void (*detach_aio_context)(void *opaque), void *opaque);
 916
 917/**
 918 * bdrv_remove_aio_context_notifier:
 919 *
 920 * Unsubscribe from change notifications regarding the BDS's AioContext. The
 921 * parameters given here have to be the same as those given to
 922 * bdrv_add_aio_context_notifier().
 923 */
 924void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
 925                                      void (*aio_context_attached)(AioContext *,
 926                                                                   void *),
 927                                      void (*aio_context_detached)(void *),
 928                                      void *opaque);
 929
 930/**
 931 * bdrv_wakeup:
 932 * @bs: The BlockDriverState for which an I/O operation has been completed.
 933 *
 934 * Wake up the main thread if it is waiting on BDRV_POLL_WHILE.  During
 935 * synchronous I/O on a BlockDriverState that is attached to another
 936 * I/O thread, the main thread lets the I/O thread's event loop run,
 937 * waiting for the I/O operation to complete.  bdrv_wakeup() wakes
 938 * up the main thread if necessary.
 939 *
 940 * Manual calls to bdrv_wakeup() are rarely necessary, because
 941 * bdrv_dec_in_flight() already calls it.
 942 */
 943void bdrv_wakeup(BlockDriverState *bs);
 944
    /*
     * Only declared when building for Windows (_WIN32).
     * NOTE(review): judging by the name, reports whether @filename denotes a
     * Windows drive; the exact return values are not visible here.
     */
 945#ifdef _WIN32
 946int is_windows_drive(const char *filename);
 947#endif
 948
 949/**
 950 * stream_start:
 951 * @job_id: The id of the newly-created job, or %NULL to use the
 952 * device name of @bs.
 953 * @bs: Block device to operate on.
 954 * @base: Block device that will become the new base, or %NULL to
 955 * flatten the whole backing file chain onto @bs.
 956 * @backing_file_str: The file name that will be written to @bs as the
 957 * new backing file if the job completes. Ignored if @base is %NULL.
 958 * @creation_flags: Flags that control the behavior of the Job lifetime.
 959 *                  See @BlockJobCreateFlags
 960 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
 961 * @on_error: The action to take upon error.
 962 * @errp: Error object.
 963 *
 964 * Start a streaming operation on @bs.  Clusters that are unallocated
 965 * in @bs, but allocated in any image between @base and @bs (both
 966 * exclusive) will be written to @bs.  At the end of a successful
 967 * streaming job, the backing file of @bs will be changed to
 968 * @backing_file_str in the written image and to @base in the live
 969 * BlockDriverState.
 970 */
 971void stream_start(const char *job_id, BlockDriverState *bs,
 972                  BlockDriverState *base, const char *backing_file_str,
 973                  int creation_flags, int64_t speed,
 974                  BlockdevOnError on_error, Error **errp);
 975
 976/**
 977 * commit_start:
 978 * @job_id: The id of the newly-created job, or %NULL to use the
 979 * device name of @bs.
 980 * @bs: Active block device.
 981 * @top: Top block device to be committed.
 982 * @base: Block device that will be written into, and become the new top.
 983 * @creation_flags: Flags that control the behavior of the Job lifetime.
 984 *                  See @BlockJobCreateFlags
 985 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
 986 * @on_error: The action to take upon error.
 987 * @backing_file_str: String to use as the backing file in @top's overlay
 988 * @filter_node_name: The node name that should be assigned to the filter
 989 * driver that the commit job inserts into the graph above @top. NULL means
 990 * that a node name should be autogenerated.
 991 * @errp: Error object.
 992 *
     * Start a commit operation: @top is committed into @base, which then
     * becomes the new top.  Note that the prototype takes @base before @top,
     * unlike the order of the descriptions above.
 993 */
 994void commit_start(const char *job_id, BlockDriverState *bs,
 995                  BlockDriverState *base, BlockDriverState *top,
 996                  int creation_flags, int64_t speed,
 997                  BlockdevOnError on_error, const char *backing_file_str,
 998                  const char *filter_node_name, Error **errp);
 999/**
1000 * commit_active_start:
1001 * @job_id: The id of the newly-created job, or %NULL to use the
1002 * device name of @bs.
1003 * @bs: Active block device to be committed.
1004 * @base: Block device that will be written into, and become the new top.
1005 * @creation_flags: Flags that control the behavior of the Job lifetime.
1006 *                  See @BlockJobCreateFlags
1007 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
1008 * @on_error: The action to take upon error.
1009 * @filter_node_name: The node name that should be assigned to the filter
1010 * driver that the commit job inserts into the graph above @bs. NULL means that
1011 * a node name should be autogenerated.
1012 * @cb: Completion function for the job.
1013 * @opaque: Opaque pointer value passed to @cb.
1014 * @auto_complete: Auto complete the job.
1015 * @errp: Error object.
1016 *
     * Start a commit operation in which the active device @bs itself is
     * committed into @base.
1017 */
1018void commit_active_start(const char *job_id, BlockDriverState *bs,
1019                         BlockDriverState *base, int creation_flags,
1020                         int64_t speed, BlockdevOnError on_error,
1021                         const char *filter_node_name,
1022                         BlockCompletionFunc *cb, void *opaque,
1023                         bool auto_complete, Error **errp);
1024/**
1025 * mirror_start:
1026 * @job_id: The id of the newly-created job, or %NULL to use the
1027 * device name of @bs.
1028 * @bs: Block device to operate on.
1029 * @target: Block device to write to.
1030 * @replaces: Block graph node name to replace once the mirror is done. Can
1031 *            only be used when full mirroring is selected.
1032 * @creation_flags: Flags that control the behavior of the Job lifetime.
1033 *                  See @BlockJobCreateFlags
1034 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
1035 * @granularity: The chosen granularity for the dirty bitmap.
1036 * @buf_size: The amount of data that can be in flight at one time.
1037 * @mode: Whether to collapse all images in the chain to the target.
1038 * @backing_mode: How to establish the target's backing chain after completion.
1039 * @on_source_error: The action to take upon error reading from the source.
1040 * @on_target_error: The action to take upon error writing to the target.
1041 * @unmap: Whether to unmap target where source sectors only contain zeroes.
1042 * @filter_node_name: The node name that should be assigned to the filter
1043 * driver that the mirror job inserts into the graph above @bs. NULL means that
1044 * a node name should be autogenerated.
1045 * @copy_mode: When to trigger writes to the target.
1046 * @errp: Error object.
1047 *
1048 * Start a mirroring operation on @bs.  Clusters that are allocated
1049 * in @bs will be written to @target until the job is cancelled or
1050 * manually completed.  At the end of a successful mirroring job,
1051 * @bs will be switched to read from @target.
1052 */
1053void mirror_start(const char *job_id, BlockDriverState *bs,
1054                  BlockDriverState *target, const char *replaces,
1055                  int creation_flags, int64_t speed,
1056                  uint32_t granularity, int64_t buf_size,
1057                  MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
1058                  BlockdevOnError on_source_error,
1059                  BlockdevOnError on_target_error,
1060                  bool unmap, const char *filter_node_name,
1061                  MirrorCopyMode copy_mode, Error **errp);
1062
1063/**
1064 * backup_job_create:
1065 * @job_id: The id of the newly-created job, or %NULL to use the
1066 * device name of @bs.
1067 * @bs: Block device to operate on.
1068 * @target: Block device to write to.
1069 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
1070 * @sync_mode: What parts of the disk image should be copied to the destination.
1071 * @sync_bitmap: The dirty bitmap if sync_mode is MIRROR_SYNC_MODE_INCREMENTAL.
     * @compress: NOTE(review): undocumented in the original; presumably whether
     *            the data written to @target is compressed -- confirm.
1072 * @on_source_error: The action to take upon error reading from the source.
1073 * @on_target_error: The action to take upon error writing to the target.
1074 * @creation_flags: Flags that control the behavior of the Job lifetime.
1075 *                  See @BlockJobCreateFlags
1076 * @cb: Completion function for the job.
1077 * @opaque: Opaque pointer value passed to @cb.
1078 * @txn: Transaction that this job is part of (may be NULL).
     * @errp: Error object.
1079 *
1080 * Create a backup operation on @bs.  Clusters in @bs are written to @target
1081 * until the job is cancelled or manually completed.
1082 */
1083BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
1084                            BlockDriverState *target, int64_t speed,
1085                            MirrorSyncMode sync_mode,
1086                            BdrvDirtyBitmap *sync_bitmap,
1087                            bool compress,
1088                            BlockdevOnError on_source_error,
1089                            BlockdevOnError on_target_error,
1090                            int creation_flags,
1091                            BlockCompletionFunc *cb, void *opaque,
1092                            JobTxn *txn, Error **errp);
1093
    /* NOTE(review): judging by the name, the HMP monitor handler that adds a
     * block node described by the option string @optstr -- confirm in
     * blockdev.c. */
1094void hmp_drive_add_node(Monitor *mon, const char *optstr);
1095
    /*
     * Create / release a BdrvChild.  The arguments of bdrv_root_attach_child()
     * carry the same names as the BdrvChild fields (name, role, perm,
     * shared_perm, opaque).  NOTE(review): presumably they are stored there
     * and the result references @child_bs, with NULL returned and @errp set on
     * failure -- confirm in block.c.
     */
1096BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
1097                                  const char *child_name,
1098                                  const BdrvChildRole *child_role,
1099                                  uint64_t perm, uint64_t shared_perm,
1100                                  void *opaque, Error **errp);
1101void bdrv_root_unref_child(BdrvChild *child);
1102
    /* NOTE(review): presumably tries to change the permissions of @c to @perm
     * (granted) and @shared (shared), both BLK_PERM_* bitmasks, reporting
     * failure through the return value and @errp -- confirm in block.c. */
1103int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
1104                            Error **errp);
1105
1106/* Default implementation for BlockDriver.bdrv_child_perm() that can be used by
1107 * block filters: Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED and RESIZE to
1108 * all children.
     * NOTE(review): @nperm and @nshared look like output parameters that
     * receive the resulting permissions for child @c -- confirm in block.c. */
1109void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
1110                               const BdrvChildRole *role,
1111                               BlockReopenQueue *reopen_queue,
1112                               uint64_t perm, uint64_t shared,
1113                               uint64_t *nperm, uint64_t *nshared);
1114
1115/* Default implementation for BlockDriver.bdrv_child_perm() that can be used by
1116 * (non-raw) image formats: Like bdrv_filter_default_perms() for bs->backing,
     * but for bs->file it
1117 * requires WRITE | RESIZE for read-write images, always requires
1118 * CONSISTENT_READ and doesn't share WRITE.
     * NOTE(review): @nperm and @nshared look like output parameters that
     * receive the resulting permissions for child @c -- confirm in block.c. */
1119void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
1120                               const BdrvChildRole *role,
1121                               BlockReopenQueue *reopen_queue,
1122                               uint64_t perm, uint64_t shared,
1123                               uint64_t *nperm, uint64_t *nshared);
1124
1125/*
1126 * Default implementation for drivers to pass bdrv_co_block_status() to
1127 * their file.
     * NOTE(review): the meaning of @pnum, @map and @file is not visible in
     * this header; presumably they are outputs as in bdrv_co_block_status().
1128 */
1129int coroutine_fn bdrv_co_block_status_from_file(BlockDriverState *bs,
1130                                                bool want_zero,
1131                                                int64_t offset,
1132                                                int64_t bytes,
1133                                                int64_t *pnum,
1134                                                int64_t *map,
1135                                                BlockDriverState **file);
1136/*
1137 * Default implementation for drivers to pass bdrv_co_block_status() to
1138 * their backing file.
     * NOTE(review): the meaning of @pnum, @map and @file is not visible in
     * this header; presumably they are outputs as in bdrv_co_block_status().
1139 */
1140int coroutine_fn bdrv_co_block_status_from_backing(BlockDriverState *bs,
1141                                                   bool want_zero,
1142                                                   int64_t offset,
1143                                                   int64_t bytes,
1144                                                   int64_t *pnum,
1145                                                   int64_t *map,
1146                                                   BlockDriverState **file);
1147const char *bdrv_get_parent_name(const BlockDriverState *bs);
    /*
     * BlockBackend device hooks.  NOTE(review): only the prototypes are visible
     * here; judging by the names they notify or query the device attached to
     * @blk about media change, tray state and medium locking -- confirm in
     * block/block-backend.c.
     */
1148void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp);
1149bool blk_dev_has_removable_media(BlockBackend *blk);
1150bool blk_dev_has_tray(BlockBackend *blk);
1151void blk_dev_eject_request(BlockBackend *blk, bool force);
1152bool blk_dev_is_tray_open(BlockBackend *blk);
1153bool blk_dev_is_medium_locked(BlockBackend *blk);
1154
    /* Dirty bitmap helpers.  NOTE(review): presumably bdrv_set_dirty() marks
     * @bytes bytes starting at @offset as dirty in the bitmaps of @bs, and
     * the clear/restore pair saves the old contents in *@out and puts @backup
     * back -- confirm in block/dirty-bitmap.c. */
1155void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes);
1156
1157void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
1158void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup);
1159
    /* NOTE(review): presumably adjust @bs->in_flight, the number of in-flight
     * requests.  The bdrv_wakeup() documentation in this header states that
     * bdrv_dec_in_flight() already calls bdrv_wakeup(). */
1160void bdrv_inc_in_flight(BlockDriverState *bs);
1161void bdrv_dec_in_flight(BlockDriverState *bs);
1162
    /* NOTE(review): judging by the name, closes every remaining
     * BlockDriverState -- confirm in blockdev.c. */
1163void blockdev_close_all_bdrv_states(void);
1164
    /*
     * Range copy between two children, marked coroutine_fn.
     * NOTE(review): presumably copies @bytes bytes from @src at @src_offset to
     * @dst at @dst_offset, applying @read_flags to the read side and
     * @write_flags to the write side; how the _from and _to variants differ is
     * not visible in this header -- confirm in block/io.c.
     */
1165int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
1166                                         BdrvChild *dst, uint64_t dst_offset,
1167                                         uint64_t bytes,
1168                                         BdrvRequestFlags read_flags,
1169                                         BdrvRequestFlags write_flags);
1170int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
1171                                       BdrvChild *dst, uint64_t dst_offset,
1172                                       uint64_t bytes,
1173                                       BdrvRequestFlags read_flags,
1174                                       BdrvRequestFlags write_flags);
1175
    /* NOTE(review): presumably updates @bs->total_sectors, using @hint when
     * the size cannot be queried -- confirm in block.c. */
1176int refresh_total_sectors(BlockDriverState *bs, int64_t hint);
1177
1178#endif /* BLOCK_INT_H */
1179