qemu/block.c
<<
>>
Prefs
   1/*
   2 * QEMU System Emulator block driver
   3 *
   4 * Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24#include "config-host.h"
  25#include "qemu-common.h"
  26#include "trace.h"
  27#include "block/block_int.h"
  28#include "block/blockjob.h"
  29#include "qemu/error-report.h"
  30#include "qemu/module.h"
  31#include "qapi/qmp/qerror.h"
  32#include "qapi/qmp/qjson.h"
  33#include "sysemu/block-backend.h"
  34#include "sysemu/sysemu.h"
  35#include "qemu/notify.h"
  36#include "qemu/coroutine.h"
  37#include "block/qapi.h"
  38#include "qmp-commands.h"
  39#include "qemu/timer.h"
  40#include "qapi-event.h"
  41#include "block/throttle-groups.h"
  42
  43#ifdef CONFIG_BSD
  44#include <sys/types.h>
  45#include <sys/stat.h>
  46#include <sys/ioctl.h>
  47#include <sys/queue.h>
  48#ifndef __DragonFly__
  49#include <sys/disk.h>
  50#endif
  51#endif
  52
  53#ifdef _WIN32
  54#include <windows.h>
  55#endif
  56
  57/**
  58 * A BdrvDirtyBitmap can be in three possible states:
  59 * (1) successor is NULL and disabled is false: full r/w mode
  60 * (2) successor is NULL and disabled is true: read only mode ("disabled")
  61 * (3) successor is set: frozen mode.
  62 *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
  63 *     or enabled. A frozen bitmap can only abdicate() or reclaim().
  64 */
struct BdrvDirtyBitmap {
    HBitmap *bitmap;            /* Dirty sector bitmap implementation */
    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
    char *name;                 /* Optional non-empty unique ID */
    int64_t size;               /* Size of the bitmap (Number of sectors) */
    bool disabled;              /* Bitmap is read-only */
    /* List linkage; presumably for the per-node bs->dirty_bitmaps list that
     * bdrv_new() initialises -- TODO confirm against the insertion site */
    QLIST_ENTRY(BdrvDirtyBitmap) list;
};
  73
/*
 * Sentinel result for an emulated synchronous operation that has not finished
 * yet: bdrv_create() stores it in CreateCo.ret and polls the AioContext until
 * the coroutine entry point overwrites it with the real return value.
 */
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
  75
/* List that bdrv_new_root() appends every newly created root node to */
struct BdrvStates bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states);

/* Nodes that carry a node name; bdrv_assign_node_name() appends to it */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

/* All registered block drivers; bdrv_register() inserts at the head */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);
  83
/* Forward declaration of a file-local function defined elsewhere in this file */
static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
                             const char *reference, QDict *options, int flags,
                             BlockDriverState *parent,
                             const BdrvChildRole *child_role, Error **errp);

/* Forward declaration of a file-local function defined elsewhere in this file */
static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
/* If non-zero, use only whitelisted block drivers (checked in
 * bdrv_open_common() through bdrv_is_whitelisted()) */
static int use_bdrv_whitelist;
  92
  93#ifdef _WIN32
  94static int is_windows_drive_prefix(const char *filename)
  95{
  96    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
  97             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
  98            filename[1] == ':');
  99}
 100
 101int is_windows_drive(const char *filename)
 102{
 103    if (is_windows_drive_prefix(filename) &&
 104        filename[2] == '\0')
 105        return 1;
 106    if (strstart(filename, "\\\\.\\", NULL) ||
 107        strstart(filename, "//./", NULL))
 108        return 1;
 109    return 0;
 110}
 111#endif
 112
 113size_t bdrv_opt_mem_align(BlockDriverState *bs)
 114{
 115    if (!bs || !bs->drv) {
 116        /* page size or 4k (hdd sector size) should be on the safe side */
 117        return MAX(4096, getpagesize());
 118    }
 119
 120    return bs->bl.opt_mem_alignment;
 121}
 122
 123size_t bdrv_min_mem_align(BlockDriverState *bs)
 124{
 125    if (!bs || !bs->drv) {
 126        /* page size or 4k (hdd sector size) should be on the safe side */
 127        return MAX(4096, getpagesize());
 128    }
 129
 130    return bs->bl.min_mem_alignment;
 131}
 132
 133/* check if the path starts with "<protocol>:" */
 134int path_has_protocol(const char *path)
 135{
 136    const char *p;
 137
 138#ifdef _WIN32
 139    if (is_windows_drive(path) ||
 140        is_windows_drive_prefix(path)) {
 141        return 0;
 142    }
 143    p = path + strcspn(path, ":/\\");
 144#else
 145    p = path + strcspn(path, ":/");
 146#endif
 147
 148    return *p == ':';
 149}
 150
 151int path_is_absolute(const char *path)
 152{
 153#ifdef _WIN32
 154    /* specific case for names like: "\\.\d:" */
 155    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
 156        return 1;
 157    }
 158    return (*path == '/' || *path == '\\');
 159#else
 160    return (*path == '/');
 161#endif
 162}
 163
 164/* if filename is absolute, just copy it to dest. Otherwise, build a
 165   path to it by considering it is relative to base_path. URL are
 166   supported. */
 167void path_combine(char *dest, int dest_size,
 168                  const char *base_path,
 169                  const char *filename)
 170{
 171    const char *p, *p1;
 172    int len;
 173
 174    if (dest_size <= 0)
 175        return;
 176    if (path_is_absolute(filename)) {
 177        pstrcpy(dest, dest_size, filename);
 178    } else {
 179        p = strchr(base_path, ':');
 180        if (p)
 181            p++;
 182        else
 183            p = base_path;
 184        p1 = strrchr(base_path, '/');
 185#ifdef _WIN32
 186        {
 187            const char *p2;
 188            p2 = strrchr(base_path, '\\');
 189            if (!p1 || p2 > p1)
 190                p1 = p2;
 191        }
 192#endif
 193        if (p1)
 194            p1++;
 195        else
 196            p1 = base_path;
 197        if (p1 > p)
 198            p = p1;
 199        len = p - base_path;
 200        if (len > dest_size - 1)
 201            len = dest_size - 1;
 202        memcpy(dest, base_path, len);
 203        dest[len] = '\0';
 204        pstrcat(dest, dest_size, filename);
 205    }
 206}
 207
 208void bdrv_get_full_backing_filename_from_filename(const char *backed,
 209                                                  const char *backing,
 210                                                  char *dest, size_t sz,
 211                                                  Error **errp)
 212{
 213    if (backing[0] == '\0' || path_has_protocol(backing) ||
 214        path_is_absolute(backing))
 215    {
 216        pstrcpy(dest, sz, backing);
 217    } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
 218        error_setg(errp, "Cannot use relative backing file names for '%s'",
 219                   backed);
 220    } else {
 221        path_combine(dest, sz, backed, backing);
 222    }
 223}
 224
 225void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
 226                                    Error **errp)
 227{
 228    char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
 229
 230    bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
 231                                                 dest, sz, errp);
 232}
 233
 234void bdrv_register(BlockDriver *bdrv)
 235{
 236    bdrv_setup_io_funcs(bdrv);
 237
 238    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
 239}
 240
 241BlockDriverState *bdrv_new_root(void)
 242{
 243    BlockDriverState *bs = bdrv_new();
 244
 245    QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
 246    return bs;
 247}
 248
 249BlockDriverState *bdrv_new(void)
 250{
 251    BlockDriverState *bs;
 252    int i;
 253
 254    bs = g_new0(BlockDriverState, 1);
 255    QLIST_INIT(&bs->dirty_bitmaps);
 256    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
 257        QLIST_INIT(&bs->op_blockers[i]);
 258    }
 259    notifier_list_init(&bs->close_notifiers);
 260    notifier_with_return_list_init(&bs->before_write_notifiers);
 261    qemu_co_queue_init(&bs->throttled_reqs[0]);
 262    qemu_co_queue_init(&bs->throttled_reqs[1]);
 263    bs->refcnt = 1;
 264    bs->aio_context = qemu_get_aio_context();
 265
 266    return bs;
 267}
 268
 269void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
 270{
 271    notifier_list_add(&bs->close_notifiers, notify);
 272}
 273
 274BlockDriver *bdrv_find_format(const char *format_name)
 275{
 276    BlockDriver *drv1;
 277    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
 278        if (!strcmp(drv1->format_name, format_name)) {
 279            return drv1;
 280        }
 281    }
 282    return NULL;
 283}
 284
 285static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
 286{
 287    static const char *whitelist_rw[] = {
 288        CONFIG_BDRV_RW_WHITELIST
 289    };
 290    static const char *whitelist_ro[] = {
 291        CONFIG_BDRV_RO_WHITELIST
 292    };
 293    const char **p;
 294
 295    if (!whitelist_rw[0] && !whitelist_ro[0]) {
 296        return 1;               /* no whitelist, anything goes */
 297    }
 298
 299    for (p = whitelist_rw; *p; p++) {
 300        if (!strcmp(drv->format_name, *p)) {
 301            return 1;
 302        }
 303    }
 304    if (read_only) {
 305        for (p = whitelist_ro; *p; p++) {
 306            if (!strcmp(drv->format_name, *p)) {
 307                return 1;
 308            }
 309        }
 310    }
 311    return 0;
 312}
 313
/* Arguments and result of bdrv_create(), handed to bdrv_create_co_entry() */
typedef struct CreateCo {
    BlockDriver *drv;   /* driver whose bdrv_create callback is invoked */
    char *filename;     /* copy of the file name; freed by bdrv_create() */
    QemuOpts *opts;     /* options passed through to the driver callback */
    int ret;            /* NOT_DONE until the entry point stores the result */
    Error *err;         /* error propagated from the driver callback, if any */
} CreateCo;
 321
 322static void coroutine_fn bdrv_create_co_entry(void *opaque)
 323{
 324    Error *local_err = NULL;
 325    int ret;
 326
 327    CreateCo *cco = opaque;
 328    assert(cco->drv);
 329
 330    ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
 331    if (local_err) {
 332        error_propagate(&cco->err, local_err);
 333    }
 334    cco->ret = ret;
 335}
 336
 337int bdrv_create(BlockDriver *drv, const char* filename,
 338                QemuOpts *opts, Error **errp)
 339{
 340    int ret;
 341
 342    Coroutine *co;
 343    CreateCo cco = {
 344        .drv = drv,
 345        .filename = g_strdup(filename),
 346        .opts = opts,
 347        .ret = NOT_DONE,
 348        .err = NULL,
 349    };
 350
 351    if (!drv->bdrv_create) {
 352        error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
 353        ret = -ENOTSUP;
 354        goto out;
 355    }
 356
 357    if (qemu_in_coroutine()) {
 358        /* Fast-path if already in coroutine context */
 359        bdrv_create_co_entry(&cco);
 360    } else {
 361        co = qemu_coroutine_create(bdrv_create_co_entry);
 362        qemu_coroutine_enter(co, &cco);
 363        while (cco.ret == NOT_DONE) {
 364            aio_poll(qemu_get_aio_context(), true);
 365        }
 366    }
 367
 368    ret = cco.ret;
 369    if (ret < 0) {
 370        if (cco.err) {
 371            error_propagate(errp, cco.err);
 372        } else {
 373            error_setg_errno(errp, -ret, "Could not create image");
 374        }
 375    }
 376
 377out:
 378    g_free(cco.filename);
 379    return ret;
 380}
 381
 382int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
 383{
 384    BlockDriver *drv;
 385    Error *local_err = NULL;
 386    int ret;
 387
 388    drv = bdrv_find_protocol(filename, true, errp);
 389    if (drv == NULL) {
 390        return -ENOENT;
 391    }
 392
 393    ret = bdrv_create(drv, filename, opts, &local_err);
 394    if (local_err) {
 395        error_propagate(errp, local_err);
 396    }
 397    return ret;
 398}
 399
 400/**
 401 * Try to get @bs's logical and physical block size.
 402 * On success, store them in @bsz struct and return 0.
 403 * On failure return -errno.
 404 * @bs must not be empty.
 405 */
 406int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
 407{
 408    BlockDriver *drv = bs->drv;
 409
 410    if (drv && drv->bdrv_probe_blocksizes) {
 411        return drv->bdrv_probe_blocksizes(bs, bsz);
 412    }
 413
 414    return -ENOTSUP;
 415}
 416
 417/**
 418 * Try to get @bs's geometry (cyls, heads, sectors).
 419 * On success, store them in @geo struct and return 0.
 420 * On failure return -errno.
 421 * @bs must not be empty.
 422 */
 423int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
 424{
 425    BlockDriver *drv = bs->drv;
 426
 427    if (drv && drv->bdrv_probe_geometry) {
 428        return drv->bdrv_probe_geometry(bs, geo);
 429    }
 430
 431    return -ENOTSUP;
 432}
 433
 434/*
 435 * Create a uniquely-named empty temporary file.
 436 * Return 0 upon success, otherwise a negative errno value.
 437 */
 438int get_tmp_filename(char *filename, int size)
 439{
 440#ifdef _WIN32
 441    char temp_dir[MAX_PATH];
 442    /* GetTempFileName requires that its output buffer (4th param)
 443       have length MAX_PATH or greater.  */
 444    assert(size >= MAX_PATH);
 445    return (GetTempPath(MAX_PATH, temp_dir)
 446            && GetTempFileName(temp_dir, "qem", 0, filename)
 447            ? 0 : -GetLastError());
 448#else
 449    int fd;
 450    const char *tmpdir;
 451    tmpdir = getenv("TMPDIR");
 452    if (!tmpdir) {
 453        tmpdir = "/var/tmp";
 454    }
 455    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
 456        return -EOVERFLOW;
 457    }
 458    fd = mkstemp(filename);
 459    if (fd < 0) {
 460        return -errno;
 461    }
 462    if (close(fd) != 0) {
 463        unlink(filename);
 464        return -errno;
 465    }
 466    return 0;
 467#endif
 468}
 469
 470/*
 471 * Detect host devices. By convention, /dev/cdrom[N] is always
 472 * recognized as a host CDROM.
 473 */
 474static BlockDriver *find_hdev_driver(const char *filename)
 475{
 476    int score_max = 0, score;
 477    BlockDriver *drv = NULL, *d;
 478
 479    QLIST_FOREACH(d, &bdrv_drivers, list) {
 480        if (d->bdrv_probe_device) {
 481            score = d->bdrv_probe_device(filename);
 482            if (score > score_max) {
 483                score_max = score;
 484                drv = d;
 485            }
 486        }
 487    }
 488
 489    return drv;
 490}
 491
 492BlockDriver *bdrv_find_protocol(const char *filename,
 493                                bool allow_protocol_prefix,
 494                                Error **errp)
 495{
 496    BlockDriver *drv1;
 497    char protocol[128];
 498    int len;
 499    const char *p;
 500
 501    /* TODO Drivers without bdrv_file_open must be specified explicitly */
 502
 503    /*
 504     * XXX(hch): we really should not let host device detection
 505     * override an explicit protocol specification, but moving this
 506     * later breaks access to device names with colons in them.
 507     * Thanks to the brain-dead persistent naming schemes on udev-
 508     * based Linux systems those actually are quite common.
 509     */
 510    drv1 = find_hdev_driver(filename);
 511    if (drv1) {
 512        return drv1;
 513    }
 514
 515    if (!path_has_protocol(filename) || !allow_protocol_prefix) {
 516        return &bdrv_file;
 517    }
 518
 519    p = strchr(filename, ':');
 520    assert(p != NULL);
 521    len = p - filename;
 522    if (len > sizeof(protocol) - 1)
 523        len = sizeof(protocol) - 1;
 524    memcpy(protocol, filename, len);
 525    protocol[len] = '\0';
 526    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
 527        if (drv1->protocol_name &&
 528            !strcmp(drv1->protocol_name, protocol)) {
 529            return drv1;
 530        }
 531    }
 532
 533    error_setg(errp, "Unknown protocol '%s'", protocol);
 534    return NULL;
 535}
 536
 537/*
 538 * Guess image format by probing its contents.
 539 * This is not a good idea when your image is raw (CVE-2008-2004), but
 540 * we do it anyway for backward compatibility.
 541 *
 542 * @buf         contains the image's first @buf_size bytes.
 543 * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
 544 *              but can be smaller if the image file is smaller)
 545 * @filename    is its filename.
 546 *
 547 * For all block drivers, call the bdrv_probe() method to get its
 548 * probing score.
 549 * Return the first block driver with the highest probing score.
 550 */
 551BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
 552                            const char *filename)
 553{
 554    int score_max = 0, score;
 555    BlockDriver *drv = NULL, *d;
 556
 557    QLIST_FOREACH(d, &bdrv_drivers, list) {
 558        if (d->bdrv_probe) {
 559            score = d->bdrv_probe(buf, buf_size, filename);
 560            if (score > score_max) {
 561                score_max = score;
 562                drv = d;
 563            }
 564        }
 565    }
 566
 567    return drv;
 568}
 569
 570static int find_image_format(BlockDriverState *bs, const char *filename,
 571                             BlockDriver **pdrv, Error **errp)
 572{
 573    BlockDriver *drv;
 574    uint8_t buf[BLOCK_PROBE_BUF_SIZE];
 575    int ret = 0;
 576
 577    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
 578    if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
 579        *pdrv = &bdrv_raw;
 580        return ret;
 581    }
 582
 583    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
 584    if (ret < 0) {
 585        error_setg_errno(errp, -ret, "Could not read image for determining its "
 586                         "format");
 587        *pdrv = NULL;
 588        return ret;
 589    }
 590
 591    drv = bdrv_probe_all(buf, ret, filename);
 592    if (!drv) {
 593        error_setg(errp, "Could not determine image format: No compatible "
 594                   "driver found");
 595        ret = -ENOENT;
 596    }
 597    *pdrv = drv;
 598    return ret;
 599}
 600
 601/**
 602 * Set the current 'total_sectors' value
 603 * Return 0 on success, -errno on error.
 604 */
 605static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
 606{
 607    BlockDriver *drv = bs->drv;
 608
 609    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
 610    if (bdrv_is_sg(bs))
 611        return 0;
 612
 613    /* query actual device if possible, otherwise just trust the hint */
 614    if (drv->bdrv_getlength) {
 615        int64_t length = drv->bdrv_getlength(bs);
 616        if (length < 0) {
 617            return length;
 618        }
 619        hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
 620    }
 621
 622    bs->total_sectors = hint;
 623    return 0;
 624}
 625
 626/**
 627 * Set open flags for a given discard mode
 628 *
 629 * Return 0 on success, -1 if the discard mode was invalid.
 630 */
 631int bdrv_parse_discard_flags(const char *mode, int *flags)
 632{
 633    *flags &= ~BDRV_O_UNMAP;
 634
 635    if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
 636        /* do nothing */
 637    } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
 638        *flags |= BDRV_O_UNMAP;
 639    } else {
 640        return -1;
 641    }
 642
 643    return 0;
 644}
 645
 646/**
 647 * Set open flags for a given cache mode
 648 *
 649 * Return 0 on success, -1 if the cache mode was invalid.
 650 */
 651int bdrv_parse_cache_flags(const char *mode, int *flags)
 652{
 653    *flags &= ~BDRV_O_CACHE_MASK;
 654
 655    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
 656        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
 657    } else if (!strcmp(mode, "directsync")) {
 658        *flags |= BDRV_O_NOCACHE;
 659    } else if (!strcmp(mode, "writeback")) {
 660        *flags |= BDRV_O_CACHE_WB;
 661    } else if (!strcmp(mode, "unsafe")) {
 662        *flags |= BDRV_O_CACHE_WB;
 663        *flags |= BDRV_O_NO_FLUSH;
 664    } else if (!strcmp(mode, "writethrough")) {
 665        /* this is the default */
 666    } else {
 667        return -1;
 668    }
 669
 670    return 0;
 671}
 672
 673/*
 674 * Returns the flags that a temporary snapshot should get, based on the
 675 * originally requested flags (the originally requested image will have flags
 676 * like a backing file)
 677 */
 678static int bdrv_temp_snapshot_flags(int flags)
 679{
 680    return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
 681}
 682
 683/*
 684 * Returns the flags that bs->file should get if a protocol driver is expected,
 685 * based on the given flags for the parent BDS
 686 */
 687static int bdrv_inherited_flags(int flags)
 688{
 689    /* Enable protocol handling, disable format probing for bs->file */
 690    flags |= BDRV_O_PROTOCOL;
 691
 692    /* Our block drivers take care to send flushes and respect unmap policy,
 693     * so we can enable both unconditionally on lower layers. */
 694    flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
 695
 696    /* Clear flags that only apply to the top layer */
 697    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
 698
 699    return flags;
 700}
 701
/* Child role whose flags are derived with bdrv_inherited_flags(), i.e. for a
 * bs->file child that is expected to use a protocol driver */
const BdrvChildRole child_file = {
    .inherit_flags = bdrv_inherited_flags,
};
 705
 706/*
 707 * Returns the flags that bs->file should get if the use of formats (and not
 708 * only protocols) is permitted for it, based on the given flags for the parent
 709 * BDS
 710 */
 711static int bdrv_inherited_fmt_flags(int parent_flags)
 712{
 713    int flags = child_file.inherit_flags(parent_flags);
 714    return flags & ~BDRV_O_PROTOCOL;
 715}
 716
/* Child role whose flags are derived with bdrv_inherited_fmt_flags(), i.e.
 * for a bs->file child that may use a format driver as well */
const BdrvChildRole child_format = {
    .inherit_flags = bdrv_inherited_fmt_flags,
};
 720
 721/*
 722 * Returns the flags that bs->backing should get, based on the given flags
 723 * for the parent BDS
 724 */
 725static int bdrv_backing_flags(int flags)
 726{
 727    /* backing files always opened read-only */
 728    flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
 729
 730    /* snapshot=on is handled on the top layer */
 731    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
 732
 733    return flags;
 734}
 735
/* Child role whose flags are derived with bdrv_backing_flags(), i.e. for
 * backing files */
static const BdrvChildRole child_backing = {
    .inherit_flags = bdrv_backing_flags,
};
 739
 740static int bdrv_open_flags(BlockDriverState *bs, int flags)
 741{
 742    int open_flags = flags | BDRV_O_CACHE_WB;
 743
 744    /*
 745     * Clear flags that are internal to the block layer before opening the
 746     * image.
 747     */
 748    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
 749
 750    /*
 751     * Snapshots should be writable.
 752     */
 753    if (flags & BDRV_O_TEMPORARY) {
 754        open_flags |= BDRV_O_RDWR;
 755    }
 756
 757    return open_flags;
 758}
 759
 760static void bdrv_assign_node_name(BlockDriverState *bs,
 761                                  const char *node_name,
 762                                  Error **errp)
 763{
 764    char *gen_node_name = NULL;
 765
 766    if (!node_name) {
 767        node_name = gen_node_name = id_generate(ID_BLOCK);
 768    } else if (!id_wellformed(node_name)) {
 769        /*
 770         * Check for empty string or invalid characters, but not if it is
 771         * generated (generated names use characters not available to the user)
 772         */
 773        error_setg(errp, "Invalid node name");
 774        return;
 775    }
 776
 777    /* takes care of avoiding namespaces collisions */
 778    if (blk_by_name(node_name)) {
 779        error_setg(errp, "node-name=%s is conflicting with a device id",
 780                   node_name);
 781        goto out;
 782    }
 783
 784    /* takes care of avoiding duplicates node names */
 785    if (bdrv_find_node(node_name)) {
 786        error_setg(errp, "Duplicate node name");
 787        goto out;
 788    }
 789
 790    /* copy node name into the bs and insert it into the graph list */
 791    pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
 792    QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
 793out:
 794    g_free(gen_node_name);
 795}
 796
/*
 * Options that bdrv_open_common() absorbs from the options QDict and handles
 * itself; the only one defined here is "node-name".
 */
static QemuOptsList bdrv_runtime_opts = {
    .name = "bdrv_common",
    .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
    .desc = {
        {
            .name = "node-name",
            .type = QEMU_OPT_STRING,
            .help = "Node name of the block device node",
        },
        { /* end of list */ }
    },
};
 809
 810/*
 811 * Common part for opening disk images and files
 812 *
 813 * Removes all processed options from *options.
 814 */
 815static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
 816    QDict *options, int flags, BlockDriver *drv, Error **errp)
 817{
 818    int ret, open_flags;
 819    const char *filename;
 820    const char *node_name = NULL;
 821    QemuOpts *opts;
 822    Error *local_err = NULL;
 823
 824    assert(drv != NULL);
 825    assert(bs->file == NULL);
 826    assert(options != NULL && bs->options != options);
 827
 828    if (file != NULL) {
 829        filename = file->bs->filename;
 830    } else {
 831        filename = qdict_get_try_str(options, "filename");
 832    }
 833
 834    if (drv->bdrv_needs_filename && !filename) {
 835        error_setg(errp, "The '%s' block driver requires a file name",
 836                   drv->format_name);
 837        return -EINVAL;
 838    }
 839
 840    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
 841
 842    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
 843    qemu_opts_absorb_qdict(opts, options, &local_err);
 844    if (local_err) {
 845        error_propagate(errp, local_err);
 846        ret = -EINVAL;
 847        goto fail_opts;
 848    }
 849
 850    node_name = qemu_opt_get(opts, "node-name");
 851    bdrv_assign_node_name(bs, node_name, &local_err);
 852    if (local_err) {
 853        error_propagate(errp, local_err);
 854        ret = -EINVAL;
 855        goto fail_opts;
 856    }
 857
 858    bs->request_alignment = 512;
 859    bs->zero_beyond_eof = true;
 860    open_flags = bdrv_open_flags(bs, flags);
 861    bs->read_only = !(open_flags & BDRV_O_RDWR);
 862
 863    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
 864        error_setg(errp,
 865                   !bs->read_only && bdrv_is_whitelisted(drv, true)
 866                        ? "Driver '%s' can only be used for read-only devices"
 867                        : "Driver '%s' is not whitelisted",
 868                   drv->format_name);
 869        ret = -ENOTSUP;
 870        goto fail_opts;
 871    }
 872
 873    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
 874    if (flags & BDRV_O_COPY_ON_READ) {
 875        if (!bs->read_only) {
 876            bdrv_enable_copy_on_read(bs);
 877        } else {
 878            error_setg(errp, "Can't use copy-on-read on read-only device");
 879            ret = -EINVAL;
 880            goto fail_opts;
 881        }
 882    }
 883
 884    if (filename != NULL) {
 885        pstrcpy(bs->filename, sizeof(bs->filename), filename);
 886    } else {
 887        bs->filename[0] = '\0';
 888    }
 889    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
 890
 891    bs->drv = drv;
 892    bs->opaque = g_malloc0(drv->instance_size);
 893
 894    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
 895
 896    /* Open the image, either directly or using a protocol */
 897    if (drv->bdrv_file_open) {
 898        assert(file == NULL);
 899        assert(!drv->bdrv_needs_filename || filename != NULL);
 900        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
 901    } else {
 902        if (file == NULL) {
 903            error_setg(errp, "Can't use '%s' as a block driver for the "
 904                       "protocol level", drv->format_name);
 905            ret = -EINVAL;
 906            goto free_and_fail;
 907        }
 908        bs->file = file;
 909        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
 910    }
 911
 912    if (ret < 0) {
 913        if (local_err) {
 914            error_propagate(errp, local_err);
 915        } else if (bs->filename[0]) {
 916            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
 917        } else {
 918            error_setg_errno(errp, -ret, "Could not open image");
 919        }
 920        goto free_and_fail;
 921    }
 922
 923    if (bs->encrypted) {
 924        error_report("Encrypted images are deprecated");
 925        error_printf("Support for them will be removed in a future release.\n"
 926                     "You can use 'qemu-img convert' to convert your image"
 927                     " to an unencrypted one.\n");
 928    }
 929
 930    ret = refresh_total_sectors(bs, bs->total_sectors);
 931    if (ret < 0) {
 932        error_setg_errno(errp, -ret, "Could not refresh total sector count");
 933        goto free_and_fail;
 934    }
 935
 936    bdrv_refresh_limits(bs, &local_err);
 937    if (local_err) {
 938        error_propagate(errp, local_err);
 939        ret = -EINVAL;
 940        goto free_and_fail;
 941    }
 942
 943    assert(bdrv_opt_mem_align(bs) != 0);
 944    assert(bdrv_min_mem_align(bs) != 0);
 945    assert((bs->request_alignment != 0) || bdrv_is_sg(bs));
 946
 947    qemu_opts_del(opts);
 948    return 0;
 949
 950free_and_fail:
 951    bs->file = NULL;
 952    g_free(bs->opaque);
 953    bs->opaque = NULL;
 954    bs->drv = NULL;
 955fail_opts:
 956    qemu_opts_del(opts);
 957    return ret;
 958}
 959
 960static QDict *parse_json_filename(const char *filename, Error **errp)
 961{
 962    QObject *options_obj;
 963    QDict *options;
 964    int ret;
 965
 966    ret = strstart(filename, "json:", &filename);
 967    assert(ret);
 968
 969    options_obj = qobject_from_json(filename);
 970    if (!options_obj) {
 971        error_setg(errp, "Could not parse the JSON options");
 972        return NULL;
 973    }
 974
 975    if (qobject_type(options_obj) != QTYPE_QDICT) {
 976        qobject_decref(options_obj);
 977        error_setg(errp, "Invalid JSON object given");
 978        return NULL;
 979    }
 980
 981    options = qobject_to_qdict(options_obj);
 982    qdict_flatten(options);
 983
 984    return options;
 985}
 986
 987/*
 988 * Fills in default options for opening images and converts the legacy
 989 * filename/flags pair to option QDict entries.
 990 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
 991 * block driver has been specified explicitly.
 992 */
 993static int bdrv_fill_options(QDict **options, const char **pfilename,
 994                             int *flags, Error **errp)
 995{
 996    const char *filename = *pfilename;
 997    const char *drvname;
 998    bool protocol = *flags & BDRV_O_PROTOCOL;
 999    bool parse_filename = false;
1000    BlockDriver *drv = NULL;
1001    Error *local_err = NULL;
1002
1003    /* Parse json: pseudo-protocol */
1004    if (filename && g_str_has_prefix(filename, "json:")) {
1005        QDict *json_options = parse_json_filename(filename, &local_err);
1006        if (local_err) {
1007            error_propagate(errp, local_err);
1008            return -EINVAL;
1009        }
1010
1011        /* Options given in the filename have lower priority than options
1012         * specified directly */
1013        qdict_join(*options, json_options, false);
1014        QDECREF(json_options);
1015        *pfilename = filename = NULL;
1016    }
1017
1018    drvname = qdict_get_try_str(*options, "driver");
1019    if (drvname) {
1020        drv = bdrv_find_format(drvname);
1021        if (!drv) {
1022            error_setg(errp, "Unknown driver '%s'", drvname);
1023            return -ENOENT;
1024        }
1025        /* If the user has explicitly specified the driver, this choice should
1026         * override the BDRV_O_PROTOCOL flag */
1027        protocol = drv->bdrv_file_open;
1028    }
1029
1030    if (protocol) {
1031        *flags |= BDRV_O_PROTOCOL;
1032    } else {
1033        *flags &= ~BDRV_O_PROTOCOL;
1034    }
1035
1036    /* Fetch the file name from the options QDict if necessary */
1037    if (protocol && filename) {
1038        if (!qdict_haskey(*options, "filename")) {
1039            qdict_put(*options, "filename", qstring_from_str(filename));
1040            parse_filename = true;
1041        } else {
1042            error_setg(errp, "Can't specify 'file' and 'filename' options at "
1043                             "the same time");
1044            return -EINVAL;
1045        }
1046    }
1047
1048    /* Find the right block driver */
1049    filename = qdict_get_try_str(*options, "filename");
1050
1051    if (!drvname && protocol) {
1052        if (filename) {
1053            drv = bdrv_find_protocol(filename, parse_filename, errp);
1054            if (!drv) {
1055                return -EINVAL;
1056            }
1057
1058            drvname = drv->format_name;
1059            qdict_put(*options, "driver", qstring_from_str(drvname));
1060        } else {
1061            error_setg(errp, "Must specify either driver or file");
1062            return -EINVAL;
1063        }
1064    }
1065
1066    assert(drv || !protocol);
1067
1068    /* Driver-specific filename parsing */
1069    if (drv && drv->bdrv_parse_filename && parse_filename) {
1070        drv->bdrv_parse_filename(filename, *options, &local_err);
1071        if (local_err) {
1072            error_propagate(errp, local_err);
1073            return -EINVAL;
1074        }
1075
1076        if (!drv->bdrv_needs_filename) {
1077            qdict_del(*options, "filename");
1078        }
1079    }
1080
1081    if (runstate_check(RUN_STATE_INMIGRATE)) {
1082        *flags |= BDRV_O_INCOMING;
1083    }
1084
1085    return 0;
1086}
1087
1088static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1089                                    BlockDriverState *child_bs,
1090                                    const BdrvChildRole *child_role)
1091{
1092    BdrvChild *child = g_new(BdrvChild, 1);
1093    *child = (BdrvChild) {
1094        .bs     = child_bs,
1095        .role   = child_role,
1096    };
1097
1098    QLIST_INSERT_HEAD(&parent_bs->children, child, next);
1099    QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
1100
1101    return child;
1102}
1103
1104static void bdrv_detach_child(BdrvChild *child)
1105{
1106    QLIST_REMOVE(child, next);
1107    QLIST_REMOVE(child, next_parent);
1108    g_free(child);
1109}
1110
1111void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1112{
1113    BlockDriverState *child_bs;
1114
1115    if (child == NULL) {
1116        return;
1117    }
1118
1119    if (child->bs->inherits_from == parent) {
1120        child->bs->inherits_from = NULL;
1121    }
1122
1123    child_bs = child->bs;
1124    bdrv_detach_child(child);
1125    bdrv_unref(child_bs);
1126}
1127
1128/*
1129 * Sets the backing file link of a BDS. A new reference is created; callers
1130 * which don't need their own reference any more must call bdrv_unref().
1131 */
1132void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1133{
1134    if (backing_hd) {
1135        bdrv_ref(backing_hd);
1136    }
1137
1138    if (bs->backing) {
1139        assert(bs->backing_blocker);
1140        bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
1141        bdrv_unref_child(bs, bs->backing);
1142    } else if (backing_hd) {
1143        error_setg(&bs->backing_blocker,
1144                   "node is used as backing hd of '%s'",
1145                   bdrv_get_device_or_node_name(bs));
1146    }
1147
1148    if (!backing_hd) {
1149        error_free(bs->backing_blocker);
1150        bs->backing_blocker = NULL;
1151        bs->backing = NULL;
1152        goto out;
1153    }
1154    bs->backing = bdrv_attach_child(bs, backing_hd, &child_backing);
1155    bs->open_flags &= ~BDRV_O_NO_BACKING;
1156    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1157    pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1158            backing_hd->drv ? backing_hd->drv->format_name : "");
1159
1160    bdrv_op_block_all(backing_hd, bs->backing_blocker);
1161    /* Otherwise we won't be able to commit due to check in bdrv_commit */
1162    bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1163                    bs->backing_blocker);
1164out:
1165    bdrv_refresh_limits(bs, NULL);
1166}
1167
1168/*
1169 * Opens the backing file for a BlockDriverState if not yet open
1170 *
1171 * options is a QDict of options to pass to the block drivers, or NULL for an
1172 * empty set of options. The reference to the QDict is transferred to this
1173 * function (even on failure), so if the caller intends to reuse the dictionary,
1174 * it needs to use QINCREF() before calling bdrv_file_open.
1175 */
1176int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
1177{
1178    char *backing_filename = g_malloc0(PATH_MAX);
1179    int ret = 0;
1180    BlockDriverState *backing_hd;
1181    Error *local_err = NULL;
1182
1183    if (bs->backing != NULL) {
1184        QDECREF(options);
1185        goto free_exit;
1186    }
1187
1188    /* NULL means an empty set of options */
1189    if (options == NULL) {
1190        options = qdict_new();
1191    }
1192
1193    bs->open_flags &= ~BDRV_O_NO_BACKING;
1194    if (qdict_haskey(options, "file.filename")) {
1195        backing_filename[0] = '\0';
1196    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1197        QDECREF(options);
1198        goto free_exit;
1199    } else {
1200        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1201                                       &local_err);
1202        if (local_err) {
1203            ret = -EINVAL;
1204            error_propagate(errp, local_err);
1205            QDECREF(options);
1206            goto free_exit;
1207        }
1208    }
1209
1210    if (!bs->drv || !bs->drv->supports_backing) {
1211        ret = -EINVAL;
1212        error_setg(errp, "Driver doesn't support backing files");
1213        QDECREF(options);
1214        goto free_exit;
1215    }
1216
1217    backing_hd = bdrv_new();
1218
1219    if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1220        qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1221    }
1222
1223    assert(bs->backing == NULL);
1224    ret = bdrv_open_inherit(&backing_hd,
1225                            *backing_filename ? backing_filename : NULL,
1226                            NULL, options, 0, bs, &child_backing, &local_err);
1227    if (ret < 0) {
1228        bdrv_unref(backing_hd);
1229        backing_hd = NULL;
1230        bs->open_flags |= BDRV_O_NO_BACKING;
1231        error_setg(errp, "Could not open backing file: %s",
1232                   error_get_pretty(local_err));
1233        error_free(local_err);
1234        goto free_exit;
1235    }
1236
1237    /* Hook up the backing file link; drop our reference, bs owns the
1238     * backing_hd reference now */
1239    bdrv_set_backing_hd(bs, backing_hd);
1240    bdrv_unref(backing_hd);
1241
1242free_exit:
1243    g_free(backing_filename);
1244    return ret;
1245}
1246
1247/*
1248 * Opens a disk image whose options are given as BlockdevRef in another block
1249 * device's options.
1250 *
1251 * If allow_none is true, no image will be opened if filename is false and no
1252 * BlockdevRef is given. NULL will be returned, but errp remains unset.
1253 *
1254 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1255 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1256 * itself, all options starting with "${bdref_key}." are considered part of the
1257 * BlockdevRef.
1258 *
1259 * The BlockdevRef will be removed from the options QDict.
1260 */
1261BdrvChild *bdrv_open_child(const char *filename,
1262                           QDict *options, const char *bdref_key,
1263                           BlockDriverState* parent,
1264                           const BdrvChildRole *child_role,
1265                           bool allow_none, Error **errp)
1266{
1267    BdrvChild *c = NULL;
1268    BlockDriverState *bs;
1269    QDict *image_options;
1270    int ret;
1271    char *bdref_key_dot;
1272    const char *reference;
1273
1274    assert(child_role != NULL);
1275
1276    bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1277    qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1278    g_free(bdref_key_dot);
1279
1280    reference = qdict_get_try_str(options, bdref_key);
1281    if (!filename && !reference && !qdict_size(image_options)) {
1282        if (!allow_none) {
1283            error_setg(errp, "A block device must be specified for \"%s\"",
1284                       bdref_key);
1285        }
1286        QDECREF(image_options);
1287        goto done;
1288    }
1289
1290    bs = NULL;
1291    ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
1292                            parent, child_role, errp);
1293    if (ret < 0) {
1294        goto done;
1295    }
1296
1297    c = bdrv_attach_child(parent, bs, child_role);
1298
1299done:
1300    qdict_del(options, bdref_key);
1301    return c;
1302}
1303
1304int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1305{
1306    /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1307    char *tmp_filename = g_malloc0(PATH_MAX + 1);
1308    int64_t total_size;
1309    QemuOpts *opts = NULL;
1310    QDict *snapshot_options;
1311    BlockDriverState *bs_snapshot;
1312    Error *local_err = NULL;
1313    int ret;
1314
1315    /* if snapshot, we create a temporary backing file and open it
1316       instead of opening 'filename' directly */
1317
1318    /* Get the required size from the image */
1319    total_size = bdrv_getlength(bs);
1320    if (total_size < 0) {
1321        ret = total_size;
1322        error_setg_errno(errp, -total_size, "Could not get image size");
1323        goto out;
1324    }
1325
1326    /* Create the temporary image */
1327    ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1328    if (ret < 0) {
1329        error_setg_errno(errp, -ret, "Could not get temporary filename");
1330        goto out;
1331    }
1332
1333    opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1334                            &error_abort);
1335    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1336    ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
1337    qemu_opts_del(opts);
1338    if (ret < 0) {
1339        error_setg_errno(errp, -ret, "Could not create temporary overlay "
1340                         "'%s': %s", tmp_filename,
1341                         error_get_pretty(local_err));
1342        error_free(local_err);
1343        goto out;
1344    }
1345
1346    /* Prepare a new options QDict for the temporary file */
1347    snapshot_options = qdict_new();
1348    qdict_put(snapshot_options, "file.driver",
1349              qstring_from_str("file"));
1350    qdict_put(snapshot_options, "file.filename",
1351              qstring_from_str(tmp_filename));
1352    qdict_put(snapshot_options, "driver",
1353              qstring_from_str("qcow2"));
1354
1355    bs_snapshot = bdrv_new();
1356
1357    ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1358                    flags, &local_err);
1359    if (ret < 0) {
1360        error_propagate(errp, local_err);
1361        goto out;
1362    }
1363
1364    bdrv_append(bs_snapshot, bs);
1365
1366out:
1367    g_free(tmp_filename);
1368    return ret;
1369}
1370
1371/*
1372 * Opens a disk image (raw, qcow2, vmdk, ...)
1373 *
1374 * options is a QDict of options to pass to the block drivers, or NULL for an
1375 * empty set of options. The reference to the QDict belongs to the block layer
1376 * after the call (even on failure), so if the caller intends to reuse the
1377 * dictionary, it needs to use QINCREF() before calling bdrv_open.
1378 *
1379 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1380 * If it is not NULL, the referenced BDS will be reused.
1381 *
1382 * The reference parameter may be used to specify an existing block device which
1383 * should be opened. If specified, neither options nor a filename may be given,
1384 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1385 */
1386static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1387                             const char *reference, QDict *options, int flags,
1388                             BlockDriverState *parent,
1389                             const BdrvChildRole *child_role, Error **errp)
1390{
1391    int ret;
1392    BdrvChild *file = NULL;
1393    BlockDriverState *bs;
1394    BlockDriver *drv = NULL;
1395    const char *drvname;
1396    const char *backing;
1397    Error *local_err = NULL;
1398    int snapshot_flags = 0;
1399
1400    assert(pbs);
1401    assert(!child_role || !flags);
1402    assert(!child_role == !parent);
1403
1404    if (reference) {
1405        bool options_non_empty = options ? qdict_size(options) : false;
1406        QDECREF(options);
1407
1408        if (*pbs) {
1409            error_setg(errp, "Cannot reuse an existing BDS when referencing "
1410                       "another block device");
1411            return -EINVAL;
1412        }
1413
1414        if (filename || options_non_empty) {
1415            error_setg(errp, "Cannot reference an existing block device with "
1416                       "additional options or a new filename");
1417            return -EINVAL;
1418        }
1419
1420        bs = bdrv_lookup_bs(reference, reference, errp);
1421        if (!bs) {
1422            return -ENODEV;
1423        }
1424        bdrv_ref(bs);
1425        *pbs = bs;
1426        return 0;
1427    }
1428
1429    if (*pbs) {
1430        bs = *pbs;
1431    } else {
1432        bs = bdrv_new();
1433    }
1434
1435    /* NULL means an empty set of options */
1436    if (options == NULL) {
1437        options = qdict_new();
1438    }
1439
1440    if (child_role) {
1441        bs->inherits_from = parent;
1442        flags = child_role->inherit_flags(parent->open_flags);
1443    }
1444
1445    ret = bdrv_fill_options(&options, &filename, &flags, &local_err);
1446    if (local_err) {
1447        goto fail;
1448    }
1449
1450    /* Find the right image format driver */
1451    drvname = qdict_get_try_str(options, "driver");
1452    if (drvname) {
1453        drv = bdrv_find_format(drvname);
1454        qdict_del(options, "driver");
1455        if (!drv) {
1456            error_setg(errp, "Unknown driver: '%s'", drvname);
1457            ret = -EINVAL;
1458            goto fail;
1459        }
1460    }
1461
1462    assert(drvname || !(flags & BDRV_O_PROTOCOL));
1463
1464    backing = qdict_get_try_str(options, "backing");
1465    if (backing && *backing == '\0') {
1466        flags |= BDRV_O_NO_BACKING;
1467        qdict_del(options, "backing");
1468    }
1469
1470    bs->open_flags = flags;
1471    bs->options = options;
1472    options = qdict_clone_shallow(options);
1473
1474    /* Open image file without format layer */
1475    if ((flags & BDRV_O_PROTOCOL) == 0) {
1476        if (flags & BDRV_O_RDWR) {
1477            flags |= BDRV_O_ALLOW_RDWR;
1478        }
1479        if (flags & BDRV_O_SNAPSHOT) {
1480            snapshot_flags = bdrv_temp_snapshot_flags(flags);
1481            flags = bdrv_backing_flags(flags);
1482        }
1483
1484        bs->open_flags = flags;
1485
1486        file = bdrv_open_child(filename, options, "file", bs,
1487                               &child_file, true, &local_err);
1488        if (local_err) {
1489            ret = -EINVAL;
1490            goto fail;
1491        }
1492    }
1493
1494    /* Image format probing */
1495    bs->probed = !drv;
1496    if (!drv && file) {
1497        ret = find_image_format(file->bs, filename, &drv, &local_err);
1498        if (ret < 0) {
1499            goto fail;
1500        }
1501    } else if (!drv) {
1502        error_setg(errp, "Must specify either driver or file");
1503        ret = -EINVAL;
1504        goto fail;
1505    }
1506
1507    /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1508    assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1509    /* file must be NULL if a protocol BDS is about to be created
1510     * (the inverse results in an error message from bdrv_open_common()) */
1511    assert(!(flags & BDRV_O_PROTOCOL) || !file);
1512
1513    /* Open the image */
1514    ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
1515    if (ret < 0) {
1516        goto fail;
1517    }
1518
1519    if (file && (bs->file != file)) {
1520        bdrv_unref_child(bs, file);
1521        file = NULL;
1522    }
1523
1524    /* If there is a backing file, use it */
1525    if ((flags & BDRV_O_NO_BACKING) == 0) {
1526        QDict *backing_options;
1527
1528        qdict_extract_subqdict(options, &backing_options, "backing.");
1529        ret = bdrv_open_backing_file(bs, backing_options, &local_err);
1530        if (ret < 0) {
1531            goto close_and_fail;
1532        }
1533    }
1534
1535    bdrv_refresh_filename(bs);
1536
1537    /* Check if any unknown options were used */
1538    if (options && (qdict_size(options) != 0)) {
1539        const QDictEntry *entry = qdict_first(options);
1540        if (flags & BDRV_O_PROTOCOL) {
1541            error_setg(errp, "Block protocol '%s' doesn't support the option "
1542                       "'%s'", drv->format_name, entry->key);
1543        } else {
1544            error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1545                       "support the option '%s'", drv->format_name,
1546                       bdrv_get_device_name(bs), entry->key);
1547        }
1548
1549        ret = -EINVAL;
1550        goto close_and_fail;
1551    }
1552
1553    if (!bdrv_key_required(bs)) {
1554        if (bs->blk) {
1555            blk_dev_change_media_cb(bs->blk, true);
1556        }
1557    } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1558               && !runstate_check(RUN_STATE_INMIGRATE)
1559               && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1560        error_setg(errp,
1561                   "Guest must be stopped for opening of encrypted image");
1562        ret = -EBUSY;
1563        goto close_and_fail;
1564    }
1565
1566    QDECREF(options);
1567    *pbs = bs;
1568
1569    /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1570     * temporary snapshot afterwards. */
1571    if (snapshot_flags) {
1572        ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1573        if (local_err) {
1574            goto close_and_fail;
1575        }
1576    }
1577
1578    return 0;
1579
1580fail:
1581    if (file != NULL) {
1582        bdrv_unref_child(bs, file);
1583    }
1584    QDECREF(bs->options);
1585    QDECREF(options);
1586    bs->options = NULL;
1587    if (!*pbs) {
1588        /* If *pbs is NULL, a new BDS has been created in this function and
1589           needs to be freed now. Otherwise, it does not need to be closed,
1590           since it has not really been opened yet. */
1591        bdrv_unref(bs);
1592    }
1593    if (local_err) {
1594        error_propagate(errp, local_err);
1595    }
1596    return ret;
1597
1598close_and_fail:
1599    /* See fail path, but now the BDS has to be always closed */
1600    if (*pbs) {
1601        bdrv_close(bs);
1602    } else {
1603        bdrv_unref(bs);
1604    }
1605    QDECREF(options);
1606    if (local_err) {
1607        error_propagate(errp, local_err);
1608    }
1609    return ret;
1610}
1611
1612int bdrv_open(BlockDriverState **pbs, const char *filename,
1613              const char *reference, QDict *options, int flags, Error **errp)
1614{
1615    return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
1616                             NULL, errp);
1617}
1618
1619typedef struct BlockReopenQueueEntry {
1620     bool prepared;
1621     BDRVReopenState state;
1622     QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1623} BlockReopenQueueEntry;
1624
1625/*
1626 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1627 * reopen of multiple devices.
1628 *
1629 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1630 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1631 * be created and initialized. This newly created BlockReopenQueue should be
1632 * passed back in for subsequent calls that are intended to be of the same
1633 * atomic 'set'.
1634 *
1635 * bs is the BlockDriverState to add to the reopen queue.
1636 *
1637 * options contains the changed options for the associated bs
1638 * (the BlockReopenQueue takes ownership)
1639 *
1640 * flags contains the open flags for the associated bs
1641 *
1642 * returns a pointer to bs_queue, which is either the newly allocated
1643 * bs_queue, or the existing bs_queue being used.
1644 *
1645 */
1646BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1647                                    BlockDriverState *bs,
1648                                    QDict *options, int flags)
1649{
1650    assert(bs != NULL);
1651
1652    BlockReopenQueueEntry *bs_entry;
1653    BdrvChild *child;
1654    QDict *old_options;
1655
1656    if (bs_queue == NULL) {
1657        bs_queue = g_new0(BlockReopenQueue, 1);
1658        QSIMPLEQ_INIT(bs_queue);
1659    }
1660
1661    if (!options) {
1662        options = qdict_new();
1663    }
1664
1665    old_options = qdict_clone_shallow(bs->options);
1666    qdict_join(options, old_options, false);
1667    QDECREF(old_options);
1668
1669    /* bdrv_open() masks this flag out */
1670    flags &= ~BDRV_O_PROTOCOL;
1671
1672    QLIST_FOREACH(child, &bs->children, next) {
1673        int child_flags;
1674
1675        if (child->bs->inherits_from != bs) {
1676            continue;
1677        }
1678
1679        child_flags = child->role->inherit_flags(flags);
1680        /* TODO Pass down child flags (backing.*, extents.*, ...) */
1681        bdrv_reopen_queue(bs_queue, child->bs, NULL, child_flags);
1682    }
1683
1684    bs_entry = g_new0(BlockReopenQueueEntry, 1);
1685    QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1686
1687    bs_entry->state.bs = bs;
1688    bs_entry->state.options = options;
1689    bs_entry->state.flags = flags;
1690
1691    return bs_queue;
1692}
1693
1694/*
1695 * Reopen multiple BlockDriverStates atomically & transactionally.
1696 *
1697 * The queue passed in (bs_queue) must have been built up previous
1698 * via bdrv_reopen_queue().
1699 *
1700 * Reopens all BDS specified in the queue, with the appropriate
1701 * flags.  All devices are prepared for reopen, and failure of any
1702 * device will cause all device changes to be abandonded, and intermediate
1703 * data cleaned up.
1704 *
1705 * If all devices prepare successfully, then the changes are committed
1706 * to all devices.
1707 *
1708 */
1709int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1710{
1711    int ret = -1;
1712    BlockReopenQueueEntry *bs_entry, *next;
1713    Error *local_err = NULL;
1714
1715    assert(bs_queue != NULL);
1716
1717    bdrv_drain_all();
1718
1719    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1720        if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1721            error_propagate(errp, local_err);
1722            goto cleanup;
1723        }
1724        bs_entry->prepared = true;
1725    }
1726
1727    /* If we reach this point, we have success and just need to apply the
1728     * changes
1729     */
1730    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1731        bdrv_reopen_commit(&bs_entry->state);
1732    }
1733
1734    ret = 0;
1735
1736cleanup:
1737    QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1738        if (ret && bs_entry->prepared) {
1739            bdrv_reopen_abort(&bs_entry->state);
1740        }
1741        QDECREF(bs_entry->state.options);
1742        g_free(bs_entry);
1743    }
1744    g_free(bs_queue);
1745    return ret;
1746}
1747
1748
1749/* Reopen a single BlockDriverState with the specified flags. */
1750int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1751{
1752    int ret = -1;
1753    Error *local_err = NULL;
1754    BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
1755
1756    ret = bdrv_reopen_multiple(queue, &local_err);
1757    if (local_err != NULL) {
1758        error_propagate(errp, local_err);
1759    }
1760    return ret;
1761}
1762
1763
1764/*
1765 * Prepares a BlockDriverState for reopen. All changes are staged in the
1766 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1767 * the block driver layer .bdrv_reopen_prepare()
1768 *
1769 * bs is the BlockDriverState to reopen
1770 * flags are the new open flags
1771 * queue is the reopen queue
1772 *
1773 * Returns 0 on success, non-zero on error.  On error errp will be set
1774 * as well.
1775 *
1776 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1777 * It is the responsibility of the caller to then call the abort() or
1778 * commit() for any other BDS that have been left in a prepare() state
1779 *
1780 */
1781int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1782                        Error **errp)
1783{
1784    int ret = -1;
1785    Error *local_err = NULL;
1786    BlockDriver *drv;
1787
1788    assert(reopen_state != NULL);
1789    assert(reopen_state->bs->drv != NULL);
1790    drv = reopen_state->bs->drv;
1791
1792    /* if we are to stay read-only, do not allow permission change
1793     * to r/w */
1794    if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1795        reopen_state->flags & BDRV_O_RDWR) {
1796        error_setg(errp, "Node '%s' is read only",
1797                   bdrv_get_device_or_node_name(reopen_state->bs));
1798        goto error;
1799    }
1800
1801
1802    ret = bdrv_flush(reopen_state->bs);
1803    if (ret) {
1804        error_setg_errno(errp, -ret, "Error flushing drive");
1805        goto error;
1806    }
1807
1808    if (drv->bdrv_reopen_prepare) {
1809        ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1810        if (ret) {
1811            if (local_err != NULL) {
1812                error_propagate(errp, local_err);
1813            } else {
1814                error_setg(errp, "failed while preparing to reopen image '%s'",
1815                           reopen_state->bs->filename);
1816            }
1817            goto error;
1818        }
1819    } else {
1820        /* It is currently mandatory to have a bdrv_reopen_prepare()
1821         * handler for each supported drv. */
1822        error_setg(errp, "Block format '%s' used by node '%s' "
1823                   "does not support reopening files", drv->format_name,
1824                   bdrv_get_device_or_node_name(reopen_state->bs));
1825        ret = -1;
1826        goto error;
1827    }
1828
1829    /* Options that are not handled are only okay if they are unchanged
1830     * compared to the old state. It is expected that some options are only
1831     * used for the initial open, but not reopen (e.g. filename) */
1832    if (qdict_size(reopen_state->options)) {
1833        const QDictEntry *entry = qdict_first(reopen_state->options);
1834
1835        do {
1836            QString *new_obj = qobject_to_qstring(entry->value);
1837            const char *new = qstring_get_str(new_obj);
1838            const char *old = qdict_get_try_str(reopen_state->bs->options,
1839                                                entry->key);
1840
1841            if (!old || strcmp(new, old)) {
1842                error_setg(errp, "Cannot change the option '%s'", entry->key);
1843                ret = -EINVAL;
1844                goto error;
1845            }
1846        } while ((entry = qdict_next(reopen_state->options, entry)));
1847    }
1848
1849    ret = 0;
1850
1851error:
1852    return ret;
1853}
1854
1855/*
1856 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1857 * makes them final by swapping the staging BlockDriverState contents into
1858 * the active BlockDriverState contents.
1859 */
1860void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1861{
1862    BlockDriver *drv;
1863
1864    assert(reopen_state != NULL);
1865    drv = reopen_state->bs->drv;
1866    assert(drv != NULL);
1867
1868    /* If there are any driver level actions to take */
1869    if (drv->bdrv_reopen_commit) {
1870        drv->bdrv_reopen_commit(reopen_state);
1871    }
1872
1873    /* set BDS specific flags now */
1874    reopen_state->bs->open_flags         = reopen_state->flags;
1875    reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1876                                              BDRV_O_CACHE_WB);
1877    reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1878
1879    bdrv_refresh_limits(reopen_state->bs, NULL);
1880}
1881
1882/*
1883 * Abort the reopen, and delete and free the staged changes in
1884 * reopen_state
1885 */
1886void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1887{
1888    BlockDriver *drv;
1889
1890    assert(reopen_state != NULL);
1891    drv = reopen_state->bs->drv;
1892    assert(drv != NULL);
1893
1894    if (drv->bdrv_reopen_abort) {
1895        drv->bdrv_reopen_abort(reopen_state);
1896    }
1897}
1898
1899
1900void bdrv_close(BlockDriverState *bs)
1901{
1902    BdrvAioNotifier *ban, *ban_next;
1903
1904    if (bs->job) {
1905        block_job_cancel_sync(bs->job);
1906    }
1907
1908    /* Disable I/O limits and drain all pending throttled requests */
1909    if (bs->throttle_state) {
1910        bdrv_io_limits_disable(bs);
1911    }
1912
1913    bdrv_drain(bs); /* complete I/O */
1914    bdrv_flush(bs);
1915    bdrv_drain(bs); /* in case flush left pending I/O */
1916    notifier_list_notify(&bs->close_notifiers, bs);
1917
1918    if (bs->blk) {
1919        blk_dev_change_media_cb(bs->blk, false);
1920    }
1921
1922    if (bs->drv) {
1923        BdrvChild *child, *next;
1924
1925        bs->drv->bdrv_close(bs);
1926        bs->drv = NULL;
1927
1928        bdrv_set_backing_hd(bs, NULL);
1929
1930        if (bs->file != NULL) {
1931            bdrv_unref_child(bs, bs->file);
1932            bs->file = NULL;
1933        }
1934
1935        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
1936            /* TODO Remove bdrv_unref() from drivers' close function and use
1937             * bdrv_unref_child() here */
1938            if (child->bs->inherits_from == bs) {
1939                child->bs->inherits_from = NULL;
1940            }
1941            bdrv_detach_child(child);
1942        }
1943
1944        g_free(bs->opaque);
1945        bs->opaque = NULL;
1946        bs->copy_on_read = 0;
1947        bs->backing_file[0] = '\0';
1948        bs->backing_format[0] = '\0';
1949        bs->total_sectors = 0;
1950        bs->encrypted = 0;
1951        bs->valid_key = 0;
1952        bs->sg = 0;
1953        bs->zero_beyond_eof = false;
1954        QDECREF(bs->options);
1955        bs->options = NULL;
1956        QDECREF(bs->full_open_options);
1957        bs->full_open_options = NULL;
1958    }
1959
1960    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
1961        g_free(ban);
1962    }
1963    QLIST_INIT(&bs->aio_notifiers);
1964}
1965
1966void bdrv_close_all(void)
1967{
1968    BlockDriverState *bs;
1969
1970    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1971        AioContext *aio_context = bdrv_get_aio_context(bs);
1972
1973        aio_context_acquire(aio_context);
1974        bdrv_close(bs);
1975        aio_context_release(aio_context);
1976    }
1977}
1978
1979/* Note that bs->device_list.tqe_prev is initially null,
1980 * and gets set to non-null by QTAILQ_INSERT_TAIL().  Establish
1981 * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
1982 * resetting it to null on remove.  */
1983void bdrv_device_remove(BlockDriverState *bs)
1984{
1985    QTAILQ_REMOVE(&bdrv_states, bs, device_list);
1986    bs->device_list.tqe_prev = NULL;
1987}
1988
1989/* make a BlockDriverState anonymous by removing from bdrv_state and
1990 * graph_bdrv_state list.
1991   Also, NULL terminate the device_name to prevent double remove */
1992void bdrv_make_anon(BlockDriverState *bs)
1993{
1994    /* Take care to remove bs from bdrv_states only when it's actually
1995     * in it. */
1996    if (bs->device_list.tqe_prev) {
1997        bdrv_device_remove(bs);
1998    }
1999    if (bs->node_name[0] != '\0') {
2000        QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2001    }
2002    bs->node_name[0] = '\0';
2003}
2004
2005/* Fields that need to stay with the top-level BDS */
2006static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
2007                                     BlockDriverState *bs_src)
2008{
2009    /* move some fields that need to stay attached to the device */
2010
2011    /* dev info */
2012    bs_dest->copy_on_read       = bs_src->copy_on_read;
2013
2014    bs_dest->enable_write_cache = bs_src->enable_write_cache;
2015
2016    /* dirty bitmap */
2017    bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
2018}
2019
2020static void change_parent_backing_link(BlockDriverState *from,
2021                                       BlockDriverState *to)
2022{
2023    BdrvChild *c, *next;
2024
2025    QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
2026        assert(c->role != &child_backing);
2027        c->bs = to;
2028        QLIST_REMOVE(c, next_parent);
2029        QLIST_INSERT_HEAD(&to->parents, c, next_parent);
2030        bdrv_ref(to);
2031        bdrv_unref(from);
2032    }
2033    if (from->blk) {
2034        blk_set_bs(from->blk, to);
2035        if (!to->device_list.tqe_prev) {
2036            QTAILQ_INSERT_BEFORE(from, to, device_list);
2037        }
2038        bdrv_device_remove(from);
2039    }
2040}
2041
2042static void swap_feature_fields(BlockDriverState *bs_top,
2043                                BlockDriverState *bs_new)
2044{
2045    BlockDriverState tmp;
2046
2047    bdrv_move_feature_fields(&tmp, bs_top);
2048    bdrv_move_feature_fields(bs_top, bs_new);
2049    bdrv_move_feature_fields(bs_new, &tmp);
2050
2051    assert(!bs_new->throttle_state);
2052    if (bs_top->throttle_state) {
2053        assert(bs_top->io_limits_enabled);
2054        bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
2055        bdrv_io_limits_disable(bs_top);
2056    }
2057}
2058
2059/*
2060 * Add new bs contents at the top of an image chain while the chain is
2061 * live, while keeping required fields on the top layer.
2062 *
2063 * This will modify the BlockDriverState fields, and swap contents
2064 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2065 *
2066 * bs_new must not be attached to a BlockBackend.
2067 *
2068 * This function does not create any image files.
2069 *
2070 * bdrv_append() takes ownership of a bs_new reference and unrefs it because
2071 * that's what the callers commonly need. bs_new will be referenced by the old
2072 * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
2073 * reference of its own, it must call bdrv_ref().
2074 */
2075void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2076{
2077    assert(!bdrv_requests_pending(bs_top));
2078    assert(!bdrv_requests_pending(bs_new));
2079
2080    bdrv_ref(bs_top);
2081    change_parent_backing_link(bs_top, bs_new);
2082
2083    /* Some fields always stay on top of the backing file chain */
2084    swap_feature_fields(bs_top, bs_new);
2085
2086    bdrv_set_backing_hd(bs_new, bs_top);
2087    bdrv_unref(bs_top);
2088
2089    /* bs_new is now referenced by its new parents, we don't need the
2090     * additional reference any more. */
2091    bdrv_unref(bs_new);
2092}
2093
2094void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
2095{
2096    assert(!bdrv_requests_pending(old));
2097    assert(!bdrv_requests_pending(new));
2098
2099    bdrv_ref(old);
2100
2101    if (old->blk) {
2102        /* As long as these fields aren't in BlockBackend, but in the top-level
2103         * BlockDriverState, it's not possible for a BDS to have two BBs.
2104         *
2105         * We really want to copy the fields from old to new, but we go for a
2106         * swap instead so that pointers aren't duplicated and cause trouble.
2107         * (Also, bdrv_swap() used to do the same.) */
2108        assert(!new->blk);
2109        swap_feature_fields(old, new);
2110    }
2111    change_parent_backing_link(old, new);
2112
2113    /* Change backing files if a previously independent node is added to the
2114     * chain. For active commit, we replace top by its own (indirect) backing
2115     * file and don't do anything here so we don't build a loop. */
2116    if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
2117        bdrv_set_backing_hd(new, backing_bs(old));
2118        bdrv_set_backing_hd(old, NULL);
2119    }
2120
2121    bdrv_unref(old);
2122}
2123
2124static void bdrv_delete(BlockDriverState *bs)
2125{
2126    assert(!bs->job);
2127    assert(bdrv_op_blocker_is_empty(bs));
2128    assert(!bs->refcnt);
2129    assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2130
2131    bdrv_close(bs);
2132
2133    /* remove from list, if necessary */
2134    bdrv_make_anon(bs);
2135
2136    g_free(bs);
2137}
2138
2139/*
2140 * Run consistency checks on an image
2141 *
2142 * Returns 0 if the check could be completed (it doesn't mean that the image is
2143 * free of errors) or -errno when an internal error occurred. The results of the
2144 * check are stored in res.
2145 */
2146int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2147{
2148    if (bs->drv == NULL) {
2149        return -ENOMEDIUM;
2150    }
2151    if (bs->drv->bdrv_check == NULL) {
2152        return -ENOTSUP;
2153    }
2154
2155    memset(res, 0, sizeof(*res));
2156    return bs->drv->bdrv_check(bs, res, fix);
2157}
2158
2159#define COMMIT_BUF_SECTORS 2048
2160
2161/* commit COW file into the raw image */
2162int bdrv_commit(BlockDriverState *bs)
2163{
2164    BlockDriver *drv = bs->drv;
2165    int64_t sector, total_sectors, length, backing_length;
2166    int n, ro, open_flags;
2167    int ret = 0;
2168    uint8_t *buf = NULL;
2169
2170    if (!drv)
2171        return -ENOMEDIUM;
2172
2173    if (!bs->backing) {
2174        return -ENOTSUP;
2175    }
2176
2177    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2178        bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
2179        return -EBUSY;
2180    }
2181
2182    ro = bs->backing->bs->read_only;
2183    open_flags =  bs->backing->bs->open_flags;
2184
2185    if (ro) {
2186        if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
2187            return -EACCES;
2188        }
2189    }
2190
2191    length = bdrv_getlength(bs);
2192    if (length < 0) {
2193        ret = length;
2194        goto ro_cleanup;
2195    }
2196
2197    backing_length = bdrv_getlength(bs->backing->bs);
2198    if (backing_length < 0) {
2199        ret = backing_length;
2200        goto ro_cleanup;
2201    }
2202
2203    /* If our top snapshot is larger than the backing file image,
2204     * grow the backing file image if possible.  If not possible,
2205     * we must return an error */
2206    if (length > backing_length) {
2207        ret = bdrv_truncate(bs->backing->bs, length);
2208        if (ret < 0) {
2209            goto ro_cleanup;
2210        }
2211    }
2212
2213    total_sectors = length >> BDRV_SECTOR_BITS;
2214
2215    /* qemu_try_blockalign() for bs will choose an alignment that works for
2216     * bs->backing->bs as well, so no need to compare the alignment manually. */
2217    buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2218    if (buf == NULL) {
2219        ret = -ENOMEM;
2220        goto ro_cleanup;
2221    }
2222
2223    for (sector = 0; sector < total_sectors; sector += n) {
2224        ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2225        if (ret < 0) {
2226            goto ro_cleanup;
2227        }
2228        if (ret) {
2229            ret = bdrv_read(bs, sector, buf, n);
2230            if (ret < 0) {
2231                goto ro_cleanup;
2232            }
2233
2234            ret = bdrv_write(bs->backing->bs, sector, buf, n);
2235            if (ret < 0) {
2236                goto ro_cleanup;
2237            }
2238        }
2239    }
2240
2241    if (drv->bdrv_make_empty) {
2242        ret = drv->bdrv_make_empty(bs);
2243        if (ret < 0) {
2244            goto ro_cleanup;
2245        }
2246        bdrv_flush(bs);
2247    }
2248
2249    /*
2250     * Make sure all data we wrote to the backing device is actually
2251     * stable on disk.
2252     */
2253    if (bs->backing) {
2254        bdrv_flush(bs->backing->bs);
2255    }
2256
2257    ret = 0;
2258ro_cleanup:
2259    qemu_vfree(buf);
2260
2261    if (ro) {
2262        /* ignoring error return here */
2263        bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
2264    }
2265
2266    return ret;
2267}
2268
2269int bdrv_commit_all(void)
2270{
2271    BlockDriverState *bs;
2272
2273    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2274        AioContext *aio_context = bdrv_get_aio_context(bs);
2275
2276        aio_context_acquire(aio_context);
2277        if (bs->drv && bs->backing) {
2278            int ret = bdrv_commit(bs);
2279            if (ret < 0) {
2280                aio_context_release(aio_context);
2281                return ret;
2282            }
2283        }
2284        aio_context_release(aio_context);
2285    }
2286    return 0;
2287}
2288
2289/*
2290 * Return values:
2291 * 0        - success
2292 * -EINVAL  - backing format specified, but no file
2293 * -ENOSPC  - can't update the backing file because no space is left in the
2294 *            image file header
2295 * -ENOTSUP - format driver doesn't support changing the backing file
2296 */
2297int bdrv_change_backing_file(BlockDriverState *bs,
2298    const char *backing_file, const char *backing_fmt)
2299{
2300    BlockDriver *drv = bs->drv;
2301    int ret;
2302
2303    /* Backing file format doesn't make sense without a backing file */
2304    if (backing_fmt && !backing_file) {
2305        return -EINVAL;
2306    }
2307
2308    if (drv->bdrv_change_backing_file != NULL) {
2309        ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2310    } else {
2311        ret = -ENOTSUP;
2312    }
2313
2314    if (ret == 0) {
2315        pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2316        pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2317    }
2318    return ret;
2319}
2320
2321/*
2322 * Finds the image layer in the chain that has 'bs' as its backing file.
2323 *
2324 * active is the current topmost image.
2325 *
2326 * Returns NULL if bs is not found in active's image chain,
2327 * or if active == bs.
2328 *
2329 * Returns the bottommost base image if bs == NULL.
2330 */
2331BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2332                                    BlockDriverState *bs)
2333{
2334    while (active && bs != backing_bs(active)) {
2335        active = backing_bs(active);
2336    }
2337
2338    return active;
2339}
2340
2341/* Given a BDS, searches for the base layer. */
2342BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2343{
2344    return bdrv_find_overlay(bs, NULL);
2345}
2346
2347/*
2348 * Drops images above 'base' up to and including 'top', and sets the image
2349 * above 'top' to have base as its backing file.
2350 *
2351 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2352 * information in 'bs' can be properly updated.
2353 *
2354 * E.g., this will convert the following chain:
2355 * bottom <- base <- intermediate <- top <- active
2356 *
2357 * to
2358 *
2359 * bottom <- base <- active
2360 *
2361 * It is allowed for bottom==base, in which case it converts:
2362 *
2363 * base <- intermediate <- top <- active
2364 *
2365 * to
2366 *
2367 * base <- active
2368 *
2369 * If backing_file_str is non-NULL, it will be used when modifying top's
2370 * overlay image metadata.
2371 *
2372 * Error conditions:
2373 *  if active == top, that is considered an error
2374 *
2375 */
2376int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2377                           BlockDriverState *base, const char *backing_file_str)
2378{
2379    BlockDriverState *new_top_bs = NULL;
2380    int ret = -EIO;
2381
2382    if (!top->drv || !base->drv) {
2383        goto exit;
2384    }
2385
2386    new_top_bs = bdrv_find_overlay(active, top);
2387
2388    if (new_top_bs == NULL) {
2389        /* we could not find the image above 'top', this is an error */
2390        goto exit;
2391    }
2392
2393    /* special case of new_top_bs->backing->bs already pointing to base - nothing
2394     * to do, no intermediate images */
2395    if (backing_bs(new_top_bs) == base) {
2396        ret = 0;
2397        goto exit;
2398    }
2399
2400    /* Make sure that base is in the backing chain of top */
2401    if (!bdrv_chain_contains(top, base)) {
2402        goto exit;
2403    }
2404
2405    /* success - we can delete the intermediate states, and link top->base */
2406    backing_file_str = backing_file_str ? backing_file_str : base->filename;
2407    ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2408                                   base->drv ? base->drv->format_name : "");
2409    if (ret) {
2410        goto exit;
2411    }
2412    bdrv_set_backing_hd(new_top_bs, base);
2413
2414    ret = 0;
2415exit:
2416    return ret;
2417}
2418
2419/**
2420 * Truncate file to 'offset' bytes (needed only for file protocols)
2421 */
2422int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2423{
2424    BlockDriver *drv = bs->drv;
2425    int ret;
2426    if (!drv)
2427        return -ENOMEDIUM;
2428    if (!drv->bdrv_truncate)
2429        return -ENOTSUP;
2430    if (bs->read_only)
2431        return -EACCES;
2432
2433    ret = drv->bdrv_truncate(bs, offset);
2434    if (ret == 0) {
2435        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2436        bdrv_dirty_bitmap_truncate(bs);
2437        if (bs->blk) {
2438            blk_dev_resize_cb(bs->blk);
2439        }
2440    }
2441    return ret;
2442}
2443
2444/**
2445 * Length of a allocated file in bytes. Sparse files are counted by actual
2446 * allocated space. Return < 0 if error or unknown.
2447 */
2448int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2449{
2450    BlockDriver *drv = bs->drv;
2451    if (!drv) {
2452        return -ENOMEDIUM;
2453    }
2454    if (drv->bdrv_get_allocated_file_size) {
2455        return drv->bdrv_get_allocated_file_size(bs);
2456    }
2457    if (bs->file) {
2458        return bdrv_get_allocated_file_size(bs->file->bs);
2459    }
2460    return -ENOTSUP;
2461}
2462
2463/**
2464 * Return number of sectors on success, -errno on error.
2465 */
2466int64_t bdrv_nb_sectors(BlockDriverState *bs)
2467{
2468    BlockDriver *drv = bs->drv;
2469
2470    if (!drv)
2471        return -ENOMEDIUM;
2472
2473    if (drv->has_variable_length) {
2474        int ret = refresh_total_sectors(bs, bs->total_sectors);
2475        if (ret < 0) {
2476            return ret;
2477        }
2478    }
2479    return bs->total_sectors;
2480}
2481
2482/**
2483 * Return length in bytes on success, -errno on error.
2484 * The length is always a multiple of BDRV_SECTOR_SIZE.
2485 */
2486int64_t bdrv_getlength(BlockDriverState *bs)
2487{
2488    int64_t ret = bdrv_nb_sectors(bs);
2489
2490    ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2491    return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2492}
2493
2494/* return 0 as number of sectors if no device present or error */
2495void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2496{
2497    int64_t nb_sectors = bdrv_nb_sectors(bs);
2498
2499    *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2500}
2501
2502int bdrv_is_read_only(BlockDriverState *bs)
2503{
2504    return bs->read_only;
2505}
2506
2507int bdrv_is_sg(BlockDriverState *bs)
2508{
2509    return bs->sg;
2510}
2511
2512int bdrv_enable_write_cache(BlockDriverState *bs)
2513{
2514    return bs->enable_write_cache;
2515}
2516
2517void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2518{
2519    bs->enable_write_cache = wce;
2520
2521    /* so a reopen() will preserve wce */
2522    if (wce) {
2523        bs->open_flags |= BDRV_O_CACHE_WB;
2524    } else {
2525        bs->open_flags &= ~BDRV_O_CACHE_WB;
2526    }
2527}
2528
2529int bdrv_is_encrypted(BlockDriverState *bs)
2530{
2531    if (bs->backing && bs->backing->bs->encrypted) {
2532        return 1;
2533    }
2534    return bs->encrypted;
2535}
2536
2537int bdrv_key_required(BlockDriverState *bs)
2538{
2539    BdrvChild *backing = bs->backing;
2540
2541    if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
2542        return 1;
2543    }
2544    return (bs->encrypted && !bs->valid_key);
2545}
2546
2547int bdrv_set_key(BlockDriverState *bs, const char *key)
2548{
2549    int ret;
2550    if (bs->backing && bs->backing->bs->encrypted) {
2551        ret = bdrv_set_key(bs->backing->bs, key);
2552        if (ret < 0)
2553            return ret;
2554        if (!bs->encrypted)
2555            return 0;
2556    }
2557    if (!bs->encrypted) {
2558        return -EINVAL;
2559    } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2560        return -ENOMEDIUM;
2561    }
2562    ret = bs->drv->bdrv_set_key(bs, key);
2563    if (ret < 0) {
2564        bs->valid_key = 0;
2565    } else if (!bs->valid_key) {
2566        bs->valid_key = 1;
2567        if (bs->blk) {
2568            /* call the change callback now, we skipped it on open */
2569            blk_dev_change_media_cb(bs->blk, true);
2570        }
2571    }
2572    return ret;
2573}
2574
2575/*
2576 * Provide an encryption key for @bs.
2577 * If @key is non-null:
2578 *     If @bs is not encrypted, fail.
2579 *     Else if the key is invalid, fail.
2580 *     Else set @bs's key to @key, replacing the existing key, if any.
2581 * If @key is null:
2582 *     If @bs is encrypted and still lacks a key, fail.
2583 *     Else do nothing.
2584 * On failure, store an error object through @errp if non-null.
2585 */
2586void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2587{
2588    if (key) {
2589        if (!bdrv_is_encrypted(bs)) {
2590            error_setg(errp, "Node '%s' is not encrypted",
2591                      bdrv_get_device_or_node_name(bs));
2592        } else if (bdrv_set_key(bs, key) < 0) {
2593            error_setg(errp, QERR_INVALID_PASSWORD);
2594        }
2595    } else {
2596        if (bdrv_key_required(bs)) {
2597            error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2598                      "'%s' (%s) is encrypted",
2599                      bdrv_get_device_or_node_name(bs),
2600                      bdrv_get_encrypted_filename(bs));
2601        }
2602    }
2603}
2604
2605const char *bdrv_get_format_name(BlockDriverState *bs)
2606{
2607    return bs->drv ? bs->drv->format_name : NULL;
2608}
2609
/*
 * qsort() comparison callback for an array of C strings.
 *
 * qsort() hands the callback pointers to the array *elements*, so @a and
 * @b are of type "const char **" here, not "const char *". They have to
 * be dereferenced once before the strings can be compared; passing them
 * to strcmp() directly would compare the bytes of the pointers.
 *
 * Returns <0, 0 or >0 as strcmp() does.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2614
2615void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2616                         void *opaque)
2617{
2618    BlockDriver *drv;
2619    int count = 0;
2620    int i;
2621    const char **formats = NULL;
2622
2623    QLIST_FOREACH(drv, &bdrv_drivers, list) {
2624        if (drv->format_name) {
2625            bool found = false;
2626            int i = count;
2627            while (formats && i && !found) {
2628                found = !strcmp(formats[--i], drv->format_name);
2629            }
2630
2631            if (!found) {
2632                formats = g_renew(const char *, formats, count + 1);
2633                formats[count++] = drv->format_name;
2634            }
2635        }
2636    }
2637
2638    qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2639
2640    for (i = 0; i < count; i++) {
2641        it(opaque, formats[i]);
2642    }
2643
2644    g_free(formats);
2645}
2646
2647/* This function is to find a node in the bs graph */
2648BlockDriverState *bdrv_find_node(const char *node_name)
2649{
2650    BlockDriverState *bs;
2651
2652    assert(node_name);
2653
2654    QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2655        if (!strcmp(node_name, bs->node_name)) {
2656            return bs;
2657        }
2658    }
2659    return NULL;
2660}
2661
2662/* Put this QMP function here so it can access the static graph_bdrv_states. */
2663BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2664{
2665    BlockDeviceInfoList *list, *entry;
2666    BlockDriverState *bs;
2667
2668    list = NULL;
2669    QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2670        BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2671        if (!info) {
2672            qapi_free_BlockDeviceInfoList(list);
2673            return NULL;
2674        }
2675        entry = g_malloc0(sizeof(*entry));
2676        entry->value = info;
2677        entry->next = list;
2678        list = entry;
2679    }
2680
2681    return list;
2682}
2683
2684BlockDriverState *bdrv_lookup_bs(const char *device,
2685                                 const char *node_name,
2686                                 Error **errp)
2687{
2688    BlockBackend *blk;
2689    BlockDriverState *bs;
2690
2691    if (device) {
2692        blk = blk_by_name(device);
2693
2694        if (blk) {
2695            bs = blk_bs(blk);
2696            if (!bs) {
2697                error_setg(errp, "Device '%s' has no medium", device);
2698            }
2699
2700            return bs;
2701        }
2702    }
2703
2704    if (node_name) {
2705        bs = bdrv_find_node(node_name);
2706
2707        if (bs) {
2708            return bs;
2709        }
2710    }
2711
2712    error_setg(errp, "Cannot find device=%s nor node_name=%s",
2713                     device ? device : "",
2714                     node_name ? node_name : "");
2715    return NULL;
2716}
2717
2718/* If 'base' is in the same chain as 'top', return true. Otherwise,
2719 * return false.  If either argument is NULL, return false. */
2720bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2721{
2722    while (top && top != base) {
2723        top = backing_bs(top);
2724    }
2725
2726    return top != NULL;
2727}
2728
2729BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2730{
2731    if (!bs) {
2732        return QTAILQ_FIRST(&graph_bdrv_states);
2733    }
2734    return QTAILQ_NEXT(bs, node_list);
2735}
2736
2737BlockDriverState *bdrv_next(BlockDriverState *bs)
2738{
2739    if (!bs) {
2740        return QTAILQ_FIRST(&bdrv_states);
2741    }
2742    return QTAILQ_NEXT(bs, device_list);
2743}
2744
2745const char *bdrv_get_node_name(const BlockDriverState *bs)
2746{
2747    return bs->node_name;
2748}
2749
2750/* TODO check what callers really want: bs->node_name or blk_name() */
2751const char *bdrv_get_device_name(const BlockDriverState *bs)
2752{
2753    return bs->blk ? blk_name(bs->blk) : "";
2754}
2755
2756/* This can be used to identify nodes that might not have a device
2757 * name associated. Since node and device names live in the same
2758 * namespace, the result is unambiguous. The exception is if both are
2759 * absent, then this returns an empty (non-null) string. */
2760const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2761{
2762    return bs->blk ? blk_name(bs->blk) : bs->node_name;
2763}
2764
2765int bdrv_get_flags(BlockDriverState *bs)
2766{
2767    return bs->open_flags;
2768}
2769
2770int bdrv_has_zero_init_1(BlockDriverState *bs)
2771{
2772    return 1;
2773}
2774
2775int bdrv_has_zero_init(BlockDriverState *bs)
2776{
2777    assert(bs->drv);
2778
2779    /* If BS is a copy on write image, it is initialized to
2780       the contents of the base image, which may not be zeroes.  */
2781    if (bs->backing) {
2782        return 0;
2783    }
2784    if (bs->drv->bdrv_has_zero_init) {
2785        return bs->drv->bdrv_has_zero_init(bs);
2786    }
2787
2788    /* safe default */
2789    return 0;
2790}
2791
2792bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
2793{
2794    BlockDriverInfo bdi;
2795
2796    if (bs->backing) {
2797        return false;
2798    }
2799
2800    if (bdrv_get_info(bs, &bdi) == 0) {
2801        return bdi.unallocated_blocks_are_zero;
2802    }
2803
2804    return false;
2805}
2806
2807bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
2808{
2809    BlockDriverInfo bdi;
2810
2811    if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
2812        return false;
2813    }
2814
2815    if (bdrv_get_info(bs, &bdi) == 0) {
2816        return bdi.can_write_zeroes_with_unmap;
2817    }
2818
2819    return false;
2820}
2821
2822const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2823{
2824    if (bs->backing && bs->backing->bs->encrypted)
2825        return bs->backing_file;
2826    else if (bs->encrypted)
2827        return bs->filename;
2828    else
2829        return NULL;
2830}
2831
2832void bdrv_get_backing_filename(BlockDriverState *bs,
2833                               char *filename, int filename_size)
2834{
2835    pstrcpy(filename, filename_size, bs->backing_file);
2836}
2837
2838int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2839{
2840    BlockDriver *drv = bs->drv;
2841    if (!drv)
2842        return -ENOMEDIUM;
2843    if (!drv->bdrv_get_info)
2844        return -ENOTSUP;
2845    memset(bdi, 0, sizeof(*bdi));
2846    return drv->bdrv_get_info(bs, bdi);
2847}
2848
2849ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
2850{
2851    BlockDriver *drv = bs->drv;
2852    if (drv && drv->bdrv_get_specific_info) {
2853        return drv->bdrv_get_specific_info(bs);
2854    }
2855    return NULL;
2856}
2857
2858void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2859{
2860    if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
2861        return;
2862    }
2863
2864    bs->drv->bdrv_debug_event(bs, event);
2865}
2866
2867int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
2868                          const char *tag)
2869{
2870    while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
2871        bs = bs->file ? bs->file->bs : NULL;
2872    }
2873
2874    if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
2875        return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
2876    }
2877
2878    return -ENOTSUP;
2879}
2880
2881int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
2882{
2883    while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
2884        bs = bs->file ? bs->file->bs : NULL;
2885    }
2886
2887    if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
2888        return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
2889    }
2890
2891    return -ENOTSUP;
2892}
2893
2894int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
2895{
2896    while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
2897        bs = bs->file ? bs->file->bs : NULL;
2898    }
2899
2900    if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
2901        return bs->drv->bdrv_debug_resume(bs, tag);
2902    }
2903
2904    return -ENOTSUP;
2905}
2906
2907bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
2908{
2909    while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
2910        bs = bs->file ? bs->file->bs : NULL;
2911    }
2912
2913    if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
2914        return bs->drv->bdrv_debug_is_suspended(bs, tag);
2915    }
2916
2917    return false;
2918}
2919
2920int bdrv_is_snapshot(BlockDriverState *bs)
2921{
2922    return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2923}
2924
2925/* backing_file can either be relative, or absolute, or a protocol.  If it is
2926 * relative, it must be relative to the chain.  So, passing in bs->filename
2927 * from a BDS as backing_file should not be done, as that may be relative to
2928 * the CWD rather than the chain. */
2929BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2930        const char *backing_file)
2931{
2932    char *filename_full = NULL;
2933    char *backing_file_full = NULL;
2934    char *filename_tmp = NULL;
2935    int is_protocol = 0;
2936    BlockDriverState *curr_bs = NULL;
2937    BlockDriverState *retval = NULL;
2938
2939    if (!bs || !bs->drv || !backing_file) {
2940        return NULL;
2941    }
2942
2943    filename_full     = g_malloc(PATH_MAX);
2944    backing_file_full = g_malloc(PATH_MAX);
2945    filename_tmp      = g_malloc(PATH_MAX);
2946
2947    is_protocol = path_has_protocol(backing_file);
2948
2949    for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
2950
2951        /* If either of the filename paths is actually a protocol, then
2952         * compare unmodified paths; otherwise make paths relative */
2953        if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
2954            if (strcmp(backing_file, curr_bs->backing_file) == 0) {
2955                retval = curr_bs->backing->bs;
2956                break;
2957            }
2958        } else {
2959            /* If not an absolute filename path, make it relative to the current
2960             * image's filename path */
2961            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
2962                         backing_file);
2963
2964            /* We are going to compare absolute pathnames */
2965            if (!realpath(filename_tmp, filename_full)) {
2966                continue;
2967            }
2968
2969            /* We need to make sure the backing filename we are comparing against
2970             * is relative to the current image filename (or absolute) */
2971            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
2972                         curr_bs->backing_file);
2973
2974            if (!realpath(filename_tmp, backing_file_full)) {
2975                continue;
2976            }
2977
2978            if (strcmp(backing_file_full, filename_full) == 0) {
2979                retval = curr_bs->backing->bs;
2980                break;
2981            }
2982        }
2983    }
2984
2985    g_free(filename_full);
2986    g_free(backing_file_full);
2987    g_free(filename_tmp);
2988    return retval;
2989}
2990
2991int bdrv_get_backing_file_depth(BlockDriverState *bs)
2992{
2993    if (!bs->drv) {
2994        return 0;
2995    }
2996
2997    if (!bs->backing) {
2998        return 0;
2999    }
3000
3001    return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
3002}
3003
3004void bdrv_init(void)
3005{
3006    module_call_init(MODULE_INIT_BLOCK);
3007}
3008
3009void bdrv_init_with_whitelist(void)
3010{
3011    use_bdrv_whitelist = 1;
3012    bdrv_init();
3013}
3014
3015void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3016{
3017    Error *local_err = NULL;
3018    int ret;
3019
3020    if (!bs->drv)  {
3021        return;
3022    }
3023
3024    if (!(bs->open_flags & BDRV_O_INCOMING)) {
3025        return;
3026    }
3027    bs->open_flags &= ~BDRV_O_INCOMING;
3028
3029    if (bs->drv->bdrv_invalidate_cache) {
3030        bs->drv->bdrv_invalidate_cache(bs, &local_err);
3031    } else if (bs->file) {
3032        bdrv_invalidate_cache(bs->file->bs, &local_err);
3033    }
3034    if (local_err) {
3035        error_propagate(errp, local_err);
3036        return;
3037    }
3038
3039    ret = refresh_total_sectors(bs, bs->total_sectors);
3040    if (ret < 0) {
3041        error_setg_errno(errp, -ret, "Could not refresh total sector count");
3042        return;
3043    }
3044}
3045
3046void bdrv_invalidate_cache_all(Error **errp)
3047{
3048    BlockDriverState *bs;
3049    Error *local_err = NULL;
3050
3051    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3052        AioContext *aio_context = bdrv_get_aio_context(bs);
3053
3054        aio_context_acquire(aio_context);
3055        bdrv_invalidate_cache(bs, &local_err);
3056        aio_context_release(aio_context);
3057        if (local_err) {
3058            error_propagate(errp, local_err);
3059            return;
3060        }
3061    }
3062}
3063
3064/**************************************************************/
3065/* removable device support */
3066
3067/**
3068 * Return TRUE if the media is present
3069 */
3070bool bdrv_is_inserted(BlockDriverState *bs)
3071{
3072    BlockDriver *drv = bs->drv;
3073    BdrvChild *child;
3074
3075    if (!drv) {
3076        return false;
3077    }
3078    if (drv->bdrv_is_inserted) {
3079        return drv->bdrv_is_inserted(bs);
3080    }
3081    QLIST_FOREACH(child, &bs->children, next) {
3082        if (!bdrv_is_inserted(child->bs)) {
3083            return false;
3084        }
3085    }
3086    return true;
3087}
3088
3089/**
3090 * Return whether the media changed since the last call to this
3091 * function, or -ENOTSUP if we don't know.  Most drivers don't know.
3092 */
3093int bdrv_media_changed(BlockDriverState *bs)
3094{
3095    BlockDriver *drv = bs->drv;
3096
3097    if (drv && drv->bdrv_media_changed) {
3098        return drv->bdrv_media_changed(bs);
3099    }
3100    return -ENOTSUP;
3101}
3102
3103/**
3104 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3105 */
3106void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3107{
3108    BlockDriver *drv = bs->drv;
3109    const char *device_name;
3110
3111    if (drv && drv->bdrv_eject) {
3112        drv->bdrv_eject(bs, eject_flag);
3113    }
3114
3115    device_name = bdrv_get_device_name(bs);
3116    if (device_name[0] != '\0') {
3117        qapi_event_send_device_tray_moved(device_name,
3118                                          eject_flag, &error_abort);
3119    }
3120}
3121
3122/**
3123 * Lock or unlock the media (if it is locked, the user won't be able
3124 * to eject it manually).
3125 */
3126void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3127{
3128    BlockDriver *drv = bs->drv;
3129
3130    trace_bdrv_lock_medium(bs, locked);
3131
3132    if (drv && drv->bdrv_lock_medium) {
3133        drv->bdrv_lock_medium(bs, locked);
3134    }
3135}
3136
3137BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3138{
3139    BdrvDirtyBitmap *bm;
3140
3141    assert(name);
3142    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3143        if (bm->name && !strcmp(name, bm->name)) {
3144            return bm;
3145        }
3146    }
3147    return NULL;
3148}
3149
3150void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
3151{
3152    assert(!bdrv_dirty_bitmap_frozen(bitmap));
3153    g_free(bitmap->name);
3154    bitmap->name = NULL;
3155}
3156
3157BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3158                                          uint32_t granularity,
3159                                          const char *name,
3160                                          Error **errp)
3161{
3162    int64_t bitmap_size;
3163    BdrvDirtyBitmap *bitmap;
3164    uint32_t sector_granularity;
3165
3166    assert((granularity & (granularity - 1)) == 0);
3167
3168    if (name && bdrv_find_dirty_bitmap(bs, name)) {
3169        error_setg(errp, "Bitmap already exists: %s", name);
3170        return NULL;
3171    }
3172    sector_granularity = granularity >> BDRV_SECTOR_BITS;
3173    assert(sector_granularity);
3174    bitmap_size = bdrv_nb_sectors(bs);
3175    if (bitmap_size < 0) {
3176        error_setg_errno(errp, -bitmap_size, "could not get length of device");
3177        errno = -bitmap_size;
3178        return NULL;
3179    }
3180    bitmap = g_new0(BdrvDirtyBitmap, 1);
3181    bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
3182    bitmap->size = bitmap_size;
3183    bitmap->name = g_strdup(name);
3184    bitmap->disabled = false;
3185    QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
3186    return bitmap;
3187}
3188
3189bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
3190{
3191    return bitmap->successor;
3192}
3193
3194bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3195{
3196    return !(bitmap->disabled || bitmap->successor);
3197}
3198
3199DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
3200{
3201    if (bdrv_dirty_bitmap_frozen(bitmap)) {
3202        return DIRTY_BITMAP_STATUS_FROZEN;
3203    } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3204        return DIRTY_BITMAP_STATUS_DISABLED;
3205    } else {
3206        return DIRTY_BITMAP_STATUS_ACTIVE;
3207    }
3208}
3209
3210/**
3211 * Create a successor bitmap destined to replace this bitmap after an operation.
3212 * Requires that the bitmap is not frozen and has no successor.
3213 */
3214int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3215                                       BdrvDirtyBitmap *bitmap, Error **errp)
3216{
3217    uint64_t granularity;
3218    BdrvDirtyBitmap *child;
3219
3220    if (bdrv_dirty_bitmap_frozen(bitmap)) {
3221        error_setg(errp, "Cannot create a successor for a bitmap that is "
3222                   "currently frozen");
3223        return -1;
3224    }
3225    assert(!bitmap->successor);
3226
3227    /* Create an anonymous successor */
3228    granularity = bdrv_dirty_bitmap_granularity(bitmap);
3229    child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3230    if (!child) {
3231        return -1;
3232    }
3233
3234    /* Successor will be on or off based on our current state. */
3235    child->disabled = bitmap->disabled;
3236
3237    /* Install the successor and freeze the parent */
3238    bitmap->successor = child;
3239    return 0;
3240}
3241
3242/**
3243 * For a bitmap with a successor, yield our name to the successor,
3244 * delete the old bitmap, and return a handle to the new bitmap.
3245 */
3246BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3247                                            BdrvDirtyBitmap *bitmap,
3248                                            Error **errp)
3249{
3250    char *name;
3251    BdrvDirtyBitmap *successor = bitmap->successor;
3252
3253    if (successor == NULL) {
3254        error_setg(errp, "Cannot relinquish control if "
3255                   "there's no successor present");
3256        return NULL;
3257    }
3258
3259    name = bitmap->name;
3260    bitmap->name = NULL;
3261    successor->name = name;
3262    bitmap->successor = NULL;
3263    bdrv_release_dirty_bitmap(bs, bitmap);
3264
3265    return successor;
3266}
3267
3268/**
3269 * In cases of failure where we can no longer safely delete the parent,
3270 * we may wish to re-join the parent and child/successor.
3271 * The merged parent will be un-frozen, but not explicitly re-enabled.
3272 */
3273BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3274                                           BdrvDirtyBitmap *parent,
3275                                           Error **errp)
3276{
3277    BdrvDirtyBitmap *successor = parent->successor;
3278
3279    if (!successor) {
3280        error_setg(errp, "Cannot reclaim a successor when none is present");
3281        return NULL;
3282    }
3283
3284    if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3285        error_setg(errp, "Merging of parent and successor bitmap failed");
3286        return NULL;
3287    }
3288    bdrv_release_dirty_bitmap(bs, successor);
3289    parent->successor = NULL;
3290
3291    return parent;
3292}
3293
3294/**
3295 * Truncates _all_ bitmaps attached to a BDS.
3296 */
3297static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3298{
3299    BdrvDirtyBitmap *bitmap;
3300    uint64_t size = bdrv_nb_sectors(bs);
3301
3302    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3303        assert(!bdrv_dirty_bitmap_frozen(bitmap));
3304        hbitmap_truncate(bitmap->bitmap, size);
3305        bitmap->size = size;
3306    }
3307}
3308
3309void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
3310{
3311    BdrvDirtyBitmap *bm, *next;
3312    QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3313        if (bm == bitmap) {
3314            assert(!bdrv_dirty_bitmap_frozen(bm));
3315            QLIST_REMOVE(bitmap, list);
3316            hbitmap_free(bitmap->bitmap);
3317            g_free(bitmap->name);
3318            g_free(bitmap);
3319            return;
3320        }
3321    }
3322}
3323
3324void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3325{
3326    assert(!bdrv_dirty_bitmap_frozen(bitmap));
3327    bitmap->disabled = true;
3328}
3329
3330void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3331{
3332    assert(!bdrv_dirty_bitmap_frozen(bitmap));
3333    bitmap->disabled = false;
3334}
3335
3336BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3337{
3338    BdrvDirtyBitmap *bm;
3339    BlockDirtyInfoList *list = NULL;
3340    BlockDirtyInfoList **plist = &list;
3341
3342    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3343        BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3344        BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
3345        info->count = bdrv_get_dirty_count(bm);
3346        info->granularity = bdrv_dirty_bitmap_granularity(bm);
3347        info->has_name = !!bm->name;
3348        info->name = g_strdup(bm->name);
3349        info->status = bdrv_dirty_bitmap_status(bm);
3350        entry->value = info;
3351        *plist = entry;
3352        plist = &entry->next;
3353    }
3354
3355    return list;
3356}
3357
3358int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
3359{
3360    if (bitmap) {
3361        return hbitmap_get(bitmap->bitmap, sector);
3362    } else {
3363        return 0;
3364    }
3365}
3366
3367/**
3368 * Chooses a default granularity based on the existing cluster size,
3369 * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3370 * is no cluster size information available.
3371 */
3372uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3373{
3374    BlockDriverInfo bdi;
3375    uint32_t granularity;
3376
3377    if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3378        granularity = MAX(4096, bdi.cluster_size);
3379        granularity = MIN(65536, granularity);
3380    } else {
3381        granularity = 65536;
3382    }
3383
3384    return granularity;
3385}
3386
3387uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
3388{
3389    return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
3390}
3391
3392void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
3393{
3394    hbitmap_iter_init(hbi, bitmap->bitmap, 0);
3395}
3396
3397void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3398                           int64_t cur_sector, int nr_sectors)
3399{
3400    assert(bdrv_dirty_bitmap_enabled(bitmap));
3401    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3402}
3403
3404void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3405                             int64_t cur_sector, int nr_sectors)
3406{
3407    assert(bdrv_dirty_bitmap_enabled(bitmap));
3408    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3409}
3410
3411void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
3412{
3413    assert(bdrv_dirty_bitmap_enabled(bitmap));
3414    if (!out) {
3415        hbitmap_reset_all(bitmap->bitmap);
3416    } else {
3417        HBitmap *backup = bitmap->bitmap;
3418        bitmap->bitmap = hbitmap_alloc(bitmap->size,
3419                                       hbitmap_granularity(backup));
3420        *out = backup;
3421    }
3422}
3423
3424void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
3425{
3426    HBitmap *tmp = bitmap->bitmap;
3427    assert(bdrv_dirty_bitmap_enabled(bitmap));
3428    bitmap->bitmap = in;
3429    hbitmap_free(tmp);
3430}
3431
3432void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3433                    int nr_sectors)
3434{
3435    BdrvDirtyBitmap *bitmap;
3436    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3437        if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3438            continue;
3439        }
3440        hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3441    }
3442}
3443
3444/**
3445 * Advance an HBitmapIter to an arbitrary offset.
3446 */
3447void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
3448{
3449    assert(hbi->hb);
3450    hbitmap_iter_init(hbi, hbi->hb, offset);
3451}
3452
3453int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
3454{
3455    return hbitmap_count(bitmap->bitmap);
3456}
3457
3458/* Get a reference to bs */
3459void bdrv_ref(BlockDriverState *bs)
3460{
3461    bs->refcnt++;
3462}
3463
3464/* Release a previously grabbed reference to bs.
3465 * If after releasing, reference count is zero, the BlockDriverState is
3466 * deleted. */
3467void bdrv_unref(BlockDriverState *bs)
3468{
3469    if (!bs) {
3470        return;
3471    }
3472    assert(bs->refcnt > 0);
3473    if (--bs->refcnt == 0) {
3474        bdrv_delete(bs);
3475    }
3476}
3477
3478struct BdrvOpBlocker {
3479    Error *reason;
3480    QLIST_ENTRY(BdrvOpBlocker) list;
3481};
3482
3483bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3484{
3485    BdrvOpBlocker *blocker;
3486    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3487    if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3488        blocker = QLIST_FIRST(&bs->op_blockers[op]);
3489        if (errp) {
3490            error_setg(errp, "Node '%s' is busy: %s",
3491                       bdrv_get_device_or_node_name(bs),
3492                       error_get_pretty(blocker->reason));
3493        }
3494        return true;
3495    }
3496    return false;
3497}
3498
3499void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3500{
3501    BdrvOpBlocker *blocker;
3502    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3503
3504    blocker = g_new0(BdrvOpBlocker, 1);
3505    blocker->reason = reason;
3506    QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3507}
3508
3509void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3510{
3511    BdrvOpBlocker *blocker, *next;
3512    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3513    QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3514        if (blocker->reason == reason) {
3515            QLIST_REMOVE(blocker, list);
3516            g_free(blocker);
3517        }
3518    }
3519}
3520
3521void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3522{
3523    int i;
3524    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3525        bdrv_op_block(bs, i, reason);
3526    }
3527}
3528
3529void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3530{
3531    int i;
3532    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3533        bdrv_op_unblock(bs, i, reason);
3534    }
3535}
3536
3537bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3538{
3539    int i;
3540
3541    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3542        if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3543            return false;
3544        }
3545    }
3546    return true;
3547}
3548
3549void bdrv_img_create(const char *filename, const char *fmt,
3550                     const char *base_filename, const char *base_fmt,
3551                     char *options, uint64_t img_size, int flags,
3552                     Error **errp, bool quiet)
3553{
3554    QemuOptsList *create_opts = NULL;
3555    QemuOpts *opts = NULL;
3556    const char *backing_fmt, *backing_file;
3557    int64_t size;
3558    BlockDriver *drv, *proto_drv;
3559    Error *local_err = NULL;
3560    int ret = 0;
3561
3562    /* Find driver and parse its options */
3563    drv = bdrv_find_format(fmt);
3564    if (!drv) {
3565        error_setg(errp, "Unknown file format '%s'", fmt);
3566        return;
3567    }
3568
3569    proto_drv = bdrv_find_protocol(filename, true, errp);
3570    if (!proto_drv) {
3571        return;
3572    }
3573
3574    if (!drv->create_opts) {
3575        error_setg(errp, "Format driver '%s' does not support image creation",
3576                   drv->format_name);
3577        return;
3578    }
3579
3580    if (!proto_drv->create_opts) {
3581        error_setg(errp, "Protocol driver '%s' does not support image creation",
3582                   proto_drv->format_name);
3583        return;
3584    }
3585
3586    create_opts = qemu_opts_append(create_opts, drv->create_opts);
3587    create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3588
3589    /* Create parameter list with default values */
3590    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3591    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3592
3593    /* Parse -o options */
3594    if (options) {
3595        qemu_opts_do_parse(opts, options, NULL, &local_err);
3596        if (local_err) {
3597            error_report_err(local_err);
3598            local_err = NULL;
3599            error_setg(errp, "Invalid options for file format '%s'", fmt);
3600            goto out;
3601        }
3602    }
3603
3604    if (base_filename) {
3605        qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3606        if (local_err) {
3607            error_setg(errp, "Backing file not supported for file format '%s'",
3608                       fmt);
3609            goto out;
3610        }
3611    }
3612
3613    if (base_fmt) {
3614        qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3615        if (local_err) {
3616            error_setg(errp, "Backing file format not supported for file "
3617                             "format '%s'", fmt);
3618            goto out;
3619        }
3620    }
3621
3622    backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3623    if (backing_file) {
3624        if (!strcmp(filename, backing_file)) {
3625            error_setg(errp, "Error: Trying to create an image with the "
3626                             "same filename as the backing file");
3627            goto out;
3628        }
3629    }
3630
3631    backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3632
3633    // The size for the image must always be specified, with one exception:
3634    // If we are using a backing file, we can obtain the size from there
3635    size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3636    if (size == -1) {
3637        if (backing_file) {
3638            BlockDriverState *bs;
3639            char *full_backing = g_new0(char, PATH_MAX);
3640            int64_t size;
3641            int back_flags;
3642            QDict *backing_options = NULL;
3643
3644            bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3645                                                         full_backing, PATH_MAX,
3646                                                         &local_err);
3647            if (local_err) {
3648                g_free(full_backing);
3649                goto out;
3650            }
3651
3652            /* backing files always opened read-only */
3653            back_flags =
3654                flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3655
3656            if (backing_fmt) {
3657                backing_options = qdict_new();
3658                qdict_put(backing_options, "driver",
3659                          qstring_from_str(backing_fmt));
3660            }
3661
3662            bs = NULL;
3663            ret = bdrv_open(&bs, full_backing, NULL, backing_options,
3664                            back_flags, &local_err);
3665            g_free(full_backing);
3666            if (ret < 0) {
3667                goto out;
3668            }
3669            size = bdrv_getlength(bs);
3670            if (size < 0) {
3671                error_setg_errno(errp, -size, "Could not get size of '%s'",
3672                                 backing_file);
3673                bdrv_unref(bs);
3674                goto out;
3675            }
3676
3677            qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3678
3679            bdrv_unref(bs);
3680        } else {
3681            error_setg(errp, "Image creation needs a size parameter");
3682            goto out;
3683        }
3684    }
3685
3686    if (!quiet) {
3687        printf("Formatting '%s', fmt=%s ", filename, fmt);
3688        qemu_opts_print(opts, " ");
3689        puts("");
3690    }
3691
3692    ret = bdrv_create(drv, filename, opts, &local_err);
3693
3694    if (ret == -EFBIG) {
3695        /* This is generally a better message than whatever the driver would
3696         * deliver (especially because of the cluster_size_hint), since that
3697         * is most probably not much different from "image too large". */
3698        const char *cluster_size_hint = "";
3699        if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
3700            cluster_size_hint = " (try using a larger cluster size)";
3701        }
3702        error_setg(errp, "The image size is too large for file format '%s'"
3703                   "%s", fmt, cluster_size_hint);
3704        error_free(local_err);
3705        local_err = NULL;
3706    }
3707
3708out:
3709    qemu_opts_del(opts);
3710    qemu_opts_free(create_opts);
3711    if (local_err) {
3712        error_propagate(errp, local_err);
3713    }
3714}
3715
3716AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3717{
3718    return bs->aio_context;
3719}
3720
3721void bdrv_detach_aio_context(BlockDriverState *bs)
3722{
3723    BdrvAioNotifier *baf;
3724
3725    if (!bs->drv) {
3726        return;
3727    }
3728
3729    QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3730        baf->detach_aio_context(baf->opaque);
3731    }
3732
3733    if (bs->throttle_state) {
3734        throttle_timers_detach_aio_context(&bs->throttle_timers);
3735    }
3736    if (bs->drv->bdrv_detach_aio_context) {
3737        bs->drv->bdrv_detach_aio_context(bs);
3738    }
3739    if (bs->file) {
3740        bdrv_detach_aio_context(bs->file->bs);
3741    }
3742    if (bs->backing) {
3743        bdrv_detach_aio_context(bs->backing->bs);
3744    }
3745
3746    bs->aio_context = NULL;
3747}
3748
3749void bdrv_attach_aio_context(BlockDriverState *bs,
3750                             AioContext *new_context)
3751{
3752    BdrvAioNotifier *ban;
3753
3754    if (!bs->drv) {
3755        return;
3756    }
3757
3758    bs->aio_context = new_context;
3759
3760    if (bs->backing) {
3761        bdrv_attach_aio_context(bs->backing->bs, new_context);
3762    }
3763    if (bs->file) {
3764        bdrv_attach_aio_context(bs->file->bs, new_context);
3765    }
3766    if (bs->drv->bdrv_attach_aio_context) {
3767        bs->drv->bdrv_attach_aio_context(bs, new_context);
3768    }
3769    if (bs->throttle_state) {
3770        throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
3771    }
3772
3773    QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3774        ban->attached_aio_context(new_context, ban->opaque);
3775    }
3776}
3777
3778void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3779{
3780    bdrv_drain(bs); /* ensure there are no in-flight requests */
3781
3782    bdrv_detach_aio_context(bs);
3783
3784    /* This function executes in the old AioContext so acquire the new one in
3785     * case it runs in a different thread.
3786     */
3787    aio_context_acquire(new_context);
3788    bdrv_attach_aio_context(bs, new_context);
3789    aio_context_release(new_context);
3790}
3791
3792void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3793        void (*attached_aio_context)(AioContext *new_context, void *opaque),
3794        void (*detach_aio_context)(void *opaque), void *opaque)
3795{
3796    BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3797    *ban = (BdrvAioNotifier){
3798        .attached_aio_context = attached_aio_context,
3799        .detach_aio_context   = detach_aio_context,
3800        .opaque               = opaque
3801    };
3802
3803    QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3804}
3805
3806void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3807                                      void (*attached_aio_context)(AioContext *,
3808                                                                   void *),
3809                                      void (*detach_aio_context)(void *),
3810                                      void *opaque)
3811{
3812    BdrvAioNotifier *ban, *ban_next;
3813
3814    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3815        if (ban->attached_aio_context == attached_aio_context &&
3816            ban->detach_aio_context   == detach_aio_context   &&
3817            ban->opaque               == opaque)
3818        {
3819            QLIST_REMOVE(ban, list);
3820            g_free(ban);
3821
3822            return;
3823        }
3824    }
3825
3826    abort();
3827}
3828
3829int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3830                       BlockDriverAmendStatusCB *status_cb)
3831{
3832    if (!bs->drv->bdrv_amend_options) {
3833        return -ENOTSUP;
3834    }
3835    return bs->drv->bdrv_amend_options(bs, opts, status_cb);
3836}
3837
3838/* This function will be called by the bdrv_recurse_is_first_non_filter method
3839 * of block filter and by bdrv_is_first_non_filter.
3840 * It is used to test if the given bs is the candidate or recurse more in the
3841 * node graph.
3842 */
3843bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
3844                                      BlockDriverState *candidate)
3845{
3846    /* return false if basic checks fails */
3847    if (!bs || !bs->drv) {
3848        return false;
3849    }
3850
3851    /* the code reached a non block filter driver -> check if the bs is
3852     * the same as the candidate. It's the recursion termination condition.
3853     */
3854    if (!bs->drv->is_filter) {
3855        return bs == candidate;
3856    }
3857    /* Down this path the driver is a block filter driver */
3858
3859    /* If the block filter recursion method is defined use it to recurse down
3860     * the node graph.
3861     */
3862    if (bs->drv->bdrv_recurse_is_first_non_filter) {
3863        return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
3864    }
3865
3866    /* the driver is a block filter but don't allow to recurse -> return false
3867     */
3868    return false;
3869}
3870
3871/* This function checks if the candidate is the first non filter bs down it's
3872 * bs chain. Since we don't have pointers to parents it explore all bs chains
3873 * from the top. Some filters can choose not to pass down the recursion.
3874 */
3875bool bdrv_is_first_non_filter(BlockDriverState *candidate)
3876{
3877    BlockDriverState *bs;
3878
3879    /* walk down the bs forest recursively */
3880    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3881        bool perm;
3882
3883        /* try to recurse in this top level bs */
3884        perm = bdrv_recurse_is_first_non_filter(bs, candidate);
3885
3886        /* candidate is the first non filter */
3887        if (perm) {
3888            return true;
3889        }
3890    }
3891
3892    return false;
3893}
3894
3895BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
3896                                        const char *node_name, Error **errp)
3897{
3898    BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
3899    AioContext *aio_context;
3900
3901    if (!to_replace_bs) {
3902        error_setg(errp, "Node name '%s' not found", node_name);
3903        return NULL;
3904    }
3905
3906    aio_context = bdrv_get_aio_context(to_replace_bs);
3907    aio_context_acquire(aio_context);
3908
3909    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
3910        to_replace_bs = NULL;
3911        goto out;
3912    }
3913
3914    /* We don't want arbitrary node of the BDS chain to be replaced only the top
3915     * most non filter in order to prevent data corruption.
3916     * Another benefit is that this tests exclude backing files which are
3917     * blocked by the backing blockers.
3918     */
3919    if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
3920        error_setg(errp, "Only top most non filter can be replaced");
3921        to_replace_bs = NULL;
3922        goto out;
3923    }
3924
3925out:
3926    aio_context_release(aio_context);
3927    return to_replace_bs;
3928}
3929
3930static bool append_open_options(QDict *d, BlockDriverState *bs)
3931{
3932    const QDictEntry *entry;
3933    bool found_any = false;
3934
3935    for (entry = qdict_first(bs->options); entry;
3936         entry = qdict_next(bs->options, entry))
3937    {
3938        /* Only take options for this level and exclude all non-driver-specific
3939         * options */
3940        if (!strchr(qdict_entry_key(entry), '.') &&
3941            strcmp(qdict_entry_key(entry), "node-name"))
3942        {
3943            qobject_incref(qdict_entry_value(entry));
3944            qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
3945            found_any = true;
3946        }
3947    }
3948
3949    return found_any;
3950}
3951
3952/* Updates the following BDS fields:
3953 *  - exact_filename: A filename which may be used for opening a block device
3954 *                    which (mostly) equals the given BDS (even without any
3955 *                    other options; so reading and writing must return the same
3956 *                    results, but caching etc. may be different)
3957 *  - full_open_options: Options which, when given when opening a block device
3958 *                       (without a filename), result in a BDS (mostly)
3959 *                       equalling the given one
3960 *  - filename: If exact_filename is set, it is copied here. Otherwise,
3961 *              full_open_options is converted to a JSON object, prefixed with
3962 *              "json:" (for use through the JSON pseudo protocol) and put here.
3963 */
3964void bdrv_refresh_filename(BlockDriverState *bs)
3965{
3966    BlockDriver *drv = bs->drv;
3967    QDict *opts;
3968
3969    if (!drv) {
3970        return;
3971    }
3972
3973    /* This BDS's file name will most probably depend on its file's name, so
3974     * refresh that first */
3975    if (bs->file) {
3976        bdrv_refresh_filename(bs->file->bs);
3977    }
3978
3979    if (drv->bdrv_refresh_filename) {
3980        /* Obsolete information is of no use here, so drop the old file name
3981         * information before refreshing it */
3982        bs->exact_filename[0] = '\0';
3983        if (bs->full_open_options) {
3984            QDECREF(bs->full_open_options);
3985            bs->full_open_options = NULL;
3986        }
3987
3988        drv->bdrv_refresh_filename(bs);
3989    } else if (bs->file) {
3990        /* Try to reconstruct valid information from the underlying file */
3991        bool has_open_options;
3992
3993        bs->exact_filename[0] = '\0';
3994        if (bs->full_open_options) {
3995            QDECREF(bs->full_open_options);
3996            bs->full_open_options = NULL;
3997        }
3998
3999        opts = qdict_new();
4000        has_open_options = append_open_options(opts, bs);
4001
4002        /* If no specific options have been given for this BDS, the filename of
4003         * the underlying file should suffice for this one as well */
4004        if (bs->file->bs->exact_filename[0] && !has_open_options) {
4005            strcpy(bs->exact_filename, bs->file->bs->exact_filename);
4006        }
4007        /* Reconstructing the full options QDict is simple for most format block
4008         * drivers, as long as the full options are known for the underlying
4009         * file BDS. The full options QDict of that file BDS should somehow
4010         * contain a representation of the filename, therefore the following
4011         * suffices without querying the (exact_)filename of this BDS. */
4012        if (bs->file->bs->full_open_options) {
4013            qdict_put_obj(opts, "driver",
4014                          QOBJECT(qstring_from_str(drv->format_name)));
4015            QINCREF(bs->file->bs->full_open_options);
4016            qdict_put_obj(opts, "file",
4017                          QOBJECT(bs->file->bs->full_open_options));
4018
4019            bs->full_open_options = opts;
4020        } else {
4021            QDECREF(opts);
4022        }
4023    } else if (!bs->full_open_options && qdict_size(bs->options)) {
4024        /* There is no underlying file BDS (at least referenced by BDS.file),
4025         * so the full options QDict should be equal to the options given
4026         * specifically for this block device when it was opened (plus the
4027         * driver specification).
4028         * Because those options don't change, there is no need to update
4029         * full_open_options when it's already set. */
4030
4031        opts = qdict_new();
4032        append_open_options(opts, bs);
4033        qdict_put_obj(opts, "driver",
4034                      QOBJECT(qstring_from_str(drv->format_name)));
4035
4036        if (bs->exact_filename[0]) {
4037            /* This may not work for all block protocol drivers (some may
4038             * require this filename to be parsed), but we have to find some
4039             * default solution here, so just include it. If some block driver
4040             * does not support pure options without any filename at all or
4041             * needs some special format of the options QDict, it needs to
4042             * implement the driver-specific bdrv_refresh_filename() function.
4043             */
4044            qdict_put_obj(opts, "filename",
4045                          QOBJECT(qstring_from_str(bs->exact_filename)));
4046        }
4047
4048        bs->full_open_options = opts;
4049    }
4050
4051    if (bs->exact_filename[0]) {
4052        pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4053    } else if (bs->full_open_options) {
4054        QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4055        snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4056                 qstring_get_str(json));
4057        QDECREF(json);
4058    }
4059}
4060