qemu/block.c
<<
>>
Prefs
   1/*
   2 * QEMU System Emulator block driver
   3 *
   4 * Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24#include "qemu/osdep.h"
  25#include "trace.h"
  26#include "block/block_int.h"
  27#include "block/blockjob.h"
  28#include "qemu/error-report.h"
  29#include "qemu/module.h"
  30#include "qapi/qmp/qerror.h"
  31#include "qapi/qmp/qbool.h"
  32#include "qapi/qmp/qjson.h"
  33#include "sysemu/block-backend.h"
  34#include "sysemu/sysemu.h"
  35#include "qemu/notify.h"
  36#include "qemu/coroutine.h"
  37#include "block/qapi.h"
  38#include "qmp-commands.h"
  39#include "qemu/timer.h"
  40#include "qapi-event.h"
  41#include "block/throttle-groups.h"
  42#include "qemu/cutils.h"
  43#include "qemu/id.h"
  44
  45#ifdef CONFIG_BSD
  46#include <sys/ioctl.h>
  47#include <sys/queue.h>
  48#ifndef __DragonFly__
  49#include <sys/disk.h>
  50#endif
  51#endif
  52
  53#ifdef _WIN32
  54#include <windows.h>
  55#endif
  56
  57#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
  58
  59static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
  60    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
  61
  62static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
  63    QTAILQ_HEAD_INITIALIZER(all_bdrv_states);
  64
  65static QLIST_HEAD(, BlockDriver) bdrv_drivers =
  66    QLIST_HEAD_INITIALIZER(bdrv_drivers);
  67
  68static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
  69                             const char *reference, QDict *options, int flags,
  70                             BlockDriverState *parent,
  71                             const BdrvChildRole *child_role, Error **errp);
  72
  73/* If non-zero, use only whitelisted block drivers */
  74static int use_bdrv_whitelist;
  75
  76static void bdrv_close(BlockDriverState *bs);
  77
  78#ifdef _WIN32
  79static int is_windows_drive_prefix(const char *filename)
  80{
  81    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
  82             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
  83            filename[1] == ':');
  84}
  85
  86int is_windows_drive(const char *filename)
  87{
  88    if (is_windows_drive_prefix(filename) &&
  89        filename[2] == '\0')
  90        return 1;
  91    if (strstart(filename, "\\\\.\\", NULL) ||
  92        strstart(filename, "//./", NULL))
  93        return 1;
  94    return 0;
  95}
  96#endif
  97
  98size_t bdrv_opt_mem_align(BlockDriverState *bs)
  99{
 100    if (!bs || !bs->drv) {
 101        /* page size or 4k (hdd sector size) should be on the safe side */
 102        return MAX(4096, getpagesize());
 103    }
 104
 105    return bs->bl.opt_mem_alignment;
 106}
 107
 108size_t bdrv_min_mem_align(BlockDriverState *bs)
 109{
 110    if (!bs || !bs->drv) {
 111        /* page size or 4k (hdd sector size) should be on the safe side */
 112        return MAX(4096, getpagesize());
 113    }
 114
 115    return bs->bl.min_mem_alignment;
 116}
 117
 118/* check if the path starts with "<protocol>:" */
 119int path_has_protocol(const char *path)
 120{
 121    const char *p;
 122
 123#ifdef _WIN32
 124    if (is_windows_drive(path) ||
 125        is_windows_drive_prefix(path)) {
 126        return 0;
 127    }
 128    p = path + strcspn(path, ":/\\");
 129#else
 130    p = path + strcspn(path, ":/");
 131#endif
 132
 133    return *p == ':';
 134}
 135
 136int path_is_absolute(const char *path)
 137{
 138#ifdef _WIN32
 139    /* specific case for names like: "\\.\d:" */
 140    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
 141        return 1;
 142    }
 143    return (*path == '/' || *path == '\\');
 144#else
 145    return (*path == '/');
 146#endif
 147}
 148
 149/* if filename is absolute, just copy it to dest. Otherwise, build a
 150   path to it by considering it is relative to base_path. URL are
 151   supported. */
 152void path_combine(char *dest, int dest_size,
 153                  const char *base_path,
 154                  const char *filename)
 155{
 156    const char *p, *p1;
 157    int len;
 158
 159    if (dest_size <= 0)
 160        return;
 161    if (path_is_absolute(filename)) {
 162        pstrcpy(dest, dest_size, filename);
 163    } else {
 164        p = strchr(base_path, ':');
 165        if (p)
 166            p++;
 167        else
 168            p = base_path;
 169        p1 = strrchr(base_path, '/');
 170#ifdef _WIN32
 171        {
 172            const char *p2;
 173            p2 = strrchr(base_path, '\\');
 174            if (!p1 || p2 > p1)
 175                p1 = p2;
 176        }
 177#endif
 178        if (p1)
 179            p1++;
 180        else
 181            p1 = base_path;
 182        if (p1 > p)
 183            p = p1;
 184        len = p - base_path;
 185        if (len > dest_size - 1)
 186            len = dest_size - 1;
 187        memcpy(dest, base_path, len);
 188        dest[len] = '\0';
 189        pstrcat(dest, dest_size, filename);
 190    }
 191}
 192
 193void bdrv_get_full_backing_filename_from_filename(const char *backed,
 194                                                  const char *backing,
 195                                                  char *dest, size_t sz,
 196                                                  Error **errp)
 197{
 198    if (backing[0] == '\0' || path_has_protocol(backing) ||
 199        path_is_absolute(backing))
 200    {
 201        pstrcpy(dest, sz, backing);
 202    } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
 203        error_setg(errp, "Cannot use relative backing file names for '%s'",
 204                   backed);
 205    } else {
 206        path_combine(dest, sz, backed, backing);
 207    }
 208}
 209
 210void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
 211                                    Error **errp)
 212{
 213    char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
 214
 215    bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
 216                                                 dest, sz, errp);
 217}
 218
 219void bdrv_register(BlockDriver *bdrv)
 220{
 221    bdrv_setup_io_funcs(bdrv);
 222
 223    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
 224}
 225
 226BlockDriverState *bdrv_new_root(void)
 227{
 228    return bdrv_new();
 229}
 230
 231BlockDriverState *bdrv_new(void)
 232{
 233    BlockDriverState *bs;
 234    int i;
 235
 236    bs = g_new0(BlockDriverState, 1);
 237    QLIST_INIT(&bs->dirty_bitmaps);
 238    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
 239        QLIST_INIT(&bs->op_blockers[i]);
 240    }
 241    notifier_with_return_list_init(&bs->before_write_notifiers);
 242    qemu_co_queue_init(&bs->throttled_reqs[0]);
 243    qemu_co_queue_init(&bs->throttled_reqs[1]);
 244    bs->refcnt = 1;
 245    bs->aio_context = qemu_get_aio_context();
 246
 247    QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
 248
 249    return bs;
 250}
 251
 252BlockDriver *bdrv_find_format(const char *format_name)
 253{
 254    BlockDriver *drv1;
 255    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
 256        if (!strcmp(drv1->format_name, format_name)) {
 257            return drv1;
 258        }
 259    }
 260    return NULL;
 261}
 262
 263static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
 264{
 265    static const char *whitelist_rw[] = {
 266        CONFIG_BDRV_RW_WHITELIST
 267    };
 268    static const char *whitelist_ro[] = {
 269        CONFIG_BDRV_RO_WHITELIST
 270    };
 271    const char **p;
 272
 273    if (!whitelist_rw[0] && !whitelist_ro[0]) {
 274        return 1;               /* no whitelist, anything goes */
 275    }
 276
 277    for (p = whitelist_rw; *p; p++) {
 278        if (!strcmp(drv->format_name, *p)) {
 279            return 1;
 280        }
 281    }
 282    if (read_only) {
 283        for (p = whitelist_ro; *p; p++) {
 284            if (!strcmp(drv->format_name, *p)) {
 285                return 1;
 286            }
 287        }
 288    }
 289    return 0;
 290}
 291
 292bool bdrv_uses_whitelist(void)
 293{
 294    return use_bdrv_whitelist;
 295}
 296
 297typedef struct CreateCo {
 298    BlockDriver *drv;
 299    char *filename;
 300    QemuOpts *opts;
 301    int ret;
 302    Error *err;
 303} CreateCo;
 304
 305static void coroutine_fn bdrv_create_co_entry(void *opaque)
 306{
 307    Error *local_err = NULL;
 308    int ret;
 309
 310    CreateCo *cco = opaque;
 311    assert(cco->drv);
 312
 313    ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
 314    if (local_err) {
 315        error_propagate(&cco->err, local_err);
 316    }
 317    cco->ret = ret;
 318}
 319
 320int bdrv_create(BlockDriver *drv, const char* filename,
 321                QemuOpts *opts, Error **errp)
 322{
 323    int ret;
 324
 325    Coroutine *co;
 326    CreateCo cco = {
 327        .drv = drv,
 328        .filename = g_strdup(filename),
 329        .opts = opts,
 330        .ret = NOT_DONE,
 331        .err = NULL,
 332    };
 333
 334    if (!drv->bdrv_create) {
 335        error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
 336        ret = -ENOTSUP;
 337        goto out;
 338    }
 339
 340    if (qemu_in_coroutine()) {
 341        /* Fast-path if already in coroutine context */
 342        bdrv_create_co_entry(&cco);
 343    } else {
 344        co = qemu_coroutine_create(bdrv_create_co_entry);
 345        qemu_coroutine_enter(co, &cco);
 346        while (cco.ret == NOT_DONE) {
 347            aio_poll(qemu_get_aio_context(), true);
 348        }
 349    }
 350
 351    ret = cco.ret;
 352    if (ret < 0) {
 353        if (cco.err) {
 354            error_propagate(errp, cco.err);
 355        } else {
 356            error_setg_errno(errp, -ret, "Could not create image");
 357        }
 358    }
 359
 360out:
 361    g_free(cco.filename);
 362    return ret;
 363}
 364
 365int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
 366{
 367    BlockDriver *drv;
 368    Error *local_err = NULL;
 369    int ret;
 370
 371    drv = bdrv_find_protocol(filename, true, errp);
 372    if (drv == NULL) {
 373        return -ENOENT;
 374    }
 375
 376    ret = bdrv_create(drv, filename, opts, &local_err);
 377    if (local_err) {
 378        error_propagate(errp, local_err);
 379    }
 380    return ret;
 381}
 382
 383/**
 384 * Try to get @bs's logical and physical block size.
 385 * On success, store them in @bsz struct and return 0.
 386 * On failure return -errno.
 387 * @bs must not be empty.
 388 */
 389int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
 390{
 391    BlockDriver *drv = bs->drv;
 392
 393    if (drv && drv->bdrv_probe_blocksizes) {
 394        return drv->bdrv_probe_blocksizes(bs, bsz);
 395    }
 396
 397    return -ENOTSUP;
 398}
 399
 400/**
 401 * Try to get @bs's geometry (cyls, heads, sectors).
 402 * On success, store them in @geo struct and return 0.
 403 * On failure return -errno.
 404 * @bs must not be empty.
 405 */
 406int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
 407{
 408    BlockDriver *drv = bs->drv;
 409
 410    if (drv && drv->bdrv_probe_geometry) {
 411        return drv->bdrv_probe_geometry(bs, geo);
 412    }
 413
 414    return -ENOTSUP;
 415}
 416
 417/*
 418 * Create a uniquely-named empty temporary file.
 419 * Return 0 upon success, otherwise a negative errno value.
 420 */
 421int get_tmp_filename(char *filename, int size)
 422{
 423#ifdef _WIN32
 424    char temp_dir[MAX_PATH];
 425    /* GetTempFileName requires that its output buffer (4th param)
 426       have length MAX_PATH or greater.  */
 427    assert(size >= MAX_PATH);
 428    return (GetTempPath(MAX_PATH, temp_dir)
 429            && GetTempFileName(temp_dir, "qem", 0, filename)
 430            ? 0 : -GetLastError());
 431#else
 432    int fd;
 433    const char *tmpdir;
 434    tmpdir = getenv("TMPDIR");
 435    if (!tmpdir) {
 436        tmpdir = "/var/tmp";
 437    }
 438    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
 439        return -EOVERFLOW;
 440    }
 441    fd = mkstemp(filename);
 442    if (fd < 0) {
 443        return -errno;
 444    }
 445    if (close(fd) != 0) {
 446        unlink(filename);
 447        return -errno;
 448    }
 449    return 0;
 450#endif
 451}
 452
 453/*
 454 * Detect host devices. By convention, /dev/cdrom[N] is always
 455 * recognized as a host CDROM.
 456 */
 457static BlockDriver *find_hdev_driver(const char *filename)
 458{
 459    int score_max = 0, score;
 460    BlockDriver *drv = NULL, *d;
 461
 462    QLIST_FOREACH(d, &bdrv_drivers, list) {
 463        if (d->bdrv_probe_device) {
 464            score = d->bdrv_probe_device(filename);
 465            if (score > score_max) {
 466                score_max = score;
 467                drv = d;
 468            }
 469        }
 470    }
 471
 472    return drv;
 473}
 474
 475BlockDriver *bdrv_find_protocol(const char *filename,
 476                                bool allow_protocol_prefix,
 477                                Error **errp)
 478{
 479    BlockDriver *drv1;
 480    char protocol[128];
 481    int len;
 482    const char *p;
 483
 484    /* TODO Drivers without bdrv_file_open must be specified explicitly */
 485
 486    /*
 487     * XXX(hch): we really should not let host device detection
 488     * override an explicit protocol specification, but moving this
 489     * later breaks access to device names with colons in them.
 490     * Thanks to the brain-dead persistent naming schemes on udev-
 491     * based Linux systems those actually are quite common.
 492     */
 493    drv1 = find_hdev_driver(filename);
 494    if (drv1) {
 495        return drv1;
 496    }
 497
 498    if (!path_has_protocol(filename) || !allow_protocol_prefix) {
 499        return &bdrv_file;
 500    }
 501
 502    p = strchr(filename, ':');
 503    assert(p != NULL);
 504    len = p - filename;
 505    if (len > sizeof(protocol) - 1)
 506        len = sizeof(protocol) - 1;
 507    memcpy(protocol, filename, len);
 508    protocol[len] = '\0';
 509    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
 510        if (drv1->protocol_name &&
 511            !strcmp(drv1->protocol_name, protocol)) {
 512            return drv1;
 513        }
 514    }
 515
 516    error_setg(errp, "Unknown protocol '%s'", protocol);
 517    return NULL;
 518}
 519
 520/*
 521 * Guess image format by probing its contents.
 522 * This is not a good idea when your image is raw (CVE-2008-2004), but
 523 * we do it anyway for backward compatibility.
 524 *
 525 * @buf         contains the image's first @buf_size bytes.
 526 * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
 527 *              but can be smaller if the image file is smaller)
 528 * @filename    is its filename.
 529 *
 530 * For all block drivers, call the bdrv_probe() method to get its
 531 * probing score.
 532 * Return the first block driver with the highest probing score.
 533 */
 534BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
 535                            const char *filename)
 536{
 537    int score_max = 0, score;
 538    BlockDriver *drv = NULL, *d;
 539
 540    QLIST_FOREACH(d, &bdrv_drivers, list) {
 541        if (d->bdrv_probe) {
 542            score = d->bdrv_probe(buf, buf_size, filename);
 543            if (score > score_max) {
 544                score_max = score;
 545                drv = d;
 546            }
 547        }
 548    }
 549
 550    return drv;
 551}
 552
 553static int find_image_format(BlockDriverState *bs, const char *filename,
 554                             BlockDriver **pdrv, Error **errp)
 555{
 556    BlockDriver *drv;
 557    uint8_t buf[BLOCK_PROBE_BUF_SIZE];
 558    int ret = 0;
 559
 560    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
 561    if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
 562        *pdrv = &bdrv_raw;
 563        return ret;
 564    }
 565
 566    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
 567    if (ret < 0) {
 568        error_setg_errno(errp, -ret, "Could not read image for determining its "
 569                         "format");
 570        *pdrv = NULL;
 571        return ret;
 572    }
 573
 574    drv = bdrv_probe_all(buf, ret, filename);
 575    if (!drv) {
 576        error_setg(errp, "Could not determine image format: No compatible "
 577                   "driver found");
 578        ret = -ENOENT;
 579    }
 580    *pdrv = drv;
 581    return ret;
 582}
 583
 584/**
 585 * Set the current 'total_sectors' value
 586 * Return 0 on success, -errno on error.
 587 */
 588static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
 589{
 590    BlockDriver *drv = bs->drv;
 591
 592    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
 593    if (bdrv_is_sg(bs))
 594        return 0;
 595
 596    /* query actual device if possible, otherwise just trust the hint */
 597    if (drv->bdrv_getlength) {
 598        int64_t length = drv->bdrv_getlength(bs);
 599        if (length < 0) {
 600            return length;
 601        }
 602        hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
 603    }
 604
 605    bs->total_sectors = hint;
 606    return 0;
 607}
 608
 609/**
 610 * Combines a QDict of new block driver @options with any missing options taken
 611 * from @old_options, so that leaving out an option defaults to its old value.
 612 */
 613static void bdrv_join_options(BlockDriverState *bs, QDict *options,
 614                              QDict *old_options)
 615{
 616    if (bs->drv && bs->drv->bdrv_join_options) {
 617        bs->drv->bdrv_join_options(options, old_options);
 618    } else {
 619        qdict_join(options, old_options, false);
 620    }
 621}
 622
 623/**
 624 * Set open flags for a given discard mode
 625 *
 626 * Return 0 on success, -1 if the discard mode was invalid.
 627 */
 628int bdrv_parse_discard_flags(const char *mode, int *flags)
 629{
 630    *flags &= ~BDRV_O_UNMAP;
 631
 632    if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
 633        /* do nothing */
 634    } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
 635        *flags |= BDRV_O_UNMAP;
 636    } else {
 637        return -1;
 638    }
 639
 640    return 0;
 641}
 642
 643/**
 644 * Set open flags for a given cache mode
 645 *
 646 * Return 0 on success, -1 if the cache mode was invalid.
 647 */
 648int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
 649{
 650    *flags &= ~BDRV_O_CACHE_MASK;
 651
 652    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
 653        *writethrough = false;
 654        *flags |= BDRV_O_NOCACHE;
 655    } else if (!strcmp(mode, "directsync")) {
 656        *writethrough = true;
 657        *flags |= BDRV_O_NOCACHE;
 658    } else if (!strcmp(mode, "writeback")) {
 659        *writethrough = false;
 660    } else if (!strcmp(mode, "unsafe")) {
 661        *writethrough = false;
 662        *flags |= BDRV_O_NO_FLUSH;
 663    } else if (!strcmp(mode, "writethrough")) {
 664        *writethrough = true;
 665    } else {
 666        return -1;
 667    }
 668
 669    return 0;
 670}
 671
 672/*
 673 * Returns the options and flags that a temporary snapshot should get, based on
 674 * the originally requested flags (the originally requested image will have
 675 * flags like a backing file)
 676 */
 677static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
 678                                       int parent_flags, QDict *parent_options)
 679{
 680    *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
 681
 682    /* For temporary files, unconditional cache=unsafe is fine */
 683    qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
 684    qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
 685}
 686
 687/*
 688 * Returns the options and flags that bs->file should get if a protocol driver
 689 * is expected, based on the given options and flags for the parent BDS
 690 */
 691static void bdrv_inherited_options(int *child_flags, QDict *child_options,
 692                                   int parent_flags, QDict *parent_options)
 693{
 694    int flags = parent_flags;
 695
 696    /* Enable protocol handling, disable format probing for bs->file */
 697    flags |= BDRV_O_PROTOCOL;
 698
 699    /* If the cache mode isn't explicitly set, inherit direct and no-flush from
 700     * the parent. */
 701    qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
 702    qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
 703
 704    /* Our block drivers take care to send flushes and respect unmap policy,
 705     * so we can default to enable both on lower layers regardless of the
 706     * corresponding parent options. */
 707    flags |= BDRV_O_UNMAP;
 708
 709    /* Clear flags that only apply to the top layer */
 710    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ |
 711               BDRV_O_NO_IO);
 712
 713    *child_flags = flags;
 714}
 715
 716const BdrvChildRole child_file = {
 717    .inherit_options = bdrv_inherited_options,
 718};
 719
 720/*
 721 * Returns the options and flags that bs->file should get if the use of formats
 722 * (and not only protocols) is permitted for it, based on the given options and
 723 * flags for the parent BDS
 724 */
 725static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
 726                                       int parent_flags, QDict *parent_options)
 727{
 728    child_file.inherit_options(child_flags, child_options,
 729                               parent_flags, parent_options);
 730
 731    *child_flags &= ~(BDRV_O_PROTOCOL | BDRV_O_NO_IO);
 732}
 733
 734const BdrvChildRole child_format = {
 735    .inherit_options = bdrv_inherited_fmt_options,
 736};
 737
 738/*
 739 * Returns the options and flags that bs->backing should get, based on the
 740 * given options and flags for the parent BDS
 741 */
 742static void bdrv_backing_options(int *child_flags, QDict *child_options,
 743                                 int parent_flags, QDict *parent_options)
 744{
 745    int flags = parent_flags;
 746
 747    /* The cache mode is inherited unmodified for backing files; except WCE,
 748     * which is only applied on the top level (BlockBackend) */
 749    qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
 750    qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
 751
 752    /* backing files always opened read-only */
 753    flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
 754
 755    /* snapshot=on is handled on the top layer */
 756    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
 757
 758    *child_flags = flags;
 759}
 760
 761static const BdrvChildRole child_backing = {
 762    .inherit_options = bdrv_backing_options,
 763};
 764
 765static int bdrv_open_flags(BlockDriverState *bs, int flags)
 766{
 767    int open_flags = flags;
 768
 769    /*
 770     * Clear flags that are internal to the block layer before opening the
 771     * image.
 772     */
 773    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
 774
 775    /*
 776     * Snapshots should be writable.
 777     */
 778    if (flags & BDRV_O_TEMPORARY) {
 779        open_flags |= BDRV_O_RDWR;
 780    }
 781
 782    return open_flags;
 783}
 784
 785static void update_flags_from_options(int *flags, QemuOpts *opts)
 786{
 787    *flags &= ~BDRV_O_CACHE_MASK;
 788
 789    assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH));
 790    if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
 791        *flags |= BDRV_O_NO_FLUSH;
 792    }
 793
 794    assert(qemu_opt_find(opts, BDRV_OPT_CACHE_DIRECT));
 795    if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
 796        *flags |= BDRV_O_NOCACHE;
 797    }
 798}
 799
 800static void update_options_from_flags(QDict *options, int flags)
 801{
 802    if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
 803        qdict_put(options, BDRV_OPT_CACHE_DIRECT,
 804                  qbool_from_bool(flags & BDRV_O_NOCACHE));
 805    }
 806    if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
 807        qdict_put(options, BDRV_OPT_CACHE_NO_FLUSH,
 808                  qbool_from_bool(flags & BDRV_O_NO_FLUSH));
 809    }
 810}
 811
 812static void bdrv_assign_node_name(BlockDriverState *bs,
 813                                  const char *node_name,
 814                                  Error **errp)
 815{
 816    char *gen_node_name = NULL;
 817
 818    if (!node_name) {
 819        node_name = gen_node_name = id_generate(ID_BLOCK);
 820    } else if (!id_wellformed(node_name)) {
 821        /*
 822         * Check for empty string or invalid characters, but not if it is
 823         * generated (generated names use characters not available to the user)
 824         */
 825        error_setg(errp, "Invalid node name");
 826        return;
 827    }
 828
 829    /* takes care of avoiding namespaces collisions */
 830    if (blk_by_name(node_name)) {
 831        error_setg(errp, "node-name=%s is conflicting with a device id",
 832                   node_name);
 833        goto out;
 834    }
 835
 836    /* takes care of avoiding duplicates node names */
 837    if (bdrv_find_node(node_name)) {
 838        error_setg(errp, "Duplicate node name");
 839        goto out;
 840    }
 841
 842    /* copy node name into the bs and insert it into the graph list */
 843    pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
 844    QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
 845out:
 846    g_free(gen_node_name);
 847}
 848
 849static QemuOptsList bdrv_runtime_opts = {
 850    .name = "bdrv_common",
 851    .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
 852    .desc = {
 853        {
 854            .name = "node-name",
 855            .type = QEMU_OPT_STRING,
 856            .help = "Node name of the block device node",
 857        },
 858        {
 859            .name = "driver",
 860            .type = QEMU_OPT_STRING,
 861            .help = "Block driver to use for the node",
 862        },
 863        {
 864            .name = BDRV_OPT_CACHE_DIRECT,
 865            .type = QEMU_OPT_BOOL,
 866            .help = "Bypass software writeback cache on the host",
 867        },
 868        {
 869            .name = BDRV_OPT_CACHE_NO_FLUSH,
 870            .type = QEMU_OPT_BOOL,
 871            .help = "Ignore flush requests",
 872        },
 873        { /* end of list */ }
 874    },
 875};
 876
 877/*
 878 * Common part for opening disk images and files
 879 *
 880 * Removes all processed options from *options.
 881 */
 882static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
 883                            QDict *options, Error **errp)
 884{
 885    int ret, open_flags;
 886    const char *filename;
 887    const char *driver_name = NULL;
 888    const char *node_name = NULL;
 889    QemuOpts *opts;
 890    BlockDriver *drv;
 891    Error *local_err = NULL;
 892
 893    assert(bs->file == NULL);
 894    assert(options != NULL && bs->options != options);
 895
 896    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
 897    qemu_opts_absorb_qdict(opts, options, &local_err);
 898    if (local_err) {
 899        error_propagate(errp, local_err);
 900        ret = -EINVAL;
 901        goto fail_opts;
 902    }
 903
 904    driver_name = qemu_opt_get(opts, "driver");
 905    drv = bdrv_find_format(driver_name);
 906    assert(drv != NULL);
 907
 908    if (file != NULL) {
 909        filename = file->bs->filename;
 910    } else {
 911        filename = qdict_get_try_str(options, "filename");
 912    }
 913
 914    if (drv->bdrv_needs_filename && !filename) {
 915        error_setg(errp, "The '%s' block driver requires a file name",
 916                   drv->format_name);
 917        ret = -EINVAL;
 918        goto fail_opts;
 919    }
 920
 921    trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
 922                           drv->format_name);
 923
 924    node_name = qemu_opt_get(opts, "node-name");
 925    bdrv_assign_node_name(bs, node_name, &local_err);
 926    if (local_err) {
 927        error_propagate(errp, local_err);
 928        ret = -EINVAL;
 929        goto fail_opts;
 930    }
 931
 932    bs->request_alignment = 512;
 933    bs->zero_beyond_eof = true;
 934    bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
 935
 936    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
 937        error_setg(errp,
 938                   !bs->read_only && bdrv_is_whitelisted(drv, true)
 939                        ? "Driver '%s' can only be used for read-only devices"
 940                        : "Driver '%s' is not whitelisted",
 941                   drv->format_name);
 942        ret = -ENOTSUP;
 943        goto fail_opts;
 944    }
 945
 946    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
 947    if (bs->open_flags & BDRV_O_COPY_ON_READ) {
 948        if (!bs->read_only) {
 949            bdrv_enable_copy_on_read(bs);
 950        } else {
 951            error_setg(errp, "Can't use copy-on-read on read-only device");
 952            ret = -EINVAL;
 953            goto fail_opts;
 954        }
 955    }
 956
 957    if (filename != NULL) {
 958        pstrcpy(bs->filename, sizeof(bs->filename), filename);
 959    } else {
 960        bs->filename[0] = '\0';
 961    }
 962    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
 963
 964    bs->drv = drv;
 965    bs->opaque = g_malloc0(drv->instance_size);
 966
 967    /* Apply cache mode options */
 968    update_flags_from_options(&bs->open_flags, opts);
 969
 970    /* Open the image, either directly or using a protocol */
 971    open_flags = bdrv_open_flags(bs, bs->open_flags);
 972    if (drv->bdrv_file_open) {
 973        assert(file == NULL);
 974        assert(!drv->bdrv_needs_filename || filename != NULL);
 975        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
 976    } else {
 977        if (file == NULL) {
 978            error_setg(errp, "Can't use '%s' as a block driver for the "
 979                       "protocol level", drv->format_name);
 980            ret = -EINVAL;
 981            goto free_and_fail;
 982        }
 983        bs->file = file;
 984        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
 985    }
 986
 987    if (ret < 0) {
 988        if (local_err) {
 989            error_propagate(errp, local_err);
 990        } else if (bs->filename[0]) {
 991            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
 992        } else {
 993            error_setg_errno(errp, -ret, "Could not open image");
 994        }
 995        goto free_and_fail;
 996    }
 997
 998    ret = refresh_total_sectors(bs, bs->total_sectors);
 999    if (ret < 0) {
1000        error_setg_errno(errp, -ret, "Could not refresh total sector count");
1001        goto free_and_fail;
1002    }
1003
1004    bdrv_refresh_limits(bs, &local_err);
1005    if (local_err) {
1006        error_propagate(errp, local_err);
1007        ret = -EINVAL;
1008        goto free_and_fail;
1009    }
1010
1011    assert(bdrv_opt_mem_align(bs) != 0);
1012    assert(bdrv_min_mem_align(bs) != 0);
1013    assert((bs->request_alignment != 0) || bdrv_is_sg(bs));
1014
1015    qemu_opts_del(opts);
1016    return 0;
1017
1018free_and_fail:
1019    bs->file = NULL;
1020    g_free(bs->opaque);
1021    bs->opaque = NULL;
1022    bs->drv = NULL;
1023fail_opts:
1024    qemu_opts_del(opts);
1025    return ret;
1026}
1027
1028static QDict *parse_json_filename(const char *filename, Error **errp)
1029{
1030    QObject *options_obj;
1031    QDict *options;
1032    int ret;
1033
1034    ret = strstart(filename, "json:", &filename);
1035    assert(ret);
1036
1037    options_obj = qobject_from_json(filename);
1038    if (!options_obj) {
1039        error_setg(errp, "Could not parse the JSON options");
1040        return NULL;
1041    }
1042
1043    if (qobject_type(options_obj) != QTYPE_QDICT) {
1044        qobject_decref(options_obj);
1045        error_setg(errp, "Invalid JSON object given");
1046        return NULL;
1047    }
1048
1049    options = qobject_to_qdict(options_obj);
1050    qdict_flatten(options);
1051
1052    return options;
1053}
1054
1055static void parse_json_protocol(QDict *options, const char **pfilename,
1056                                Error **errp)
1057{
1058    QDict *json_options;
1059    Error *local_err = NULL;
1060
1061    /* Parse json: pseudo-protocol */
1062    if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
1063        return;
1064    }
1065
1066    json_options = parse_json_filename(*pfilename, &local_err);
1067    if (local_err) {
1068        error_propagate(errp, local_err);
1069        return;
1070    }
1071
1072    /* Options given in the filename have lower priority than options
1073     * specified directly */
1074    qdict_join(options, json_options, false);
1075    QDECREF(json_options);
1076    *pfilename = NULL;
1077}
1078
1079/*
1080 * Fills in default options for opening images and converts the legacy
1081 * filename/flags pair to option QDict entries.
1082 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
1083 * block driver has been specified explicitly.
1084 */
1085static int bdrv_fill_options(QDict **options, const char *filename,
1086                             int *flags, Error **errp)
1087{
1088    const char *drvname;
1089    bool protocol = *flags & BDRV_O_PROTOCOL;
1090    bool parse_filename = false;
1091    BlockDriver *drv = NULL;
1092    Error *local_err = NULL;
1093
1094    drvname = qdict_get_try_str(*options, "driver");
1095    if (drvname) {
1096        drv = bdrv_find_format(drvname);
1097        if (!drv) {
1098            error_setg(errp, "Unknown driver '%s'", drvname);
1099            return -ENOENT;
1100        }
1101        /* If the user has explicitly specified the driver, this choice should
1102         * override the BDRV_O_PROTOCOL flag */
1103        protocol = drv->bdrv_file_open;
1104    }
1105
1106    if (protocol) {
1107        *flags |= BDRV_O_PROTOCOL;
1108    } else {
1109        *flags &= ~BDRV_O_PROTOCOL;
1110    }
1111
1112    /* Translate cache options from flags into options */
1113    update_options_from_flags(*options, *flags);
1114
1115    /* Fetch the file name from the options QDict if necessary */
1116    if (protocol && filename) {
1117        if (!qdict_haskey(*options, "filename")) {
1118            qdict_put(*options, "filename", qstring_from_str(filename));
1119            parse_filename = true;
1120        } else {
1121            error_setg(errp, "Can't specify 'file' and 'filename' options at "
1122                             "the same time");
1123            return -EINVAL;
1124        }
1125    }
1126
1127    /* Find the right block driver */
1128    filename = qdict_get_try_str(*options, "filename");
1129
1130    if (!drvname && protocol) {
1131        if (filename) {
1132            drv = bdrv_find_protocol(filename, parse_filename, errp);
1133            if (!drv) {
1134                return -EINVAL;
1135            }
1136
1137            drvname = drv->format_name;
1138            qdict_put(*options, "driver", qstring_from_str(drvname));
1139        } else {
1140            error_setg(errp, "Must specify either driver or file");
1141            return -EINVAL;
1142        }
1143    }
1144
1145    assert(drv || !protocol);
1146
1147    /* Driver-specific filename parsing */
1148    if (drv && drv->bdrv_parse_filename && parse_filename) {
1149        drv->bdrv_parse_filename(filename, *options, &local_err);
1150        if (local_err) {
1151            error_propagate(errp, local_err);
1152            return -EINVAL;
1153        }
1154
1155        if (!drv->bdrv_needs_filename) {
1156            qdict_del(*options, "filename");
1157        }
1158    }
1159
1160    return 0;
1161}
1162
1163BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
1164                                  const char *child_name,
1165                                  const BdrvChildRole *child_role)
1166{
1167    BdrvChild *child = g_new(BdrvChild, 1);
1168    *child = (BdrvChild) {
1169        .bs     = child_bs,
1170        .name   = g_strdup(child_name),
1171        .role   = child_role,
1172    };
1173
1174    QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
1175
1176    return child;
1177}
1178
1179static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1180                                    BlockDriverState *child_bs,
1181                                    const char *child_name,
1182                                    const BdrvChildRole *child_role)
1183{
1184    BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role);
1185    QLIST_INSERT_HEAD(&parent_bs->children, child, next);
1186    return child;
1187}
1188
1189static void bdrv_detach_child(BdrvChild *child)
1190{
1191    if (child->next.le_prev) {
1192        QLIST_REMOVE(child, next);
1193        child->next.le_prev = NULL;
1194    }
1195    QLIST_REMOVE(child, next_parent);
1196    g_free(child->name);
1197    g_free(child);
1198}
1199
1200void bdrv_root_unref_child(BdrvChild *child)
1201{
1202    BlockDriverState *child_bs;
1203
1204    child_bs = child->bs;
1205    bdrv_detach_child(child);
1206    bdrv_unref(child_bs);
1207}
1208
1209void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1210{
1211    if (child == NULL) {
1212        return;
1213    }
1214
1215    if (child->bs->inherits_from == parent) {
1216        child->bs->inherits_from = NULL;
1217    }
1218
1219    bdrv_root_unref_child(child);
1220}
1221
1222/*
1223 * Sets the backing file link of a BDS. A new reference is created; callers
1224 * which don't need their own reference any more must call bdrv_unref().
1225 */
1226void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1227{
1228    if (backing_hd) {
1229        bdrv_ref(backing_hd);
1230    }
1231
1232    if (bs->backing) {
1233        assert(bs->backing_blocker);
1234        bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
1235        bdrv_unref_child(bs, bs->backing);
1236    } else if (backing_hd) {
1237        error_setg(&bs->backing_blocker,
1238                   "node is used as backing hd of '%s'",
1239                   bdrv_get_device_or_node_name(bs));
1240    }
1241
1242    if (!backing_hd) {
1243        error_free(bs->backing_blocker);
1244        bs->backing_blocker = NULL;
1245        bs->backing = NULL;
1246        goto out;
1247    }
1248    bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing);
1249    bs->open_flags &= ~BDRV_O_NO_BACKING;
1250    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1251    pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1252            backing_hd->drv ? backing_hd->drv->format_name : "");
1253
1254    bdrv_op_block_all(backing_hd, bs->backing_blocker);
1255    /* Otherwise we won't be able to commit due to check in bdrv_commit */
1256    bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1257                    bs->backing_blocker);
1258out:
1259    bdrv_refresh_limits(bs, NULL);
1260}
1261
1262/*
1263 * Opens the backing file for a BlockDriverState if not yet open
1264 *
1265 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
1266 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1267 * itself, all options starting with "${bdref_key}." are considered part of the
1268 * BlockdevRef.
1269 *
1270 * TODO Can this be unified with bdrv_open_image()?
1271 */
1272int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
1273                           const char *bdref_key, Error **errp)
1274{
1275    char *backing_filename = g_malloc0(PATH_MAX);
1276    char *bdref_key_dot;
1277    const char *reference = NULL;
1278    int ret = 0;
1279    BlockDriverState *backing_hd;
1280    QDict *options;
1281    QDict *tmp_parent_options = NULL;
1282    Error *local_err = NULL;
1283
1284    if (bs->backing != NULL) {
1285        goto free_exit;
1286    }
1287
1288    /* NULL means an empty set of options */
1289    if (parent_options == NULL) {
1290        tmp_parent_options = qdict_new();
1291        parent_options = tmp_parent_options;
1292    }
1293
1294    bs->open_flags &= ~BDRV_O_NO_BACKING;
1295
1296    bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1297    qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
1298    g_free(bdref_key_dot);
1299
1300    reference = qdict_get_try_str(parent_options, bdref_key);
1301    if (reference || qdict_haskey(options, "file.filename")) {
1302        backing_filename[0] = '\0';
1303    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1304        QDECREF(options);
1305        goto free_exit;
1306    } else {
1307        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1308                                       &local_err);
1309        if (local_err) {
1310            ret = -EINVAL;
1311            error_propagate(errp, local_err);
1312            QDECREF(options);
1313            goto free_exit;
1314        }
1315    }
1316
1317    if (!bs->drv || !bs->drv->supports_backing) {
1318        ret = -EINVAL;
1319        error_setg(errp, "Driver doesn't support backing files");
1320        QDECREF(options);
1321        goto free_exit;
1322    }
1323
1324    if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1325        qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1326    }
1327
1328    backing_hd = NULL;
1329    ret = bdrv_open_inherit(&backing_hd,
1330                            *backing_filename ? backing_filename : NULL,
1331                            reference, options, 0, bs, &child_backing,
1332                            errp);
1333    if (ret < 0) {
1334        bs->open_flags |= BDRV_O_NO_BACKING;
1335        error_prepend(errp, "Could not open backing file: ");
1336        goto free_exit;
1337    }
1338
1339    /* Hook up the backing file link; drop our reference, bs owns the
1340     * backing_hd reference now */
1341    bdrv_set_backing_hd(bs, backing_hd);
1342    bdrv_unref(backing_hd);
1343
1344    qdict_del(parent_options, bdref_key);
1345
1346free_exit:
1347    g_free(backing_filename);
1348    QDECREF(tmp_parent_options);
1349    return ret;
1350}
1351
1352/*
1353 * Opens a disk image whose options are given as BlockdevRef in another block
1354 * device's options.
1355 *
1356 * If allow_none is true, no image will be opened if filename is false and no
1357 * BlockdevRef is given. NULL will be returned, but errp remains unset.
1358 *
1359 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1360 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1361 * itself, all options starting with "${bdref_key}." are considered part of the
1362 * BlockdevRef.
1363 *
1364 * The BlockdevRef will be removed from the options QDict.
1365 */
1366BdrvChild *bdrv_open_child(const char *filename,
1367                           QDict *options, const char *bdref_key,
1368                           BlockDriverState* parent,
1369                           const BdrvChildRole *child_role,
1370                           bool allow_none, Error **errp)
1371{
1372    BdrvChild *c = NULL;
1373    BlockDriverState *bs;
1374    QDict *image_options;
1375    int ret;
1376    char *bdref_key_dot;
1377    const char *reference;
1378
1379    assert(child_role != NULL);
1380
1381    bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1382    qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1383    g_free(bdref_key_dot);
1384
1385    reference = qdict_get_try_str(options, bdref_key);
1386    if (!filename && !reference && !qdict_size(image_options)) {
1387        if (!allow_none) {
1388            error_setg(errp, "A block device must be specified for \"%s\"",
1389                       bdref_key);
1390        }
1391        QDECREF(image_options);
1392        goto done;
1393    }
1394
1395    bs = NULL;
1396    ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
1397                            parent, child_role, errp);
1398    if (ret < 0) {
1399        goto done;
1400    }
1401
1402    c = bdrv_attach_child(parent, bs, bdref_key, child_role);
1403
1404done:
1405    qdict_del(options, bdref_key);
1406    return c;
1407}
1408
1409static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags,
1410                                     QDict *snapshot_options, Error **errp)
1411{
1412    /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1413    char *tmp_filename = g_malloc0(PATH_MAX + 1);
1414    int64_t total_size;
1415    QemuOpts *opts = NULL;
1416    BlockDriverState *bs_snapshot;
1417    Error *local_err = NULL;
1418    int ret;
1419
1420    /* if snapshot, we create a temporary backing file and open it
1421       instead of opening 'filename' directly */
1422
1423    /* Get the required size from the image */
1424    total_size = bdrv_getlength(bs);
1425    if (total_size < 0) {
1426        ret = total_size;
1427        error_setg_errno(errp, -total_size, "Could not get image size");
1428        goto out;
1429    }
1430
1431    /* Create the temporary image */
1432    ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1433    if (ret < 0) {
1434        error_setg_errno(errp, -ret, "Could not get temporary filename");
1435        goto out;
1436    }
1437
1438    opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1439                            &error_abort);
1440    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1441    ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
1442    qemu_opts_del(opts);
1443    if (ret < 0) {
1444        error_prepend(errp, "Could not create temporary overlay '%s': ",
1445                      tmp_filename);
1446        goto out;
1447    }
1448
1449    /* Prepare options QDict for the temporary file */
1450    qdict_put(snapshot_options, "file.driver",
1451              qstring_from_str("file"));
1452    qdict_put(snapshot_options, "file.filename",
1453              qstring_from_str(tmp_filename));
1454    qdict_put(snapshot_options, "driver",
1455              qstring_from_str("qcow2"));
1456
1457    bs_snapshot = bdrv_new();
1458
1459    ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1460                    flags, &local_err);
1461    snapshot_options = NULL;
1462    if (ret < 0) {
1463        error_propagate(errp, local_err);
1464        goto out;
1465    }
1466
1467    bdrv_append(bs_snapshot, bs);
1468
1469out:
1470    QDECREF(snapshot_options);
1471    g_free(tmp_filename);
1472    return ret;
1473}
1474
1475/*
1476 * Opens a disk image (raw, qcow2, vmdk, ...)
1477 *
1478 * options is a QDict of options to pass to the block drivers, or NULL for an
1479 * empty set of options. The reference to the QDict belongs to the block layer
1480 * after the call (even on failure), so if the caller intends to reuse the
1481 * dictionary, it needs to use QINCREF() before calling bdrv_open.
1482 *
1483 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1484 * If it is not NULL, the referenced BDS will be reused.
1485 *
1486 * The reference parameter may be used to specify an existing block device which
1487 * should be opened. If specified, neither options nor a filename may be given,
1488 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1489 */
1490static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1491                             const char *reference, QDict *options, int flags,
1492                             BlockDriverState *parent,
1493                             const BdrvChildRole *child_role, Error **errp)
1494{
1495    int ret;
1496    BdrvChild *file = NULL;
1497    BlockDriverState *bs;
1498    BlockDriver *drv = NULL;
1499    const char *drvname;
1500    const char *backing;
1501    Error *local_err = NULL;
1502    QDict *snapshot_options = NULL;
1503    int snapshot_flags = 0;
1504
1505    assert(pbs);
1506    assert(!child_role || !flags);
1507    assert(!child_role == !parent);
1508
1509    if (reference) {
1510        bool options_non_empty = options ? qdict_size(options) : false;
1511        QDECREF(options);
1512
1513        if (*pbs) {
1514            error_setg(errp, "Cannot reuse an existing BDS when referencing "
1515                       "another block device");
1516            return -EINVAL;
1517        }
1518
1519        if (filename || options_non_empty) {
1520            error_setg(errp, "Cannot reference an existing block device with "
1521                       "additional options or a new filename");
1522            return -EINVAL;
1523        }
1524
1525        bs = bdrv_lookup_bs(reference, reference, errp);
1526        if (!bs) {
1527            return -ENODEV;
1528        }
1529
1530        if (bs->throttle_state) {
1531            error_setg(errp, "Cannot reference an existing block device for "
1532                       "which I/O throttling is enabled");
1533            return -EINVAL;
1534        }
1535
1536        bdrv_ref(bs);
1537        *pbs = bs;
1538        return 0;
1539    }
1540
1541    if (*pbs) {
1542        bs = *pbs;
1543    } else {
1544        bs = bdrv_new();
1545    }
1546
1547    /* NULL means an empty set of options */
1548    if (options == NULL) {
1549        options = qdict_new();
1550    }
1551
1552    /* json: syntax counts as explicit options, as if in the QDict */
1553    parse_json_protocol(options, &filename, &local_err);
1554    if (local_err) {
1555        ret = -EINVAL;
1556        goto fail;
1557    }
1558
1559    bs->explicit_options = qdict_clone_shallow(options);
1560
1561    if (child_role) {
1562        bs->inherits_from = parent;
1563        child_role->inherit_options(&flags, options,
1564                                    parent->open_flags, parent->options);
1565    }
1566
1567    ret = bdrv_fill_options(&options, filename, &flags, &local_err);
1568    if (local_err) {
1569        goto fail;
1570    }
1571
1572    bs->open_flags = flags;
1573    bs->options = options;
1574    options = qdict_clone_shallow(options);
1575
1576    /* Find the right image format driver */
1577    drvname = qdict_get_try_str(options, "driver");
1578    if (drvname) {
1579        drv = bdrv_find_format(drvname);
1580        if (!drv) {
1581            error_setg(errp, "Unknown driver: '%s'", drvname);
1582            ret = -EINVAL;
1583            goto fail;
1584        }
1585    }
1586
1587    assert(drvname || !(flags & BDRV_O_PROTOCOL));
1588
1589    backing = qdict_get_try_str(options, "backing");
1590    if (backing && *backing == '\0') {
1591        flags |= BDRV_O_NO_BACKING;
1592        qdict_del(options, "backing");
1593    }
1594
1595    /* Open image file without format layer */
1596    if ((flags & BDRV_O_PROTOCOL) == 0) {
1597        if (flags & BDRV_O_RDWR) {
1598            flags |= BDRV_O_ALLOW_RDWR;
1599        }
1600        if (flags & BDRV_O_SNAPSHOT) {
1601            snapshot_options = qdict_new();
1602            bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
1603                                       flags, options);
1604            bdrv_backing_options(&flags, options, flags, options);
1605        }
1606
1607        bs->open_flags = flags;
1608
1609        file = bdrv_open_child(filename, options, "file", bs,
1610                               &child_file, true, &local_err);
1611        if (local_err) {
1612            ret = -EINVAL;
1613            goto fail;
1614        }
1615    }
1616
1617    /* Image format probing */
1618    bs->probed = !drv;
1619    if (!drv && file) {
1620        ret = find_image_format(file->bs, filename, &drv, &local_err);
1621        if (ret < 0) {
1622            goto fail;
1623        }
1624        /*
1625         * This option update would logically belong in bdrv_fill_options(),
1626         * but we first need to open bs->file for the probing to work, while
1627         * opening bs->file already requires the (mostly) final set of options
1628         * so that cache mode etc. can be inherited.
1629         *
1630         * Adding the driver later is somewhat ugly, but it's not an option
1631         * that would ever be inherited, so it's correct. We just need to make
1632         * sure to update both bs->options (which has the full effective
1633         * options for bs) and options (which has file.* already removed).
1634         */
1635        qdict_put(bs->options, "driver", qstring_from_str(drv->format_name));
1636        qdict_put(options, "driver", qstring_from_str(drv->format_name));
1637    } else if (!drv) {
1638        error_setg(errp, "Must specify either driver or file");
1639        ret = -EINVAL;
1640        goto fail;
1641    }
1642
1643    /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1644    assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1645    /* file must be NULL if a protocol BDS is about to be created
1646     * (the inverse results in an error message from bdrv_open_common()) */
1647    assert(!(flags & BDRV_O_PROTOCOL) || !file);
1648
1649    /* Open the image */
1650    ret = bdrv_open_common(bs, file, options, &local_err);
1651    if (ret < 0) {
1652        goto fail;
1653    }
1654
1655    if (file && (bs->file != file)) {
1656        bdrv_unref_child(bs, file);
1657        file = NULL;
1658    }
1659
1660    /* If there is a backing file, use it */
1661    if ((flags & BDRV_O_NO_BACKING) == 0) {
1662        ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
1663        if (ret < 0) {
1664            goto close_and_fail;
1665        }
1666    }
1667
1668    bdrv_refresh_filename(bs);
1669
1670    /* Check if any unknown options were used */
1671    if (options && (qdict_size(options) != 0)) {
1672        const QDictEntry *entry = qdict_first(options);
1673        if (flags & BDRV_O_PROTOCOL) {
1674            error_setg(errp, "Block protocol '%s' doesn't support the option "
1675                       "'%s'", drv->format_name, entry->key);
1676        } else {
1677            error_setg(errp,
1678                       "Block format '%s' does not support the option '%s'",
1679                       drv->format_name, entry->key);
1680        }
1681
1682        ret = -EINVAL;
1683        goto close_and_fail;
1684    }
1685
1686    if (!bdrv_key_required(bs)) {
1687        if (bs->blk) {
1688            blk_dev_change_media_cb(bs->blk, true);
1689        }
1690    } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1691               && !runstate_check(RUN_STATE_INMIGRATE)
1692               && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1693        error_setg(errp,
1694                   "Guest must be stopped for opening of encrypted image");
1695        ret = -EBUSY;
1696        goto close_and_fail;
1697    }
1698
1699    QDECREF(options);
1700    *pbs = bs;
1701
1702    /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1703     * temporary snapshot afterwards. */
1704    if (snapshot_flags) {
1705        ret = bdrv_append_temp_snapshot(bs, snapshot_flags, snapshot_options,
1706                                        &local_err);
1707        snapshot_options = NULL;
1708        if (local_err) {
1709            goto close_and_fail;
1710        }
1711    }
1712
1713    return 0;
1714
1715fail:
1716    if (file != NULL) {
1717        bdrv_unref_child(bs, file);
1718    }
1719    QDECREF(snapshot_options);
1720    QDECREF(bs->explicit_options);
1721    QDECREF(bs->options);
1722    QDECREF(options);
1723    bs->options = NULL;
1724    if (!*pbs) {
1725        /* If *pbs is NULL, a new BDS has been created in this function and
1726           needs to be freed now. Otherwise, it does not need to be closed,
1727           since it has not really been opened yet. */
1728        bdrv_unref(bs);
1729    }
1730    if (local_err) {
1731        error_propagate(errp, local_err);
1732    }
1733    return ret;
1734
1735close_and_fail:
1736    /* See fail path, but now the BDS has to be always closed */
1737    if (*pbs) {
1738        bdrv_close(bs);
1739    } else {
1740        bdrv_unref(bs);
1741    }
1742    QDECREF(snapshot_options);
1743    QDECREF(options);
1744    if (local_err) {
1745        error_propagate(errp, local_err);
1746    }
1747    return ret;
1748}
1749
1750int bdrv_open(BlockDriverState **pbs, const char *filename,
1751              const char *reference, QDict *options, int flags, Error **errp)
1752{
1753    return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
1754                             NULL, errp);
1755}
1756
1757typedef struct BlockReopenQueueEntry {
1758     bool prepared;
1759     BDRVReopenState state;
1760     QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1761} BlockReopenQueueEntry;
1762
1763/*
1764 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1765 * reopen of multiple devices.
1766 *
1767 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1768 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1769 * be created and initialized. This newly created BlockReopenQueue should be
1770 * passed back in for subsequent calls that are intended to be of the same
1771 * atomic 'set'.
1772 *
1773 * bs is the BlockDriverState to add to the reopen queue.
1774 *
1775 * options contains the changed options for the associated bs
1776 * (the BlockReopenQueue takes ownership)
1777 *
1778 * flags contains the open flags for the associated bs
1779 *
1780 * returns a pointer to bs_queue, which is either the newly allocated
1781 * bs_queue, or the existing bs_queue being used.
1782 *
1783 */
1784static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
1785                                                 BlockDriverState *bs,
1786                                                 QDict *options,
1787                                                 int flags,
1788                                                 const BdrvChildRole *role,
1789                                                 QDict *parent_options,
1790                                                 int parent_flags)
1791{
1792    assert(bs != NULL);
1793
1794    BlockReopenQueueEntry *bs_entry;
1795    BdrvChild *child;
1796    QDict *old_options, *explicit_options;
1797
1798    if (bs_queue == NULL) {
1799        bs_queue = g_new0(BlockReopenQueue, 1);
1800        QSIMPLEQ_INIT(bs_queue);
1801    }
1802
1803    if (!options) {
1804        options = qdict_new();
1805    }
1806
1807    /*
1808     * Precedence of options:
1809     * 1. Explicitly passed in options (highest)
1810     * 2. Set in flags (only for top level)
1811     * 3. Retained from explicitly set options of bs
1812     * 4. Inherited from parent node
1813     * 5. Retained from effective options of bs
1814     */
1815
1816    if (!parent_options) {
1817        /*
1818         * Any setting represented by flags is always updated. If the
1819         * corresponding QDict option is set, it takes precedence. Otherwise
1820         * the flag is translated into a QDict option. The old setting of bs is
1821         * not considered.
1822         */
1823        update_options_from_flags(options, flags);
1824    }
1825
1826    /* Old explicitly set values (don't overwrite by inherited value) */
1827    old_options = qdict_clone_shallow(bs->explicit_options);
1828    bdrv_join_options(bs, options, old_options);
1829    QDECREF(old_options);
1830
1831    explicit_options = qdict_clone_shallow(options);
1832
1833    /* Inherit from parent node */
1834    if (parent_options) {
1835        assert(!flags);
1836        role->inherit_options(&flags, options, parent_flags, parent_options);
1837    }
1838
1839    /* Old values are used for options that aren't set yet */
1840    old_options = qdict_clone_shallow(bs->options);
1841    bdrv_join_options(bs, options, old_options);
1842    QDECREF(old_options);
1843
1844    /* bdrv_open() masks this flag out */
1845    flags &= ~BDRV_O_PROTOCOL;
1846
1847    QLIST_FOREACH(child, &bs->children, next) {
1848        QDict *new_child_options;
1849        char *child_key_dot;
1850
1851        /* reopen can only change the options of block devices that were
1852         * implicitly created and inherited options. For other (referenced)
1853         * block devices, a syntax like "backing.foo" results in an error. */
1854        if (child->bs->inherits_from != bs) {
1855            continue;
1856        }
1857
1858        child_key_dot = g_strdup_printf("%s.", child->name);
1859        qdict_extract_subqdict(options, &new_child_options, child_key_dot);
1860        g_free(child_key_dot);
1861
1862        bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 0,
1863                                child->role, options, flags);
1864    }
1865
1866    bs_entry = g_new0(BlockReopenQueueEntry, 1);
1867    QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1868
1869    bs_entry->state.bs = bs;
1870    bs_entry->state.options = options;
1871    bs_entry->state.explicit_options = explicit_options;
1872    bs_entry->state.flags = flags;
1873
1874    return bs_queue;
1875}
1876
1877BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1878                                    BlockDriverState *bs,
1879                                    QDict *options, int flags)
1880{
1881    return bdrv_reopen_queue_child(bs_queue, bs, options, flags,
1882                                   NULL, NULL, 0);
1883}
1884
1885/*
1886 * Reopen multiple BlockDriverStates atomically & transactionally.
1887 *
1888 * The queue passed in (bs_queue) must have been built up previous
1889 * via bdrv_reopen_queue().
1890 *
1891 * Reopens all BDS specified in the queue, with the appropriate
1892 * flags.  All devices are prepared for reopen, and failure of any
1893 * device will cause all device changes to be abandonded, and intermediate
1894 * data cleaned up.
1895 *
1896 * If all devices prepare successfully, then the changes are committed
1897 * to all devices.
1898 *
1899 */
1900int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1901{
1902    int ret = -1;
1903    BlockReopenQueueEntry *bs_entry, *next;
1904    Error *local_err = NULL;
1905
1906    assert(bs_queue != NULL);
1907
1908    bdrv_drain_all();
1909
1910    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1911        if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1912            error_propagate(errp, local_err);
1913            goto cleanup;
1914        }
1915        bs_entry->prepared = true;
1916    }
1917
1918    /* If we reach this point, we have success and just need to apply the
1919     * changes
1920     */
1921    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1922        bdrv_reopen_commit(&bs_entry->state);
1923    }
1924
1925    ret = 0;
1926
1927cleanup:
1928    QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1929        if (ret && bs_entry->prepared) {
1930            bdrv_reopen_abort(&bs_entry->state);
1931        } else if (ret) {
1932            QDECREF(bs_entry->state.explicit_options);
1933        }
1934        QDECREF(bs_entry->state.options);
1935        g_free(bs_entry);
1936    }
1937    g_free(bs_queue);
1938    return ret;
1939}
1940
1941
1942/* Reopen a single BlockDriverState with the specified flags. */
1943int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1944{
1945    int ret = -1;
1946    Error *local_err = NULL;
1947    BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
1948
1949    ret = bdrv_reopen_multiple(queue, &local_err);
1950    if (local_err != NULL) {
1951        error_propagate(errp, local_err);
1952    }
1953    return ret;
1954}
1955
1956
1957/*
1958 * Prepares a BlockDriverState for reopen. All changes are staged in the
1959 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1960 * the block driver layer .bdrv_reopen_prepare()
1961 *
1962 * bs is the BlockDriverState to reopen
1963 * flags are the new open flags
1964 * queue is the reopen queue
1965 *
1966 * Returns 0 on success, non-zero on error.  On error errp will be set
1967 * as well.
1968 *
1969 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1970 * It is the responsibility of the caller to then call the abort() or
1971 * commit() for any other BDS that have been left in a prepare() state
1972 *
1973 */
1974int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1975                        Error **errp)
1976{
1977    int ret = -1;
1978    Error *local_err = NULL;
1979    BlockDriver *drv;
1980    QemuOpts *opts;
1981    const char *value;
1982
1983    assert(reopen_state != NULL);
1984    assert(reopen_state->bs->drv != NULL);
1985    drv = reopen_state->bs->drv;
1986
1987    /* Process generic block layer options */
1988    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
1989    qemu_opts_absorb_qdict(opts, reopen_state->options, &local_err);
1990    if (local_err) {
1991        error_propagate(errp, local_err);
1992        ret = -EINVAL;
1993        goto error;
1994    }
1995
1996    update_flags_from_options(&reopen_state->flags, opts);
1997
1998    /* node-name and driver must be unchanged. Put them back into the QDict, so
1999     * that they are checked at the end of this function. */
2000    value = qemu_opt_get(opts, "node-name");
2001    if (value) {
2002        qdict_put(reopen_state->options, "node-name", qstring_from_str(value));
2003    }
2004
2005    value = qemu_opt_get(opts, "driver");
2006    if (value) {
2007        qdict_put(reopen_state->options, "driver", qstring_from_str(value));
2008    }
2009
2010    /* if we are to stay read-only, do not allow permission change
2011     * to r/w */
2012    if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
2013        reopen_state->flags & BDRV_O_RDWR) {
2014        error_setg(errp, "Node '%s' is read only",
2015                   bdrv_get_device_or_node_name(reopen_state->bs));
2016        goto error;
2017    }
2018
2019
2020    ret = bdrv_flush(reopen_state->bs);
2021    if (ret) {
2022        error_setg_errno(errp, -ret, "Error flushing drive");
2023        goto error;
2024    }
2025
2026    if (drv->bdrv_reopen_prepare) {
2027        ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
2028        if (ret) {
2029            if (local_err != NULL) {
2030                error_propagate(errp, local_err);
2031            } else {
2032                error_setg(errp, "failed while preparing to reopen image '%s'",
2033                           reopen_state->bs->filename);
2034            }
2035            goto error;
2036        }
2037    } else {
2038        /* It is currently mandatory to have a bdrv_reopen_prepare()
2039         * handler for each supported drv. */
2040        error_setg(errp, "Block format '%s' used by node '%s' "
2041                   "does not support reopening files", drv->format_name,
2042                   bdrv_get_device_or_node_name(reopen_state->bs));
2043        ret = -1;
2044        goto error;
2045    }
2046
2047    /* Options that are not handled are only okay if they are unchanged
2048     * compared to the old state. It is expected that some options are only
2049     * used for the initial open, but not reopen (e.g. filename) */
2050    if (qdict_size(reopen_state->options)) {
2051        const QDictEntry *entry = qdict_first(reopen_state->options);
2052
2053        do {
2054            QString *new_obj = qobject_to_qstring(entry->value);
2055            const char *new = qstring_get_str(new_obj);
2056            const char *old = qdict_get_try_str(reopen_state->bs->options,
2057                                                entry->key);
2058
2059            if (!old || strcmp(new, old)) {
2060                error_setg(errp, "Cannot change the option '%s'", entry->key);
2061                ret = -EINVAL;
2062                goto error;
2063            }
2064        } while ((entry = qdict_next(reopen_state->options, entry)));
2065    }
2066
2067    ret = 0;
2068
2069error:
2070    qemu_opts_del(opts);
2071    return ret;
2072}
2073
2074/*
2075 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
2076 * makes them final by swapping the staging BlockDriverState contents into
2077 * the active BlockDriverState contents.
2078 */
2079void bdrv_reopen_commit(BDRVReopenState *reopen_state)
2080{
2081    BlockDriver *drv;
2082
2083    assert(reopen_state != NULL);
2084    drv = reopen_state->bs->drv;
2085    assert(drv != NULL);
2086
2087    /* If there are any driver level actions to take */
2088    if (drv->bdrv_reopen_commit) {
2089        drv->bdrv_reopen_commit(reopen_state);
2090    }
2091
2092    /* set BDS specific flags now */
2093    QDECREF(reopen_state->bs->explicit_options);
2094
2095    reopen_state->bs->explicit_options   = reopen_state->explicit_options;
2096    reopen_state->bs->open_flags         = reopen_state->flags;
2097    reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
2098
2099    bdrv_refresh_limits(reopen_state->bs, NULL);
2100}
2101
2102/*
2103 * Abort the reopen, and delete and free the staged changes in
2104 * reopen_state
2105 */
2106void bdrv_reopen_abort(BDRVReopenState *reopen_state)
2107{
2108    BlockDriver *drv;
2109
2110    assert(reopen_state != NULL);
2111    drv = reopen_state->bs->drv;
2112    assert(drv != NULL);
2113
2114    if (drv->bdrv_reopen_abort) {
2115        drv->bdrv_reopen_abort(reopen_state);
2116    }
2117
2118    QDECREF(reopen_state->explicit_options);
2119}
2120
2121
2122static void bdrv_close(BlockDriverState *bs)
2123{
2124    BdrvAioNotifier *ban, *ban_next;
2125
2126    assert(!bs->job);
2127
2128    /* Disable I/O limits and drain all pending throttled requests */
2129    if (bs->throttle_state) {
2130        bdrv_io_limits_disable(bs);
2131    }
2132
2133    bdrv_drained_begin(bs); /* complete I/O */
2134    bdrv_flush(bs);
2135    bdrv_drain(bs); /* in case flush left pending I/O */
2136
2137    bdrv_release_named_dirty_bitmaps(bs);
2138    assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2139
2140    if (bs->blk) {
2141        blk_dev_change_media_cb(bs->blk, false);
2142    }
2143
2144    if (bs->drv) {
2145        BdrvChild *child, *next;
2146
2147        bs->drv->bdrv_close(bs);
2148        bs->drv = NULL;
2149
2150        bdrv_set_backing_hd(bs, NULL);
2151
2152        if (bs->file != NULL) {
2153            bdrv_unref_child(bs, bs->file);
2154            bs->file = NULL;
2155        }
2156
2157        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
2158            /* TODO Remove bdrv_unref() from drivers' close function and use
2159             * bdrv_unref_child() here */
2160            if (child->bs->inherits_from == bs) {
2161                child->bs->inherits_from = NULL;
2162            }
2163            bdrv_detach_child(child);
2164        }
2165
2166        g_free(bs->opaque);
2167        bs->opaque = NULL;
2168        bs->copy_on_read = 0;
2169        bs->backing_file[0] = '\0';
2170        bs->backing_format[0] = '\0';
2171        bs->total_sectors = 0;
2172        bs->encrypted = 0;
2173        bs->valid_key = 0;
2174        bs->sg = 0;
2175        bs->zero_beyond_eof = false;
2176        QDECREF(bs->options);
2177        QDECREF(bs->explicit_options);
2178        bs->options = NULL;
2179        QDECREF(bs->full_open_options);
2180        bs->full_open_options = NULL;
2181    }
2182
2183    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
2184        g_free(ban);
2185    }
2186    QLIST_INIT(&bs->aio_notifiers);
2187    bdrv_drained_end(bs);
2188}
2189
2190void bdrv_close_all(void)
2191{
2192    BlockDriverState *bs;
2193    AioContext *aio_context;
2194
2195    /* Drop references from requests still in flight, such as canceled block
2196     * jobs whose AIO context has not been polled yet */
2197    bdrv_drain_all();
2198
2199    blk_remove_all_bs();
2200    blockdev_close_all_bdrv_states();
2201
2202    /* Cancel all block jobs */
2203    while (!QTAILQ_EMPTY(&all_bdrv_states)) {
2204        QTAILQ_FOREACH(bs, &all_bdrv_states, bs_list) {
2205            aio_context = bdrv_get_aio_context(bs);
2206
2207            aio_context_acquire(aio_context);
2208            if (bs->job) {
2209                block_job_cancel_sync(bs->job);
2210                aio_context_release(aio_context);
2211                break;
2212            }
2213            aio_context_release(aio_context);
2214        }
2215
2216        /* All the remaining BlockDriverStates are referenced directly or
2217         * indirectly from block jobs, so there needs to be at least one BDS
2218         * directly used by a block job */
2219        assert(bs);
2220    }
2221}
2222
/*
 * Fields that need to stay with the top-level BDS.
 *
 * This function is the hook for moving such fields from @bs_src to @bs_dest.
 * Its body is currently empty, so calling it has no effect on either
 * argument.
 */
static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
                                     BlockDriverState *bs_src)
{
    /* move some fields that need to stay attached to the device */
}
2229
2230static void change_parent_backing_link(BlockDriverState *from,
2231                                       BlockDriverState *to)
2232{
2233    BdrvChild *c, *next;
2234
2235    if (from->blk) {
2236        /* FIXME We bypass blk_set_bs(), so we need to make these updates
2237         * manually. The root problem is not in this change function, but the
2238         * existence of BlockDriverState.blk. */
2239        to->blk = from->blk;
2240        from->blk = NULL;
2241    }
2242
2243    QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
2244        assert(c->role != &child_backing);
2245        c->bs = to;
2246        QLIST_REMOVE(c, next_parent);
2247        QLIST_INSERT_HEAD(&to->parents, c, next_parent);
2248        bdrv_ref(to);
2249        bdrv_unref(from);
2250    }
2251}
2252
2253static void swap_feature_fields(BlockDriverState *bs_top,
2254                                BlockDriverState *bs_new)
2255{
2256    BlockDriverState tmp;
2257
2258    bdrv_move_feature_fields(&tmp, bs_top);
2259    bdrv_move_feature_fields(bs_top, bs_new);
2260    bdrv_move_feature_fields(bs_new, &tmp);
2261
2262    assert(!bs_new->throttle_state);
2263    if (bs_top->throttle_state) {
2264        assert(bs_top->io_limits_enabled);
2265        bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
2266        bdrv_io_limits_disable(bs_top);
2267    }
2268}
2269
2270/*
2271 * Add new bs contents at the top of an image chain while the chain is
2272 * live, while keeping required fields on the top layer.
2273 *
2274 * This will modify the BlockDriverState fields, and swap contents
2275 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2276 *
2277 * bs_new must not be attached to a BlockBackend.
2278 *
2279 * This function does not create any image files.
2280 *
2281 * bdrv_append() takes ownership of a bs_new reference and unrefs it because
2282 * that's what the callers commonly need. bs_new will be referenced by the old
2283 * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
2284 * reference of its own, it must call bdrv_ref().
2285 */
2286void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2287{
2288    assert(!bdrv_requests_pending(bs_top));
2289    assert(!bdrv_requests_pending(bs_new));
2290
2291    bdrv_ref(bs_top);
2292    change_parent_backing_link(bs_top, bs_new);
2293
2294    /* Some fields always stay on top of the backing file chain */
2295    swap_feature_fields(bs_top, bs_new);
2296
2297    bdrv_set_backing_hd(bs_new, bs_top);
2298    bdrv_unref(bs_top);
2299
2300    /* bs_new is now referenced by its new parents, we don't need the
2301     * additional reference any more. */
2302    bdrv_unref(bs_new);
2303}
2304
2305void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
2306{
2307    assert(!bdrv_requests_pending(old));
2308    assert(!bdrv_requests_pending(new));
2309
2310    bdrv_ref(old);
2311
2312    if (old->blk) {
2313        /* As long as these fields aren't in BlockBackend, but in the top-level
2314         * BlockDriverState, it's not possible for a BDS to have two BBs.
2315         *
2316         * We really want to copy the fields from old to new, but we go for a
2317         * swap instead so that pointers aren't duplicated and cause trouble.
2318         * (Also, bdrv_swap() used to do the same.) */
2319        assert(!new->blk);
2320        swap_feature_fields(old, new);
2321    }
2322    change_parent_backing_link(old, new);
2323
2324    /* Change backing files if a previously independent node is added to the
2325     * chain. For active commit, we replace top by its own (indirect) backing
2326     * file and don't do anything here so we don't build a loop. */
2327    if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
2328        bdrv_set_backing_hd(new, backing_bs(old));
2329        bdrv_set_backing_hd(old, NULL);
2330    }
2331
2332    bdrv_unref(old);
2333}
2334
2335static void bdrv_delete(BlockDriverState *bs)
2336{
2337    assert(!bs->job);
2338    assert(bdrv_op_blocker_is_empty(bs));
2339    assert(!bs->refcnt);
2340
2341    bdrv_close(bs);
2342
2343    /* remove from list, if necessary */
2344    if (bs->node_name[0] != '\0') {
2345        QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2346    }
2347    QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);
2348
2349    g_free(bs);
2350}
2351
2352/*
2353 * Run consistency checks on an image
2354 *
2355 * Returns 0 if the check could be completed (it doesn't mean that the image is
2356 * free of errors) or -errno when an internal error occurred. The results of the
2357 * check are stored in res.
2358 */
2359int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2360{
2361    if (bs->drv == NULL) {
2362        return -ENOMEDIUM;
2363    }
2364    if (bs->drv->bdrv_check == NULL) {
2365        return -ENOTSUP;
2366    }
2367
2368    memset(res, 0, sizeof(*res));
2369    return bs->drv->bdrv_check(bs, res, fix);
2370}
2371
2372#define COMMIT_BUF_SECTORS 2048
2373
2374/* commit COW file into the raw image */
2375int bdrv_commit(BlockDriverState *bs)
2376{
2377    BlockDriver *drv = bs->drv;
2378    int64_t sector, total_sectors, length, backing_length;
2379    int n, ro, open_flags;
2380    int ret = 0;
2381    uint8_t *buf = NULL;
2382
2383    if (!drv)
2384        return -ENOMEDIUM;
2385
2386    if (!bs->backing) {
2387        return -ENOTSUP;
2388    }
2389
2390    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2391        bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
2392        return -EBUSY;
2393    }
2394
2395    ro = bs->backing->bs->read_only;
2396    open_flags =  bs->backing->bs->open_flags;
2397
2398    if (ro) {
2399        if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
2400            return -EACCES;
2401        }
2402    }
2403
2404    length = bdrv_getlength(bs);
2405    if (length < 0) {
2406        ret = length;
2407        goto ro_cleanup;
2408    }
2409
2410    backing_length = bdrv_getlength(bs->backing->bs);
2411    if (backing_length < 0) {
2412        ret = backing_length;
2413        goto ro_cleanup;
2414    }
2415
2416    /* If our top snapshot is larger than the backing file image,
2417     * grow the backing file image if possible.  If not possible,
2418     * we must return an error */
2419    if (length > backing_length) {
2420        ret = bdrv_truncate(bs->backing->bs, length);
2421        if (ret < 0) {
2422            goto ro_cleanup;
2423        }
2424    }
2425
2426    total_sectors = length >> BDRV_SECTOR_BITS;
2427
2428    /* qemu_try_blockalign() for bs will choose an alignment that works for
2429     * bs->backing->bs as well, so no need to compare the alignment manually. */
2430    buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2431    if (buf == NULL) {
2432        ret = -ENOMEM;
2433        goto ro_cleanup;
2434    }
2435
2436    for (sector = 0; sector < total_sectors; sector += n) {
2437        ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2438        if (ret < 0) {
2439            goto ro_cleanup;
2440        }
2441        if (ret) {
2442            ret = bdrv_read(bs, sector, buf, n);
2443            if (ret < 0) {
2444                goto ro_cleanup;
2445            }
2446
2447            ret = bdrv_write(bs->backing->bs, sector, buf, n);
2448            if (ret < 0) {
2449                goto ro_cleanup;
2450            }
2451        }
2452    }
2453
2454    if (drv->bdrv_make_empty) {
2455        ret = drv->bdrv_make_empty(bs);
2456        if (ret < 0) {
2457            goto ro_cleanup;
2458        }
2459        bdrv_flush(bs);
2460    }
2461
2462    /*
2463     * Make sure all data we wrote to the backing device is actually
2464     * stable on disk.
2465     */
2466    if (bs->backing) {
2467        bdrv_flush(bs->backing->bs);
2468    }
2469
2470    ret = 0;
2471ro_cleanup:
2472    qemu_vfree(buf);
2473
2474    if (ro) {
2475        /* ignoring error return here */
2476        bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
2477    }
2478
2479    return ret;
2480}
2481
2482/*
2483 * Return values:
2484 * 0        - success
2485 * -EINVAL  - backing format specified, but no file
2486 * -ENOSPC  - can't update the backing file because no space is left in the
2487 *            image file header
2488 * -ENOTSUP - format driver doesn't support changing the backing file
2489 */
2490int bdrv_change_backing_file(BlockDriverState *bs,
2491    const char *backing_file, const char *backing_fmt)
2492{
2493    BlockDriver *drv = bs->drv;
2494    int ret;
2495
2496    /* Backing file format doesn't make sense without a backing file */
2497    if (backing_fmt && !backing_file) {
2498        return -EINVAL;
2499    }
2500
2501    if (drv->bdrv_change_backing_file != NULL) {
2502        ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2503    } else {
2504        ret = -ENOTSUP;
2505    }
2506
2507    if (ret == 0) {
2508        pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2509        pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2510    }
2511    return ret;
2512}
2513
2514/*
2515 * Finds the image layer in the chain that has 'bs' as its backing file.
2516 *
2517 * active is the current topmost image.
2518 *
2519 * Returns NULL if bs is not found in active's image chain,
2520 * or if active == bs.
2521 *
2522 * Returns the bottommost base image if bs == NULL.
2523 */
2524BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2525                                    BlockDriverState *bs)
2526{
2527    while (active && bs != backing_bs(active)) {
2528        active = backing_bs(active);
2529    }
2530
2531    return active;
2532}
2533
2534/* Given a BDS, searches for the base layer. */
2535BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2536{
2537    return bdrv_find_overlay(bs, NULL);
2538}
2539
2540/*
2541 * Drops images above 'base' up to and including 'top', and sets the image
2542 * above 'top' to have base as its backing file.
2543 *
2544 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2545 * information in 'bs' can be properly updated.
2546 *
2547 * E.g., this will convert the following chain:
2548 * bottom <- base <- intermediate <- top <- active
2549 *
2550 * to
2551 *
2552 * bottom <- base <- active
2553 *
2554 * It is allowed for bottom==base, in which case it converts:
2555 *
2556 * base <- intermediate <- top <- active
2557 *
2558 * to
2559 *
2560 * base <- active
2561 *
2562 * If backing_file_str is non-NULL, it will be used when modifying top's
2563 * overlay image metadata.
2564 *
2565 * Error conditions:
2566 *  if active == top, that is considered an error
2567 *
2568 */
2569int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2570                           BlockDriverState *base, const char *backing_file_str)
2571{
2572    BlockDriverState *new_top_bs = NULL;
2573    int ret = -EIO;
2574
2575    if (!top->drv || !base->drv) {
2576        goto exit;
2577    }
2578
2579    new_top_bs = bdrv_find_overlay(active, top);
2580
2581    if (new_top_bs == NULL) {
2582        /* we could not find the image above 'top', this is an error */
2583        goto exit;
2584    }
2585
2586    /* special case of new_top_bs->backing->bs already pointing to base - nothing
2587     * to do, no intermediate images */
2588    if (backing_bs(new_top_bs) == base) {
2589        ret = 0;
2590        goto exit;
2591    }
2592
2593    /* Make sure that base is in the backing chain of top */
2594    if (!bdrv_chain_contains(top, base)) {
2595        goto exit;
2596    }
2597
2598    /* success - we can delete the intermediate states, and link top->base */
2599    backing_file_str = backing_file_str ? backing_file_str : base->filename;
2600    ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2601                                   base->drv ? base->drv->format_name : "");
2602    if (ret) {
2603        goto exit;
2604    }
2605    bdrv_set_backing_hd(new_top_bs, base);
2606
2607    ret = 0;
2608exit:
2609    return ret;
2610}
2611
2612/**
2613 * Truncate file to 'offset' bytes (needed only for file protocols)
2614 */
2615int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2616{
2617    BlockDriver *drv = bs->drv;
2618    int ret;
2619    if (!drv)
2620        return -ENOMEDIUM;
2621    if (!drv->bdrv_truncate)
2622        return -ENOTSUP;
2623    if (bs->read_only)
2624        return -EACCES;
2625
2626    ret = drv->bdrv_truncate(bs, offset);
2627    if (ret == 0) {
2628        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2629        bdrv_dirty_bitmap_truncate(bs);
2630        if (bs->blk) {
2631            blk_dev_resize_cb(bs->blk);
2632        }
2633    }
2634    return ret;
2635}
2636
2637/**
2638 * Length of a allocated file in bytes. Sparse files are counted by actual
2639 * allocated space. Return < 0 if error or unknown.
2640 */
2641int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2642{
2643    BlockDriver *drv = bs->drv;
2644    if (!drv) {
2645        return -ENOMEDIUM;
2646    }
2647    if (drv->bdrv_get_allocated_file_size) {
2648        return drv->bdrv_get_allocated_file_size(bs);
2649    }
2650    if (bs->file) {
2651        return bdrv_get_allocated_file_size(bs->file->bs);
2652    }
2653    return -ENOTSUP;
2654}
2655
2656/**
2657 * Return number of sectors on success, -errno on error.
2658 */
2659int64_t bdrv_nb_sectors(BlockDriverState *bs)
2660{
2661    BlockDriver *drv = bs->drv;
2662
2663    if (!drv)
2664        return -ENOMEDIUM;
2665
2666    if (drv->has_variable_length) {
2667        int ret = refresh_total_sectors(bs, bs->total_sectors);
2668        if (ret < 0) {
2669            return ret;
2670        }
2671    }
2672    return bs->total_sectors;
2673}
2674
2675/**
2676 * Return length in bytes on success, -errno on error.
2677 * The length is always a multiple of BDRV_SECTOR_SIZE.
2678 */
2679int64_t bdrv_getlength(BlockDriverState *bs)
2680{
2681    int64_t ret = bdrv_nb_sectors(bs);
2682
2683    ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2684    return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2685}
2686
2687/* return 0 as number of sectors if no device present or error */
2688void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2689{
2690    int64_t nb_sectors = bdrv_nb_sectors(bs);
2691
2692    *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2693}
2694
2695int bdrv_is_read_only(BlockDriverState *bs)
2696{
2697    return bs->read_only;
2698}
2699
2700int bdrv_is_sg(BlockDriverState *bs)
2701{
2702    return bs->sg;
2703}
2704
2705int bdrv_is_encrypted(BlockDriverState *bs)
2706{
2707    if (bs->backing && bs->backing->bs->encrypted) {
2708        return 1;
2709    }
2710    return bs->encrypted;
2711}
2712
2713int bdrv_key_required(BlockDriverState *bs)
2714{
2715    BdrvChild *backing = bs->backing;
2716
2717    if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
2718        return 1;
2719    }
2720    return (bs->encrypted && !bs->valid_key);
2721}
2722
2723int bdrv_set_key(BlockDriverState *bs, const char *key)
2724{
2725    int ret;
2726    if (bs->backing && bs->backing->bs->encrypted) {
2727        ret = bdrv_set_key(bs->backing->bs, key);
2728        if (ret < 0)
2729            return ret;
2730        if (!bs->encrypted)
2731            return 0;
2732    }
2733    if (!bs->encrypted) {
2734        return -EINVAL;
2735    } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2736        return -ENOMEDIUM;
2737    }
2738    ret = bs->drv->bdrv_set_key(bs, key);
2739    if (ret < 0) {
2740        bs->valid_key = 0;
2741    } else if (!bs->valid_key) {
2742        bs->valid_key = 1;
2743        if (bs->blk) {
2744            /* call the change callback now, we skipped it on open */
2745            blk_dev_change_media_cb(bs->blk, true);
2746        }
2747    }
2748    return ret;
2749}
2750
2751/*
2752 * Provide an encryption key for @bs.
2753 * If @key is non-null:
2754 *     If @bs is not encrypted, fail.
2755 *     Else if the key is invalid, fail.
2756 *     Else set @bs's key to @key, replacing the existing key, if any.
2757 * If @key is null:
2758 *     If @bs is encrypted and still lacks a key, fail.
2759 *     Else do nothing.
2760 * On failure, store an error object through @errp if non-null.
2761 */
2762void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2763{
2764    if (key) {
2765        if (!bdrv_is_encrypted(bs)) {
2766            error_setg(errp, "Node '%s' is not encrypted",
2767                      bdrv_get_device_or_node_name(bs));
2768        } else if (bdrv_set_key(bs, key) < 0) {
2769            error_setg(errp, QERR_INVALID_PASSWORD);
2770        }
2771    } else {
2772        if (bdrv_key_required(bs)) {
2773            error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2774                      "'%s' (%s) is encrypted",
2775                      bdrv_get_device_or_node_name(bs),
2776                      bdrv_get_encrypted_filename(bs));
2777        }
2778    }
2779}
2780
2781const char *bdrv_get_format_name(BlockDriverState *bs)
2782{
2783    return bs->drv ? bs->drv->format_name : NULL;
2784}
2785
/*
 * qsort() comparison callback for an array of C strings.
 *
 * qsort() passes pointers to the array elements, so @a and @b each point to
 * a 'const char *' and have to be dereferenced once before the strings can
 * be compared. Returns the strcmp() result for the two strings.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2790
2791void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2792                         void *opaque)
2793{
2794    BlockDriver *drv;
2795    int count = 0;
2796    int i;
2797    const char **formats = NULL;
2798
2799    QLIST_FOREACH(drv, &bdrv_drivers, list) {
2800        if (drv->format_name) {
2801            bool found = false;
2802            int i = count;
2803            while (formats && i && !found) {
2804                found = !strcmp(formats[--i], drv->format_name);
2805            }
2806
2807            if (!found) {
2808                formats = g_renew(const char *, formats, count + 1);
2809                formats[count++] = drv->format_name;
2810            }
2811        }
2812    }
2813
2814    qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2815
2816    for (i = 0; i < count; i++) {
2817        it(opaque, formats[i]);
2818    }
2819
2820    g_free(formats);
2821}
2822
2823/* This function is to find a node in the bs graph */
2824BlockDriverState *bdrv_find_node(const char *node_name)
2825{
2826    BlockDriverState *bs;
2827
2828    assert(node_name);
2829
2830    QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2831        if (!strcmp(node_name, bs->node_name)) {
2832            return bs;
2833        }
2834    }
2835    return NULL;
2836}
2837
2838/* Put this QMP function here so it can access the static graph_bdrv_states. */
2839BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2840{
2841    BlockDeviceInfoList *list, *entry;
2842    BlockDriverState *bs;
2843
2844    list = NULL;
2845    QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2846        BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, errp);
2847        if (!info) {
2848            qapi_free_BlockDeviceInfoList(list);
2849            return NULL;
2850        }
2851        entry = g_malloc0(sizeof(*entry));
2852        entry->value = info;
2853        entry->next = list;
2854        list = entry;
2855    }
2856
2857    return list;
2858}
2859
2860BlockDriverState *bdrv_lookup_bs(const char *device,
2861                                 const char *node_name,
2862                                 Error **errp)
2863{
2864    BlockBackend *blk;
2865    BlockDriverState *bs;
2866
2867    if (device) {
2868        blk = blk_by_name(device);
2869
2870        if (blk) {
2871            bs = blk_bs(blk);
2872            if (!bs) {
2873                error_setg(errp, "Device '%s' has no medium", device);
2874            }
2875
2876            return bs;
2877        }
2878    }
2879
2880    if (node_name) {
2881        bs = bdrv_find_node(node_name);
2882
2883        if (bs) {
2884            return bs;
2885        }
2886    }
2887
2888    error_setg(errp, "Cannot find device=%s nor node_name=%s",
2889                     device ? device : "",
2890                     node_name ? node_name : "");
2891    return NULL;
2892}
2893
2894/* If 'base' is in the same chain as 'top', return true. Otherwise,
2895 * return false.  If either argument is NULL, return false. */
2896bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2897{
2898    while (top && top != base) {
2899        top = backing_bs(top);
2900    }
2901
2902    return top != NULL;
2903}
2904
2905BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2906{
2907    if (!bs) {
2908        return QTAILQ_FIRST(&graph_bdrv_states);
2909    }
2910    return QTAILQ_NEXT(bs, node_list);
2911}
2912
2913/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
2914 * the monitor or attached to a BlockBackend */
2915BlockDriverState *bdrv_next(BlockDriverState *bs)
2916{
2917    if (!bs || bs->blk) {
2918        bs = blk_next_root_bs(bs);
2919        if (bs) {
2920            return bs;
2921        }
2922    }
2923
2924    /* Ignore all BDSs that are attached to a BlockBackend here; they have been
2925     * handled by the above block already */
2926    do {
2927        bs = bdrv_next_monitor_owned(bs);
2928    } while (bs && bs->blk);
2929    return bs;
2930}
2931
2932const char *bdrv_get_node_name(const BlockDriverState *bs)
2933{
2934    return bs->node_name;
2935}
2936
2937/* TODO check what callers really want: bs->node_name or blk_name() */
2938const char *bdrv_get_device_name(const BlockDriverState *bs)
2939{
2940    return bs->blk ? blk_name(bs->blk) : "";
2941}
2942
2943/* This can be used to identify nodes that might not have a device
2944 * name associated. Since node and device names live in the same
2945 * namespace, the result is unambiguous. The exception is if both are
2946 * absent, then this returns an empty (non-null) string. */
2947const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2948{
2949    return bs->blk ? blk_name(bs->blk) : bs->node_name;
2950}
2951
2952int bdrv_get_flags(BlockDriverState *bs)
2953{
2954    return bs->open_flags;
2955}
2956
2957int bdrv_has_zero_init_1(BlockDriverState *bs)
2958{
2959    return 1;
2960}
2961
2962int bdrv_has_zero_init(BlockDriverState *bs)
2963{
2964    assert(bs->drv);
2965
2966    /* If BS is a copy on write image, it is initialized to
2967       the contents of the base image, which may not be zeroes.  */
2968    if (bs->backing) {
2969        return 0;
2970    }
2971    if (bs->drv->bdrv_has_zero_init) {
2972        return bs->drv->bdrv_has_zero_init(bs);
2973    }
2974
2975    /* safe default */
2976    return 0;
2977}
2978
2979bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
2980{
2981    BlockDriverInfo bdi;
2982
2983    if (bs->backing) {
2984        return false;
2985    }
2986
2987    if (bdrv_get_info(bs, &bdi) == 0) {
2988        return bdi.unallocated_blocks_are_zero;
2989    }
2990
2991    return false;
2992}
2993
2994bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
2995{
2996    BlockDriverInfo bdi;
2997
2998    if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
2999        return false;
3000    }
3001
3002    if (bdrv_get_info(bs, &bdi) == 0) {
3003        return bdi.can_write_zeroes_with_unmap;
3004    }
3005
3006    return false;
3007}
3008
3009const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
3010{
3011    if (bs->backing && bs->backing->bs->encrypted)
3012        return bs->backing_file;
3013    else if (bs->encrypted)
3014        return bs->filename;
3015    else
3016        return NULL;
3017}
3018
3019void bdrv_get_backing_filename(BlockDriverState *bs,
3020                               char *filename, int filename_size)
3021{
3022    pstrcpy(filename, filename_size, bs->backing_file);
3023}
3024
3025int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
3026{
3027    BlockDriver *drv = bs->drv;
3028    if (!drv)
3029        return -ENOMEDIUM;
3030    if (!drv->bdrv_get_info)
3031        return -ENOTSUP;
3032    memset(bdi, 0, sizeof(*bdi));
3033    return drv->bdrv_get_info(bs, bdi);
3034}
3035
3036ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
3037{
3038    BlockDriver *drv = bs->drv;
3039    if (drv && drv->bdrv_get_specific_info) {
3040        return drv->bdrv_get_specific_info(bs);
3041    }
3042    return NULL;
3043}
3044
3045void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
3046{
3047    if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
3048        return;
3049    }
3050
3051    bs->drv->bdrv_debug_event(bs, event);
3052}
3053
3054int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
3055                          const char *tag)
3056{
3057    while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
3058        bs = bs->file ? bs->file->bs : NULL;
3059    }
3060
3061    if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
3062        return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
3063    }
3064
3065    return -ENOTSUP;
3066}
3067
3068int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
3069{
3070    while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
3071        bs = bs->file ? bs->file->bs : NULL;
3072    }
3073
3074    if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
3075        return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
3076    }
3077
3078    return -ENOTSUP;
3079}
3080
3081int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
3082{
3083    while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
3084        bs = bs->file ? bs->file->bs : NULL;
3085    }
3086
3087    if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
3088        return bs->drv->bdrv_debug_resume(bs, tag);
3089    }
3090
3091    return -ENOTSUP;
3092}
3093
3094bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
3095{
3096    while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
3097        bs = bs->file ? bs->file->bs : NULL;
3098    }
3099
3100    if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
3101        return bs->drv->bdrv_debug_is_suspended(bs, tag);
3102    }
3103
3104    return false;
3105}
3106
3107int bdrv_is_snapshot(BlockDriverState *bs)
3108{
3109    return !!(bs->open_flags & BDRV_O_SNAPSHOT);
3110}
3111
3112/* backing_file can either be relative, or absolute, or a protocol.  If it is
3113 * relative, it must be relative to the chain.  So, passing in bs->filename
3114 * from a BDS as backing_file should not be done, as that may be relative to
3115 * the CWD rather than the chain. */
3116BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
3117        const char *backing_file)
3118{
3119    char *filename_full = NULL;
3120    char *backing_file_full = NULL;
3121    char *filename_tmp = NULL;
3122    int is_protocol = 0;
3123    BlockDriverState *curr_bs = NULL;
3124    BlockDriverState *retval = NULL;
3125
3126    if (!bs || !bs->drv || !backing_file) {
3127        return NULL;
3128    }
3129
3130    filename_full     = g_malloc(PATH_MAX);
3131    backing_file_full = g_malloc(PATH_MAX);
3132    filename_tmp      = g_malloc(PATH_MAX);
3133
3134    is_protocol = path_has_protocol(backing_file);
3135
3136    for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
3137
3138        /* If either of the filename paths is actually a protocol, then
3139         * compare unmodified paths; otherwise make paths relative */
3140        if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3141            if (strcmp(backing_file, curr_bs->backing_file) == 0) {
3142                retval = curr_bs->backing->bs;
3143                break;
3144            }
3145        } else {
3146            /* If not an absolute filename path, make it relative to the current
3147             * image's filename path */
3148            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3149                         backing_file);
3150
3151            /* We are going to compare absolute pathnames */
3152            if (!realpath(filename_tmp, filename_full)) {
3153                continue;
3154            }
3155
3156            /* We need to make sure the backing filename we are comparing against
3157             * is relative to the current image filename (or absolute) */
3158            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3159                         curr_bs->backing_file);
3160
3161            if (!realpath(filename_tmp, backing_file_full)) {
3162                continue;
3163            }
3164
3165            if (strcmp(backing_file_full, filename_full) == 0) {
3166                retval = curr_bs->backing->bs;
3167                break;
3168            }
3169        }
3170    }
3171
3172    g_free(filename_full);
3173    g_free(backing_file_full);
3174    g_free(filename_tmp);
3175    return retval;
3176}
3177
3178int bdrv_get_backing_file_depth(BlockDriverState *bs)
3179{
3180    if (!bs->drv) {
3181        return 0;
3182    }
3183
3184    if (!bs->backing) {
3185        return 0;
3186    }
3187
3188    return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
3189}
3190
3191void bdrv_init(void)
3192{
3193    module_call_init(MODULE_INIT_BLOCK);
3194}
3195
3196void bdrv_init_with_whitelist(void)
3197{
3198    use_bdrv_whitelist = 1;
3199    bdrv_init();
3200}
3201
3202void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3203{
3204    Error *local_err = NULL;
3205    int ret;
3206
3207    if (!bs->drv)  {
3208        return;
3209    }
3210
3211    if (!(bs->open_flags & BDRV_O_INACTIVE)) {
3212        return;
3213    }
3214    bs->open_flags &= ~BDRV_O_INACTIVE;
3215
3216    if (bs->drv->bdrv_invalidate_cache) {
3217        bs->drv->bdrv_invalidate_cache(bs, &local_err);
3218    } else if (bs->file) {
3219        bdrv_invalidate_cache(bs->file->bs, &local_err);
3220    }
3221    if (local_err) {
3222        bs->open_flags |= BDRV_O_INACTIVE;
3223        error_propagate(errp, local_err);
3224        return;
3225    }
3226
3227    ret = refresh_total_sectors(bs, bs->total_sectors);
3228    if (ret < 0) {
3229        bs->open_flags |= BDRV_O_INACTIVE;
3230        error_setg_errno(errp, -ret, "Could not refresh total sector count");
3231        return;
3232    }
3233}
3234
3235void bdrv_invalidate_cache_all(Error **errp)
3236{
3237    BlockDriverState *bs = NULL;
3238    Error *local_err = NULL;
3239
3240    while ((bs = bdrv_next(bs)) != NULL) {
3241        AioContext *aio_context = bdrv_get_aio_context(bs);
3242
3243        aio_context_acquire(aio_context);
3244        bdrv_invalidate_cache(bs, &local_err);
3245        aio_context_release(aio_context);
3246        if (local_err) {
3247            error_propagate(errp, local_err);
3248            return;
3249        }
3250    }
3251}
3252
3253static int bdrv_inactivate(BlockDriverState *bs)
3254{
3255    int ret;
3256
3257    if (bs->drv->bdrv_inactivate) {
3258        ret = bs->drv->bdrv_inactivate(bs);
3259        if (ret < 0) {
3260            return ret;
3261        }
3262    }
3263
3264    bs->open_flags |= BDRV_O_INACTIVE;
3265    return 0;
3266}
3267
3268int bdrv_inactivate_all(void)
3269{
3270    BlockDriverState *bs = NULL;
3271    int ret;
3272
3273    while ((bs = bdrv_next(bs)) != NULL) {
3274        AioContext *aio_context = bdrv_get_aio_context(bs);
3275
3276        aio_context_acquire(aio_context);
3277        ret = bdrv_inactivate(bs);
3278        aio_context_release(aio_context);
3279        if (ret < 0) {
3280            return ret;
3281        }
3282    }
3283
3284    return 0;
3285}
3286
3287/**************************************************************/
3288/* removable device support */
3289
3290/**
3291 * Return TRUE if the media is present
3292 */
3293bool bdrv_is_inserted(BlockDriverState *bs)
3294{
3295    BlockDriver *drv = bs->drv;
3296    BdrvChild *child;
3297
3298    if (!drv) {
3299        return false;
3300    }
3301    if (drv->bdrv_is_inserted) {
3302        return drv->bdrv_is_inserted(bs);
3303    }
3304    QLIST_FOREACH(child, &bs->children, next) {
3305        if (!bdrv_is_inserted(child->bs)) {
3306            return false;
3307        }
3308    }
3309    return true;
3310}
3311
3312/**
3313 * Return whether the media changed since the last call to this
3314 * function, or -ENOTSUP if we don't know.  Most drivers don't know.
3315 */
3316int bdrv_media_changed(BlockDriverState *bs)
3317{
3318    BlockDriver *drv = bs->drv;
3319
3320    if (drv && drv->bdrv_media_changed) {
3321        return drv->bdrv_media_changed(bs);
3322    }
3323    return -ENOTSUP;
3324}
3325
3326/**
3327 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3328 */
3329void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3330{
3331    BlockDriver *drv = bs->drv;
3332    const char *device_name;
3333
3334    if (drv && drv->bdrv_eject) {
3335        drv->bdrv_eject(bs, eject_flag);
3336    }
3337
3338    device_name = bdrv_get_device_name(bs);
3339    if (device_name[0] != '\0') {
3340        qapi_event_send_device_tray_moved(device_name,
3341                                          eject_flag, &error_abort);
3342    }
3343}
3344
3345/**
3346 * Lock or unlock the media (if it is locked, the user won't be able
3347 * to eject it manually).
3348 */
3349void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3350{
3351    BlockDriver *drv = bs->drv;
3352
3353    trace_bdrv_lock_medium(bs, locked);
3354
3355    if (drv && drv->bdrv_lock_medium) {
3356        drv->bdrv_lock_medium(bs, locked);
3357    }
3358}
3359
3360/* Get a reference to bs */
3361void bdrv_ref(BlockDriverState *bs)
3362{
3363    bs->refcnt++;
3364}
3365
3366/* Release a previously grabbed reference to bs.
3367 * If after releasing, reference count is zero, the BlockDriverState is
3368 * deleted. */
3369void bdrv_unref(BlockDriverState *bs)
3370{
3371    if (!bs) {
3372        return;
3373    }
3374    assert(bs->refcnt > 0);
3375    if (--bs->refcnt == 0) {
3376        bdrv_delete(bs);
3377    }
3378}
3379
3380struct BdrvOpBlocker {
3381    Error *reason;
3382    QLIST_ENTRY(BdrvOpBlocker) list;
3383};
3384
3385bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3386{
3387    BdrvOpBlocker *blocker;
3388    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3389    if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3390        blocker = QLIST_FIRST(&bs->op_blockers[op]);
3391        if (errp) {
3392            *errp = error_copy(blocker->reason);
3393            error_prepend(errp, "Node '%s' is busy: ",
3394                          bdrv_get_device_or_node_name(bs));
3395        }
3396        return true;
3397    }
3398    return false;
3399}
3400
3401void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3402{
3403    BdrvOpBlocker *blocker;
3404    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3405
3406    blocker = g_new0(BdrvOpBlocker, 1);
3407    blocker->reason = reason;
3408    QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3409}
3410
3411void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3412{
3413    BdrvOpBlocker *blocker, *next;
3414    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3415    QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3416        if (blocker->reason == reason) {
3417            QLIST_REMOVE(blocker, list);
3418            g_free(blocker);
3419        }
3420    }
3421}
3422
3423void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3424{
3425    int i;
3426    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3427        bdrv_op_block(bs, i, reason);
3428    }
3429}
3430
3431void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3432{
3433    int i;
3434    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3435        bdrv_op_unblock(bs, i, reason);
3436    }
3437}
3438
3439bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3440{
3441    int i;
3442
3443    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3444        if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3445            return false;
3446        }
3447    }
3448    return true;
3449}
3450
3451void bdrv_img_create(const char *filename, const char *fmt,
3452                     const char *base_filename, const char *base_fmt,
3453                     char *options, uint64_t img_size, int flags,
3454                     Error **errp, bool quiet)
3455{
3456    QemuOptsList *create_opts = NULL;
3457    QemuOpts *opts = NULL;
3458    const char *backing_fmt, *backing_file;
3459    int64_t size;
3460    BlockDriver *drv, *proto_drv;
3461    Error *local_err = NULL;
3462    int ret = 0;
3463
3464    /* Find driver and parse its options */
3465    drv = bdrv_find_format(fmt);
3466    if (!drv) {
3467        error_setg(errp, "Unknown file format '%s'", fmt);
3468        return;
3469    }
3470
3471    proto_drv = bdrv_find_protocol(filename, true, errp);
3472    if (!proto_drv) {
3473        return;
3474    }
3475
3476    if (!drv->create_opts) {
3477        error_setg(errp, "Format driver '%s' does not support image creation",
3478                   drv->format_name);
3479        return;
3480    }
3481
3482    if (!proto_drv->create_opts) {
3483        error_setg(errp, "Protocol driver '%s' does not support image creation",
3484                   proto_drv->format_name);
3485        return;
3486    }
3487
3488    create_opts = qemu_opts_append(create_opts, drv->create_opts);
3489    create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3490
3491    /* Create parameter list with default values */
3492    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3493    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3494
3495    /* Parse -o options */
3496    if (options) {
3497        qemu_opts_do_parse(opts, options, NULL, &local_err);
3498        if (local_err) {
3499            error_report_err(local_err);
3500            local_err = NULL;
3501            error_setg(errp, "Invalid options for file format '%s'", fmt);
3502            goto out;
3503        }
3504    }
3505
3506    if (base_filename) {
3507        qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3508        if (local_err) {
3509            error_setg(errp, "Backing file not supported for file format '%s'",
3510                       fmt);
3511            goto out;
3512        }
3513    }
3514
3515    if (base_fmt) {
3516        qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3517        if (local_err) {
3518            error_setg(errp, "Backing file format not supported for file "
3519                             "format '%s'", fmt);
3520            goto out;
3521        }
3522    }
3523
3524    backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3525    if (backing_file) {
3526        if (!strcmp(filename, backing_file)) {
3527            error_setg(errp, "Error: Trying to create an image with the "
3528                             "same filename as the backing file");
3529            goto out;
3530        }
3531    }
3532
3533    backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3534
3535    // The size for the image must always be specified, with one exception:
3536    // If we are using a backing file, we can obtain the size from there
3537    size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3538    if (size == -1) {
3539        if (backing_file) {
3540            BlockDriverState *bs;
3541            char *full_backing = g_new0(char, PATH_MAX);
3542            int64_t size;
3543            int back_flags;
3544            QDict *backing_options = NULL;
3545
3546            bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3547                                                         full_backing, PATH_MAX,
3548                                                         &local_err);
3549            if (local_err) {
3550                g_free(full_backing);
3551                goto out;
3552            }
3553
3554            /* backing files always opened read-only */
3555            back_flags = flags;
3556            back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3557
3558            if (backing_fmt) {
3559                backing_options = qdict_new();
3560                qdict_put(backing_options, "driver",
3561                          qstring_from_str(backing_fmt));
3562            }
3563
3564            bs = NULL;
3565            ret = bdrv_open(&bs, full_backing, NULL, backing_options,
3566                            back_flags, &local_err);
3567            g_free(full_backing);
3568            if (ret < 0) {
3569                goto out;
3570            }
3571            size = bdrv_getlength(bs);
3572            if (size < 0) {
3573                error_setg_errno(errp, -size, "Could not get size of '%s'",
3574                                 backing_file);
3575                bdrv_unref(bs);
3576                goto out;
3577            }
3578
3579            qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3580
3581            bdrv_unref(bs);
3582        } else {
3583            error_setg(errp, "Image creation needs a size parameter");
3584            goto out;
3585        }
3586    }
3587
3588    if (!quiet) {
3589        printf("Formatting '%s', fmt=%s ", filename, fmt);
3590        qemu_opts_print(opts, " ");
3591        puts("");
3592    }
3593
3594    ret = bdrv_create(drv, filename, opts, &local_err);
3595
3596    if (ret == -EFBIG) {
3597        /* This is generally a better message than whatever the driver would
3598         * deliver (especially because of the cluster_size_hint), since that
3599         * is most probably not much different from "image too large". */
3600        const char *cluster_size_hint = "";
3601        if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
3602            cluster_size_hint = " (try using a larger cluster size)";
3603        }
3604        error_setg(errp, "The image size is too large for file format '%s'"
3605                   "%s", fmt, cluster_size_hint);
3606        error_free(local_err);
3607        local_err = NULL;
3608    }
3609
3610out:
3611    qemu_opts_del(opts);
3612    qemu_opts_free(create_opts);
3613    if (local_err) {
3614        error_propagate(errp, local_err);
3615    }
3616}
3617
3618AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3619{
3620    return bs->aio_context;
3621}
3622
3623void bdrv_detach_aio_context(BlockDriverState *bs)
3624{
3625    BdrvAioNotifier *baf;
3626
3627    if (!bs->drv) {
3628        return;
3629    }
3630
3631    QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3632        baf->detach_aio_context(baf->opaque);
3633    }
3634
3635    if (bs->throttle_state) {
3636        throttle_timers_detach_aio_context(&bs->throttle_timers);
3637    }
3638    if (bs->drv->bdrv_detach_aio_context) {
3639        bs->drv->bdrv_detach_aio_context(bs);
3640    }
3641    if (bs->file) {
3642        bdrv_detach_aio_context(bs->file->bs);
3643    }
3644    if (bs->backing) {
3645        bdrv_detach_aio_context(bs->backing->bs);
3646    }
3647
3648    bs->aio_context = NULL;
3649}
3650
3651void bdrv_attach_aio_context(BlockDriverState *bs,
3652                             AioContext *new_context)
3653{
3654    BdrvAioNotifier *ban;
3655
3656    if (!bs->drv) {
3657        return;
3658    }
3659
3660    bs->aio_context = new_context;
3661
3662    if (bs->backing) {
3663        bdrv_attach_aio_context(bs->backing->bs, new_context);
3664    }
3665    if (bs->file) {
3666        bdrv_attach_aio_context(bs->file->bs, new_context);
3667    }
3668    if (bs->drv->bdrv_attach_aio_context) {
3669        bs->drv->bdrv_attach_aio_context(bs, new_context);
3670    }
3671    if (bs->throttle_state) {
3672        throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
3673    }
3674
3675    QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3676        ban->attached_aio_context(new_context, ban->opaque);
3677    }
3678}
3679
3680void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3681{
3682    bdrv_drain(bs); /* ensure there are no in-flight requests */
3683
3684    bdrv_detach_aio_context(bs);
3685
3686    /* This function executes in the old AioContext so acquire the new one in
3687     * case it runs in a different thread.
3688     */
3689    aio_context_acquire(new_context);
3690    bdrv_attach_aio_context(bs, new_context);
3691    aio_context_release(new_context);
3692}
3693
3694void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3695        void (*attached_aio_context)(AioContext *new_context, void *opaque),
3696        void (*detach_aio_context)(void *opaque), void *opaque)
3697{
3698    BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3699    *ban = (BdrvAioNotifier){
3700        .attached_aio_context = attached_aio_context,
3701        .detach_aio_context   = detach_aio_context,
3702        .opaque               = opaque
3703    };
3704
3705    QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3706}
3707
3708void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3709                                      void (*attached_aio_context)(AioContext *,
3710                                                                   void *),
3711                                      void (*detach_aio_context)(void *),
3712                                      void *opaque)
3713{
3714    BdrvAioNotifier *ban, *ban_next;
3715
3716    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3717        if (ban->attached_aio_context == attached_aio_context &&
3718            ban->detach_aio_context   == detach_aio_context   &&
3719            ban->opaque               == opaque)
3720        {
3721            QLIST_REMOVE(ban, list);
3722            g_free(ban);
3723
3724            return;
3725        }
3726    }
3727
3728    abort();
3729}
3730
3731int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3732                       BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
3733{
3734    if (!bs->drv->bdrv_amend_options) {
3735        return -ENOTSUP;
3736    }
3737    return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque);
3738}
3739
3740/* This function will be called by the bdrv_recurse_is_first_non_filter method
3741 * of block filter and by bdrv_is_first_non_filter.
3742 * It is used to test if the given bs is the candidate or recurse more in the
3743 * node graph.
3744 */
3745bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
3746                                      BlockDriverState *candidate)
3747{
3748    /* return false if basic checks fails */
3749    if (!bs || !bs->drv) {
3750        return false;
3751    }
3752
3753    /* the code reached a non block filter driver -> check if the bs is
3754     * the same as the candidate. It's the recursion termination condition.
3755     */
3756    if (!bs->drv->is_filter) {
3757        return bs == candidate;
3758    }
3759    /* Down this path the driver is a block filter driver */
3760
3761    /* If the block filter recursion method is defined use it to recurse down
3762     * the node graph.
3763     */
3764    if (bs->drv->bdrv_recurse_is_first_non_filter) {
3765        return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
3766    }
3767
3768    /* the driver is a block filter but don't allow to recurse -> return false
3769     */
3770    return false;
3771}
3772
3773/* This function checks if the candidate is the first non filter bs down it's
3774 * bs chain. Since we don't have pointers to parents it explore all bs chains
3775 * from the top. Some filters can choose not to pass down the recursion.
3776 */
3777bool bdrv_is_first_non_filter(BlockDriverState *candidate)
3778{
3779    BlockDriverState *bs = NULL;
3780
3781    /* walk down the bs forest recursively */
3782    while ((bs = bdrv_next(bs)) != NULL) {
3783        bool perm;
3784
3785        /* try to recurse in this top level bs */
3786        perm = bdrv_recurse_is_first_non_filter(bs, candidate);
3787
3788        /* candidate is the first non filter */
3789        if (perm) {
3790            return true;
3791        }
3792    }
3793
3794    return false;
3795}
3796
3797BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
3798                                        const char *node_name, Error **errp)
3799{
3800    BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
3801    AioContext *aio_context;
3802
3803    if (!to_replace_bs) {
3804        error_setg(errp, "Node name '%s' not found", node_name);
3805        return NULL;
3806    }
3807
3808    aio_context = bdrv_get_aio_context(to_replace_bs);
3809    aio_context_acquire(aio_context);
3810
3811    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
3812        to_replace_bs = NULL;
3813        goto out;
3814    }
3815
3816    /* We don't want arbitrary node of the BDS chain to be replaced only the top
3817     * most non filter in order to prevent data corruption.
3818     * Another benefit is that this tests exclude backing files which are
3819     * blocked by the backing blockers.
3820     */
3821    if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
3822        error_setg(errp, "Only top most non filter can be replaced");
3823        to_replace_bs = NULL;
3824        goto out;
3825    }
3826
3827out:
3828    aio_context_release(aio_context);
3829    return to_replace_bs;
3830}
3831
3832static bool append_open_options(QDict *d, BlockDriverState *bs)
3833{
3834    const QDictEntry *entry;
3835    QemuOptDesc *desc;
3836    BdrvChild *child;
3837    bool found_any = false;
3838    const char *p;
3839
3840    for (entry = qdict_first(bs->options); entry;
3841         entry = qdict_next(bs->options, entry))
3842    {
3843        /* Exclude options for children */
3844        QLIST_FOREACH(child, &bs->children, next) {
3845            if (strstart(qdict_entry_key(entry), child->name, &p)
3846                && (!*p || *p == '.'))
3847            {
3848                break;
3849            }
3850        }
3851        if (child) {
3852            continue;
3853        }
3854
3855        /* And exclude all non-driver-specific options */
3856        for (desc = bdrv_runtime_opts.desc; desc->name; desc++) {
3857            if (!strcmp(qdict_entry_key(entry), desc->name)) {
3858                break;
3859            }
3860        }
3861        if (desc->name) {
3862            continue;
3863        }
3864
3865        qobject_incref(qdict_entry_value(entry));
3866        qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
3867        found_any = true;
3868    }
3869
3870    return found_any;
3871}
3872
3873/* Updates the following BDS fields:
3874 *  - exact_filename: A filename which may be used for opening a block device
3875 *                    which (mostly) equals the given BDS (even without any
3876 *                    other options; so reading and writing must return the same
3877 *                    results, but caching etc. may be different)
3878 *  - full_open_options: Options which, when given when opening a block device
3879 *                       (without a filename), result in a BDS (mostly)
3880 *                       equalling the given one
3881 *  - filename: If exact_filename is set, it is copied here. Otherwise,
3882 *              full_open_options is converted to a JSON object, prefixed with
3883 *              "json:" (for use through the JSON pseudo protocol) and put here.
3884 */
3885void bdrv_refresh_filename(BlockDriverState *bs)
3886{
3887    BlockDriver *drv = bs->drv;
3888    QDict *opts;
3889
3890    if (!drv) {
3891        return;
3892    }
3893
3894    /* This BDS's file name will most probably depend on its file's name, so
3895     * refresh that first */
3896    if (bs->file) {
3897        bdrv_refresh_filename(bs->file->bs);
3898    }
3899
3900    if (drv->bdrv_refresh_filename) {
3901        /* Obsolete information is of no use here, so drop the old file name
3902         * information before refreshing it */
3903        bs->exact_filename[0] = '\0';
3904        if (bs->full_open_options) {
3905            QDECREF(bs->full_open_options);
3906            bs->full_open_options = NULL;
3907        }
3908
3909        opts = qdict_new();
3910        append_open_options(opts, bs);
3911        drv->bdrv_refresh_filename(bs, opts);
3912        QDECREF(opts);
3913    } else if (bs->file) {
3914        /* Try to reconstruct valid information from the underlying file */
3915        bool has_open_options;
3916
3917        bs->exact_filename[0] = '\0';
3918        if (bs->full_open_options) {
3919            QDECREF(bs->full_open_options);
3920            bs->full_open_options = NULL;
3921        }
3922
3923        opts = qdict_new();
3924        has_open_options = append_open_options(opts, bs);
3925
3926        /* If no specific options have been given for this BDS, the filename of
3927         * the underlying file should suffice for this one as well */
3928        if (bs->file->bs->exact_filename[0] && !has_open_options) {
3929            strcpy(bs->exact_filename, bs->file->bs->exact_filename);
3930        }
3931        /* Reconstructing the full options QDict is simple for most format block
3932         * drivers, as long as the full options are known for the underlying
3933         * file BDS. The full options QDict of that file BDS should somehow
3934         * contain a representation of the filename, therefore the following
3935         * suffices without querying the (exact_)filename of this BDS. */
3936        if (bs->file->bs->full_open_options) {
3937            qdict_put_obj(opts, "driver",
3938                          QOBJECT(qstring_from_str(drv->format_name)));
3939            QINCREF(bs->file->bs->full_open_options);
3940            qdict_put_obj(opts, "file",
3941                          QOBJECT(bs->file->bs->full_open_options));
3942
3943            bs->full_open_options = opts;
3944        } else {
3945            QDECREF(opts);
3946        }
3947    } else if (!bs->full_open_options && qdict_size(bs->options)) {
3948        /* There is no underlying file BDS (at least referenced by BDS.file),
3949         * so the full options QDict should be equal to the options given
3950         * specifically for this block device when it was opened (plus the
3951         * driver specification).
3952         * Because those options don't change, there is no need to update
3953         * full_open_options when it's already set. */
3954
3955        opts = qdict_new();
3956        append_open_options(opts, bs);
3957        qdict_put_obj(opts, "driver",
3958                      QOBJECT(qstring_from_str(drv->format_name)));
3959
3960        if (bs->exact_filename[0]) {
3961            /* This may not work for all block protocol drivers (some may
3962             * require this filename to be parsed), but we have to find some
3963             * default solution here, so just include it. If some block driver
3964             * does not support pure options without any filename at all or
3965             * needs some special format of the options QDict, it needs to
3966             * implement the driver-specific bdrv_refresh_filename() function.
3967             */
3968            qdict_put_obj(opts, "filename",
3969                          QOBJECT(qstring_from_str(bs->exact_filename)));
3970        }
3971
3972        bs->full_open_options = opts;
3973    }
3974
3975    if (bs->exact_filename[0]) {
3976        pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
3977    } else if (bs->full_open_options) {
3978        QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
3979        snprintf(bs->filename, sizeof(bs->filename), "json:%s",
3980                 qstring_get_str(json));
3981        QDECREF(json);
3982    }
3983}
3984