qemu/block.c
<<
>>
Prefs
   1/*
   2 * QEMU System Emulator block driver
   3 *
   4 * Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24
  25#include "qemu/osdep.h"
  26#include "block/trace.h"
  27#include "block/block_int.h"
  28#include "block/blockjob.h"
  29#include "block/nbd.h"
  30#include "qemu/error-report.h"
  31#include "module_block.h"
  32#include "qemu/module.h"
  33#include "qapi/error.h"
  34#include "qapi/qmp/qdict.h"
  35#include "qapi/qmp/qjson.h"
  36#include "qapi/qmp/qnull.h"
  37#include "qapi/qmp/qstring.h"
  38#include "qapi/qobject-output-visitor.h"
  39#include "qapi/qapi-visit-block-core.h"
  40#include "sysemu/block-backend.h"
  41#include "sysemu/sysemu.h"
  42#include "qemu/notify.h"
  43#include "qemu/option.h"
  44#include "qemu/coroutine.h"
  45#include "block/qapi.h"
  46#include "qemu/timer.h"
  47#include "qemu/cutils.h"
  48#include "qemu/id.h"
  49
  50#ifdef CONFIG_BSD
  51#include <sys/ioctl.h>
  52#include <sys/queue.h>
  53#ifndef __DragonFly__
  54#include <sys/disk.h>
  55#endif
  56#endif
  57
  58#ifdef _WIN32
  59#include <windows.h>
  60#endif
  61
/* Sentinel result value; bdrv_create() polls until CreateCo.ret no longer
 * equals it. */
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/* NOTE(review): not populated anywhere in this part of the file; presumably
 * the list of nodes that are part of the named graph -- confirm. */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

/* Every BlockDriverState returned by bdrv_new() is appended to this list. */
static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(all_bdrv_states);

/* Registered block drivers; bdrv_register() inserts at the head. */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* Forward declaration; the definition is further down in this file. */
static BlockDriverState *bdrv_open_inherit(const char *filename,
                                           const char *reference,
                                           QDict *options, int flags,
                                           BlockDriverState *parent,
                                           const BdrvChildRole *child_role,
                                           Error **errp);

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
  82
  83#ifdef _WIN32
  84static int is_windows_drive_prefix(const char *filename)
  85{
  86    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
  87             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
  88            filename[1] == ':');
  89}
  90
  91int is_windows_drive(const char *filename)
  92{
  93    if (is_windows_drive_prefix(filename) &&
  94        filename[2] == '\0')
  95        return 1;
  96    if (strstart(filename, "\\\\.\\", NULL) ||
  97        strstart(filename, "//./", NULL))
  98        return 1;
  99    return 0;
 100}
 101#endif
 102
 103size_t bdrv_opt_mem_align(BlockDriverState *bs)
 104{
 105    if (!bs || !bs->drv) {
 106        /* page size or 4k (hdd sector size) should be on the safe side */
 107        return MAX(4096, getpagesize());
 108    }
 109
 110    return bs->bl.opt_mem_alignment;
 111}
 112
 113size_t bdrv_min_mem_align(BlockDriverState *bs)
 114{
 115    if (!bs || !bs->drv) {
 116        /* page size or 4k (hdd sector size) should be on the safe side */
 117        return MAX(4096, getpagesize());
 118    }
 119
 120    return bs->bl.min_mem_alignment;
 121}
 122
 123/* check if the path starts with "<protocol>:" */
 124int path_has_protocol(const char *path)
 125{
 126    const char *p;
 127
 128#ifdef _WIN32
 129    if (is_windows_drive(path) ||
 130        is_windows_drive_prefix(path)) {
 131        return 0;
 132    }
 133    p = path + strcspn(path, ":/\\");
 134#else
 135    p = path + strcspn(path, ":/");
 136#endif
 137
 138    return *p == ':';
 139}
 140
 141int path_is_absolute(const char *path)
 142{
 143#ifdef _WIN32
 144    /* specific case for names like: "\\.\d:" */
 145    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
 146        return 1;
 147    }
 148    return (*path == '/' || *path == '\\');
 149#else
 150    return (*path == '/');
 151#endif
 152}
 153
 154/* if filename is absolute, just copy it to dest. Otherwise, build a
 155   path to it by considering it is relative to base_path. URL are
 156   supported. */
 157void path_combine(char *dest, int dest_size,
 158                  const char *base_path,
 159                  const char *filename)
 160{
 161    const char *p, *p1;
 162    int len;
 163
 164    if (dest_size <= 0)
 165        return;
 166    if (path_is_absolute(filename)) {
 167        pstrcpy(dest, dest_size, filename);
 168    } else {
 169        const char *protocol_stripped = NULL;
 170
 171        if (path_has_protocol(base_path)) {
 172            protocol_stripped = strchr(base_path, ':');
 173            if (protocol_stripped) {
 174                protocol_stripped++;
 175            }
 176        }
 177        p = protocol_stripped ?: base_path;
 178
 179        p1 = strrchr(base_path, '/');
 180#ifdef _WIN32
 181        {
 182            const char *p2;
 183            p2 = strrchr(base_path, '\\');
 184            if (!p1 || p2 > p1)
 185                p1 = p2;
 186        }
 187#endif
 188        if (p1)
 189            p1++;
 190        else
 191            p1 = base_path;
 192        if (p1 > p)
 193            p = p1;
 194        len = p - base_path;
 195        if (len > dest_size - 1)
 196            len = dest_size - 1;
 197        memcpy(dest, base_path, len);
 198        dest[len] = '\0';
 199        pstrcat(dest, dest_size, filename);
 200    }
 201}
 202
 203/*
 204 * Helper function for bdrv_parse_filename() implementations to remove optional
 205 * protocol prefixes (especially "file:") from a filename and for putting the
 206 * stripped filename into the options QDict if there is such a prefix.
 207 */
 208void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
 209                                      QDict *options)
 210{
 211    if (strstart(filename, prefix, &filename)) {
 212        /* Stripping the explicit protocol prefix may result in a protocol
 213         * prefix being (wrongly) detected (if the filename contains a colon) */
 214        if (path_has_protocol(filename)) {
 215            QString *fat_filename;
 216
 217            /* This means there is some colon before the first slash; therefore,
 218             * this cannot be an absolute path */
 219            assert(!path_is_absolute(filename));
 220
 221            /* And we can thus fix the protocol detection issue by prefixing it
 222             * by "./" */
 223            fat_filename = qstring_from_str("./");
 224            qstring_append(fat_filename, filename);
 225
 226            assert(!path_has_protocol(qstring_get_str(fat_filename)));
 227
 228            qdict_put(options, "filename", fat_filename);
 229        } else {
 230            /* If no protocol prefix was detected, we can use the shortened
 231             * filename as-is */
 232            qdict_put_str(options, "filename", filename);
 233        }
 234    }
 235}
 236
 237
 238/* Returns whether the image file is opened as read-only. Note that this can
 239 * return false and writing to the image file is still not possible because the
 240 * image is inactivated. */
 241bool bdrv_is_read_only(BlockDriverState *bs)
 242{
 243    return bs->read_only;
 244}
 245
 246int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
 247                           bool ignore_allow_rdw, Error **errp)
 248{
 249    /* Do not set read_only if copy_on_read is enabled */
 250    if (bs->copy_on_read && read_only) {
 251        error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled",
 252                   bdrv_get_device_or_node_name(bs));
 253        return -EINVAL;
 254    }
 255
 256    /* Do not clear read_only if it is prohibited */
 257    if (!read_only && !(bs->open_flags & BDRV_O_ALLOW_RDWR) &&
 258        !ignore_allow_rdw)
 259    {
 260        error_setg(errp, "Node '%s' is read only",
 261                   bdrv_get_device_or_node_name(bs));
 262        return -EPERM;
 263    }
 264
 265    return 0;
 266}
 267
 268/* TODO Remove (deprecated since 2.11)
 269 * Block drivers are not supposed to automatically change bs->read_only.
 270 * Instead, they should just check whether they can provide what the user
 271 * explicitly requested and error out if read-write is requested, but they can
 272 * only provide read-only access. */
 273int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp)
 274{
 275    int ret = 0;
 276
 277    ret = bdrv_can_set_read_only(bs, read_only, false, errp);
 278    if (ret < 0) {
 279        return ret;
 280    }
 281
 282    bs->read_only = read_only;
 283    return 0;
 284}
 285
 286void bdrv_get_full_backing_filename_from_filename(const char *backed,
 287                                                  const char *backing,
 288                                                  char *dest, size_t sz,
 289                                                  Error **errp)
 290{
 291    if (backing[0] == '\0' || path_has_protocol(backing) ||
 292        path_is_absolute(backing))
 293    {
 294        pstrcpy(dest, sz, backing);
 295    } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
 296        error_setg(errp, "Cannot use relative backing file names for '%s'",
 297                   backed);
 298    } else {
 299        path_combine(dest, sz, backed, backing);
 300    }
 301}
 302
 303void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
 304                                    Error **errp)
 305{
 306    char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
 307
 308    bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
 309                                                 dest, sz, errp);
 310}
 311
/* Add @bdrv to the head of the global bdrv_drivers list, which the driver
 * lookup and probing functions in this file iterate over. */
void bdrv_register(BlockDriver *bdrv)
{
    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
 316
 317BlockDriverState *bdrv_new(void)
 318{
 319    BlockDriverState *bs;
 320    int i;
 321
 322    bs = g_new0(BlockDriverState, 1);
 323    QLIST_INIT(&bs->dirty_bitmaps);
 324    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
 325        QLIST_INIT(&bs->op_blockers[i]);
 326    }
 327    notifier_with_return_list_init(&bs->before_write_notifiers);
 328    qemu_co_mutex_init(&bs->reqs_lock);
 329    qemu_mutex_init(&bs->dirty_bitmap_mutex);
 330    bs->refcnt = 1;
 331    bs->aio_context = qemu_get_aio_context();
 332
 333    qemu_co_queue_init(&bs->flush_queue);
 334
 335    QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
 336
 337    return bs;
 338}
 339
 340static BlockDriver *bdrv_do_find_format(const char *format_name)
 341{
 342    BlockDriver *drv1;
 343
 344    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
 345        if (!strcmp(drv1->format_name, format_name)) {
 346            return drv1;
 347        }
 348    }
 349
 350    return NULL;
 351}
 352
 353BlockDriver *bdrv_find_format(const char *format_name)
 354{
 355    BlockDriver *drv1;
 356    int i;
 357
 358    drv1 = bdrv_do_find_format(format_name);
 359    if (drv1) {
 360        return drv1;
 361    }
 362
 363    /* The driver isn't registered, maybe we need to load a module */
 364    for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) {
 365        if (!strcmp(block_driver_modules[i].format_name, format_name)) {
 366            block_module_load_one(block_driver_modules[i].library_name);
 367            break;
 368        }
 369    }
 370
 371    return bdrv_do_find_format(format_name);
 372}
 373
 374int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
 375{
 376    static const char *whitelist_rw[] = {
 377        CONFIG_BDRV_RW_WHITELIST
 378    };
 379    static const char *whitelist_ro[] = {
 380        CONFIG_BDRV_RO_WHITELIST
 381    };
 382    const char **p;
 383
 384    if (!whitelist_rw[0] && !whitelist_ro[0]) {
 385        return 1;               /* no whitelist, anything goes */
 386    }
 387
 388    for (p = whitelist_rw; *p; p++) {
 389        if (!strcmp(drv->format_name, *p)) {
 390            return 1;
 391        }
 392    }
 393    if (read_only) {
 394        for (p = whitelist_ro; *p; p++) {
 395            if (!strcmp(drv->format_name, *p)) {
 396                return 1;
 397            }
 398        }
 399    }
 400    return 0;
 401}
 402
 403bool bdrv_uses_whitelist(void)
 404{
 405    return use_bdrv_whitelist;
 406}
 407
/* Arguments and results exchanged between bdrv_create() and the coroutine
 * entry point bdrv_create_co_entry(). */
typedef struct CreateCo {
    BlockDriver *drv;   /* driver whose bdrv_co_create_opts() is called */
    char *filename;     /* copy of the file name, freed by bdrv_create() */
    QemuOpts *opts;     /* creation options passed through to the driver */
    int ret;            /* NOT_DONE until the driver call has returned */
    Error *err;         /* error reported by the driver call, if any */
} CreateCo;
 415
 416static void coroutine_fn bdrv_create_co_entry(void *opaque)
 417{
 418    Error *local_err = NULL;
 419    int ret;
 420
 421    CreateCo *cco = opaque;
 422    assert(cco->drv);
 423
 424    ret = cco->drv->bdrv_co_create_opts(cco->filename, cco->opts, &local_err);
 425    error_propagate(&cco->err, local_err);
 426    cco->ret = ret;
 427}
 428
 429int bdrv_create(BlockDriver *drv, const char* filename,
 430                QemuOpts *opts, Error **errp)
 431{
 432    int ret;
 433
 434    Coroutine *co;
 435    CreateCo cco = {
 436        .drv = drv,
 437        .filename = g_strdup(filename),
 438        .opts = opts,
 439        .ret = NOT_DONE,
 440        .err = NULL,
 441    };
 442
 443    if (!drv->bdrv_co_create_opts) {
 444        error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
 445        ret = -ENOTSUP;
 446        goto out;
 447    }
 448
 449    if (qemu_in_coroutine()) {
 450        /* Fast-path if already in coroutine context */
 451        bdrv_create_co_entry(&cco);
 452    } else {
 453        co = qemu_coroutine_create(bdrv_create_co_entry, &cco);
 454        qemu_coroutine_enter(co);
 455        while (cco.ret == NOT_DONE) {
 456            aio_poll(qemu_get_aio_context(), true);
 457        }
 458    }
 459
 460    ret = cco.ret;
 461    if (ret < 0) {
 462        if (cco.err) {
 463            error_propagate(errp, cco.err);
 464        } else {
 465            error_setg_errno(errp, -ret, "Could not create image");
 466        }
 467    }
 468
 469out:
 470    g_free(cco.filename);
 471    return ret;
 472}
 473
 474int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
 475{
 476    BlockDriver *drv;
 477    Error *local_err = NULL;
 478    int ret;
 479
 480    drv = bdrv_find_protocol(filename, true, errp);
 481    if (drv == NULL) {
 482        return -ENOENT;
 483    }
 484
 485    ret = bdrv_create(drv, filename, opts, &local_err);
 486    error_propagate(errp, local_err);
 487    return ret;
 488}
 489
 490/**
 491 * Try to get @bs's logical and physical block size.
 492 * On success, store them in @bsz struct and return 0.
 493 * On failure return -errno.
 494 * @bs must not be empty.
 495 */
 496int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
 497{
 498    BlockDriver *drv = bs->drv;
 499
 500    if (drv && drv->bdrv_probe_blocksizes) {
 501        return drv->bdrv_probe_blocksizes(bs, bsz);
 502    } else if (drv && drv->is_filter && bs->file) {
 503        return bdrv_probe_blocksizes(bs->file->bs, bsz);
 504    }
 505
 506    return -ENOTSUP;
 507}
 508
 509/**
 510 * Try to get @bs's geometry (cyls, heads, sectors).
 511 * On success, store them in @geo struct and return 0.
 512 * On failure return -errno.
 513 * @bs must not be empty.
 514 */
 515int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
 516{
 517    BlockDriver *drv = bs->drv;
 518
 519    if (drv && drv->bdrv_probe_geometry) {
 520        return drv->bdrv_probe_geometry(bs, geo);
 521    } else if (drv && drv->is_filter && bs->file) {
 522        return bdrv_probe_geometry(bs->file->bs, geo);
 523    }
 524
 525    return -ENOTSUP;
 526}
 527
 528/*
 529 * Create a uniquely-named empty temporary file.
 530 * Return 0 upon success, otherwise a negative errno value.
 531 */
 532int get_tmp_filename(char *filename, int size)
 533{
 534#ifdef _WIN32
 535    char temp_dir[MAX_PATH];
 536    /* GetTempFileName requires that its output buffer (4th param)
 537       have length MAX_PATH or greater.  */
 538    assert(size >= MAX_PATH);
 539    return (GetTempPath(MAX_PATH, temp_dir)
 540            && GetTempFileName(temp_dir, "qem", 0, filename)
 541            ? 0 : -GetLastError());
 542#else
 543    int fd;
 544    const char *tmpdir;
 545    tmpdir = getenv("TMPDIR");
 546    if (!tmpdir) {
 547        tmpdir = "/var/tmp";
 548    }
 549    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
 550        return -EOVERFLOW;
 551    }
 552    fd = mkstemp(filename);
 553    if (fd < 0) {
 554        return -errno;
 555    }
 556    if (close(fd) != 0) {
 557        unlink(filename);
 558        return -errno;
 559    }
 560    return 0;
 561#endif
 562}
 563
 564/*
 565 * Detect host devices. By convention, /dev/cdrom[N] is always
 566 * recognized as a host CDROM.
 567 */
 568static BlockDriver *find_hdev_driver(const char *filename)
 569{
 570    int score_max = 0, score;
 571    BlockDriver *drv = NULL, *d;
 572
 573    QLIST_FOREACH(d, &bdrv_drivers, list) {
 574        if (d->bdrv_probe_device) {
 575            score = d->bdrv_probe_device(filename);
 576            if (score > score_max) {
 577                score_max = score;
 578                drv = d;
 579            }
 580        }
 581    }
 582
 583    return drv;
 584}
 585
 586static BlockDriver *bdrv_do_find_protocol(const char *protocol)
 587{
 588    BlockDriver *drv1;
 589
 590    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
 591        if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) {
 592            return drv1;
 593        }
 594    }
 595
 596    return NULL;
 597}
 598
 599BlockDriver *bdrv_find_protocol(const char *filename,
 600                                bool allow_protocol_prefix,
 601                                Error **errp)
 602{
 603    BlockDriver *drv1;
 604    char protocol[128];
 605    int len;
 606    const char *p;
 607    int i;
 608
 609    /* TODO Drivers without bdrv_file_open must be specified explicitly */
 610
 611    /*
 612     * XXX(hch): we really should not let host device detection
 613     * override an explicit protocol specification, but moving this
 614     * later breaks access to device names with colons in them.
 615     * Thanks to the brain-dead persistent naming schemes on udev-
 616     * based Linux systems those actually are quite common.
 617     */
 618    drv1 = find_hdev_driver(filename);
 619    if (drv1) {
 620        return drv1;
 621    }
 622
 623    if (!path_has_protocol(filename) || !allow_protocol_prefix) {
 624        return &bdrv_file;
 625    }
 626
 627    p = strchr(filename, ':');
 628    assert(p != NULL);
 629    len = p - filename;
 630    if (len > sizeof(protocol) - 1)
 631        len = sizeof(protocol) - 1;
 632    memcpy(protocol, filename, len);
 633    protocol[len] = '\0';
 634
 635    drv1 = bdrv_do_find_protocol(protocol);
 636    if (drv1) {
 637        return drv1;
 638    }
 639
 640    for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) {
 641        if (block_driver_modules[i].protocol_name &&
 642            !strcmp(block_driver_modules[i].protocol_name, protocol)) {
 643            block_module_load_one(block_driver_modules[i].library_name);
 644            break;
 645        }
 646    }
 647
 648    drv1 = bdrv_do_find_protocol(protocol);
 649    if (!drv1) {
 650        error_setg(errp, "Unknown protocol '%s'", protocol);
 651    }
 652    return drv1;
 653}
 654
 655/*
 656 * Guess image format by probing its contents.
 657 * This is not a good idea when your image is raw (CVE-2008-2004), but
 658 * we do it anyway for backward compatibility.
 659 *
 660 * @buf         contains the image's first @buf_size bytes.
 661 * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
 662 *              but can be smaller if the image file is smaller)
 663 * @filename    is its filename.
 664 *
 665 * For all block drivers, call the bdrv_probe() method to get its
 666 * probing score.
 667 * Return the first block driver with the highest probing score.
 668 */
 669BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
 670                            const char *filename)
 671{
 672    int score_max = 0, score;
 673    BlockDriver *drv = NULL, *d;
 674
 675    QLIST_FOREACH(d, &bdrv_drivers, list) {
 676        if (d->bdrv_probe) {
 677            score = d->bdrv_probe(buf, buf_size, filename);
 678            if (score > score_max) {
 679                score_max = score;
 680                drv = d;
 681            }
 682        }
 683    }
 684
 685    return drv;
 686}
 687
 688static int find_image_format(BlockBackend *file, const char *filename,
 689                             BlockDriver **pdrv, Error **errp)
 690{
 691    BlockDriver *drv;
 692    uint8_t buf[BLOCK_PROBE_BUF_SIZE];
 693    int ret = 0;
 694
 695    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
 696    if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) {
 697        *pdrv = &bdrv_raw;
 698        return ret;
 699    }
 700
 701    ret = blk_pread(file, 0, buf, sizeof(buf));
 702    if (ret < 0) {
 703        error_setg_errno(errp, -ret, "Could not read image for determining its "
 704                         "format");
 705        *pdrv = NULL;
 706        return ret;
 707    }
 708
 709    drv = bdrv_probe_all(buf, ret, filename);
 710    if (!drv) {
 711        error_setg(errp, "Could not determine image format: No compatible "
 712                   "driver found");
 713        ret = -ENOENT;
 714    }
 715    *pdrv = drv;
 716    return ret;
 717}
 718
 719/**
 720 * Set the current 'total_sectors' value
 721 * Return 0 on success, -errno on error.
 722 */
 723static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
 724{
 725    BlockDriver *drv = bs->drv;
 726
 727    if (!drv) {
 728        return -ENOMEDIUM;
 729    }
 730
 731    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
 732    if (bdrv_is_sg(bs))
 733        return 0;
 734
 735    /* query actual device if possible, otherwise just trust the hint */
 736    if (drv->bdrv_getlength) {
 737        int64_t length = drv->bdrv_getlength(bs);
 738        if (length < 0) {
 739            return length;
 740        }
 741        hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
 742    }
 743
 744    bs->total_sectors = hint;
 745    return 0;
 746}
 747
 748/**
 749 * Combines a QDict of new block driver @options with any missing options taken
 750 * from @old_options, so that leaving out an option defaults to its old value.
 751 */
 752static void bdrv_join_options(BlockDriverState *bs, QDict *options,
 753                              QDict *old_options)
 754{
 755    if (bs->drv && bs->drv->bdrv_join_options) {
 756        bs->drv->bdrv_join_options(options, old_options);
 757    } else {
 758        qdict_join(options, old_options, false);
 759    }
 760}
 761
 762/**
 763 * Set open flags for a given discard mode
 764 *
 765 * Return 0 on success, -1 if the discard mode was invalid.
 766 */
 767int bdrv_parse_discard_flags(const char *mode, int *flags)
 768{
 769    *flags &= ~BDRV_O_UNMAP;
 770
 771    if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
 772        /* do nothing */
 773    } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
 774        *flags |= BDRV_O_UNMAP;
 775    } else {
 776        return -1;
 777    }
 778
 779    return 0;
 780}
 781
 782/**
 783 * Set open flags for a given cache mode
 784 *
 785 * Return 0 on success, -1 if the cache mode was invalid.
 786 */
 787int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
 788{
 789    *flags &= ~BDRV_O_CACHE_MASK;
 790
 791    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
 792        *writethrough = false;
 793        *flags |= BDRV_O_NOCACHE;
 794    } else if (!strcmp(mode, "directsync")) {
 795        *writethrough = true;
 796        *flags |= BDRV_O_NOCACHE;
 797    } else if (!strcmp(mode, "writeback")) {
 798        *writethrough = false;
 799    } else if (!strcmp(mode, "unsafe")) {
 800        *writethrough = false;
 801        *flags |= BDRV_O_NO_FLUSH;
 802    } else if (!strcmp(mode, "writethrough")) {
 803        *writethrough = true;
 804    } else {
 805        return -1;
 806    }
 807
 808    return 0;
 809}
 810
 811static char *bdrv_child_get_parent_desc(BdrvChild *c)
 812{
 813    BlockDriverState *parent = c->opaque;
 814    return g_strdup(bdrv_get_device_or_node_name(parent));
 815}
 816
 817static void bdrv_child_cb_drained_begin(BdrvChild *child)
 818{
 819    BlockDriverState *bs = child->opaque;
 820    bdrv_drained_begin(bs);
 821}
 822
 823static void bdrv_child_cb_drained_end(BdrvChild *child)
 824{
 825    BlockDriverState *bs = child->opaque;
 826    bdrv_drained_end(bs);
 827}
 828
 829static void bdrv_child_cb_attach(BdrvChild *child)
 830{
 831    BlockDriverState *bs = child->opaque;
 832    bdrv_apply_subtree_drain(child, bs);
 833}
 834
 835static void bdrv_child_cb_detach(BdrvChild *child)
 836{
 837    BlockDriverState *bs = child->opaque;
 838    bdrv_unapply_subtree_drain(child, bs);
 839}
 840
 841static int bdrv_child_cb_inactivate(BdrvChild *child)
 842{
 843    BlockDriverState *bs = child->opaque;
 844    assert(bs->open_flags & BDRV_O_INACTIVE);
 845    return 0;
 846}
 847
 848/*
 849 * Returns the options and flags that a temporary snapshot should get, based on
 850 * the originally requested flags (the originally requested image will have
 851 * flags like a backing file)
 852 */
 853static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
 854                                       int parent_flags, QDict *parent_options)
 855{
 856    *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
 857
 858    /* For temporary files, unconditional cache=unsafe is fine */
 859    qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
 860    qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
 861
 862    /* Copy the read-only option from the parent */
 863    qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
 864
 865    /* aio=native doesn't work for cache.direct=off, so disable it for the
 866     * temporary snapshot */
 867    *child_flags &= ~BDRV_O_NATIVE_AIO;
 868}
 869
 870/*
 871 * Returns the options and flags that bs->file should get if a protocol driver
 872 * is expected, based on the given options and flags for the parent BDS
 873 */
 874static void bdrv_inherited_options(int *child_flags, QDict *child_options,
 875                                   int parent_flags, QDict *parent_options)
 876{
 877    int flags = parent_flags;
 878
 879    /* Enable protocol handling, disable format probing for bs->file */
 880    flags |= BDRV_O_PROTOCOL;
 881
 882    /* If the cache mode isn't explicitly set, inherit direct and no-flush from
 883     * the parent. */
 884    qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
 885    qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
 886    qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE);
 887
 888    /* Inherit the read-only option from the parent if it's not set */
 889    qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
 890
 891    /* Our block drivers take care to send flushes and respect unmap policy,
 892     * so we can default to enable both on lower layers regardless of the
 893     * corresponding parent options. */
 894    qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap");
 895
 896    /* Clear flags that only apply to the top layer */
 897    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ |
 898               BDRV_O_NO_IO);
 899
 900    *child_flags = flags;
 901}
 902
 903const BdrvChildRole child_file = {
 904    .get_parent_desc = bdrv_child_get_parent_desc,
 905    .inherit_options = bdrv_inherited_options,
 906    .drained_begin   = bdrv_child_cb_drained_begin,
 907    .drained_end     = bdrv_child_cb_drained_end,
 908    .attach          = bdrv_child_cb_attach,
 909    .detach          = bdrv_child_cb_detach,
 910    .inactivate      = bdrv_child_cb_inactivate,
 911};
 912
 913/*
 914 * Returns the options and flags that bs->file should get if the use of formats
 915 * (and not only protocols) is permitted for it, based on the given options and
 916 * flags for the parent BDS
 917 */
 918static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
 919                                       int parent_flags, QDict *parent_options)
 920{
 921    child_file.inherit_options(child_flags, child_options,
 922                               parent_flags, parent_options);
 923
 924    *child_flags &= ~(BDRV_O_PROTOCOL | BDRV_O_NO_IO);
 925}
 926
 927const BdrvChildRole child_format = {
 928    .get_parent_desc = bdrv_child_get_parent_desc,
 929    .inherit_options = bdrv_inherited_fmt_options,
 930    .drained_begin   = bdrv_child_cb_drained_begin,
 931    .drained_end     = bdrv_child_cb_drained_end,
 932    .attach          = bdrv_child_cb_attach,
 933    .detach          = bdrv_child_cb_detach,
 934    .inactivate      = bdrv_child_cb_inactivate,
 935};
 936
 937static void bdrv_backing_attach(BdrvChild *c)
 938{
 939    BlockDriverState *parent = c->opaque;
 940    BlockDriverState *backing_hd = c->bs;
 941
 942    assert(!parent->backing_blocker);
 943    error_setg(&parent->backing_blocker,
 944               "node is used as backing hd of '%s'",
 945               bdrv_get_device_or_node_name(parent));
 946
 947    parent->open_flags &= ~BDRV_O_NO_BACKING;
 948    pstrcpy(parent->backing_file, sizeof(parent->backing_file),
 949            backing_hd->filename);
 950    pstrcpy(parent->backing_format, sizeof(parent->backing_format),
 951            backing_hd->drv ? backing_hd->drv->format_name : "");
 952
 953    bdrv_op_block_all(backing_hd, parent->backing_blocker);
 954    /* Otherwise we won't be able to commit or stream */
 955    bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
 956                    parent->backing_blocker);
 957    bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM,
 958                    parent->backing_blocker);
 959    /*
 960     * We do backup in 3 ways:
 961     * 1. drive backup
 962     *    The target bs is new opened, and the source is top BDS
 963     * 2. blockdev backup
 964     *    Both the source and the target are top BDSes.
 965     * 3. internal backup(used for block replication)
 966     *    Both the source and the target are backing file
 967     *
 968     * In case 1 and 2, neither the source nor the target is the backing file.
 969     * In case 3, we will block the top BDS, so there is only one block job
 970     * for the top BDS and its backing chain.
 971     */
 972    bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE,
 973                    parent->backing_blocker);
 974    bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
 975                    parent->backing_blocker);
 976
 977    bdrv_child_cb_attach(c);
 978}
 979
 980static void bdrv_backing_detach(BdrvChild *c)
 981{
 982    BlockDriverState *parent = c->opaque;
 983
 984    assert(parent->backing_blocker);
 985    bdrv_op_unblock_all(c->bs, parent->backing_blocker);
 986    error_free(parent->backing_blocker);
 987    parent->backing_blocker = NULL;
 988
 989    bdrv_child_cb_detach(c);
 990}
 991
 992/*
 993 * Returns the options and flags that bs->backing should get, based on the
 994 * given options and flags for the parent BDS
 995 */
 996static void bdrv_backing_options(int *child_flags, QDict *child_options,
 997                                 int parent_flags, QDict *parent_options)
 998{
 999    int flags = parent_flags;
1000
1001    /* The cache mode is inherited unmodified for backing files; except WCE,
1002     * which is only applied on the top level (BlockBackend) */
1003    qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
1004    qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
1005    qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE);
1006
1007    /* backing files always opened read-only */
1008    qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "on");
1009    flags &= ~BDRV_O_COPY_ON_READ;
1010
1011    /* snapshot=on is handled on the top layer */
1012    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
1013
1014    *child_flags = flags;
1015}
1016
1017static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base,
1018                                        const char *filename, Error **errp)
1019{
1020    BlockDriverState *parent = c->opaque;
1021    int orig_flags = bdrv_get_flags(parent);
1022    int ret;
1023
1024    if (!(orig_flags & BDRV_O_RDWR)) {
1025        ret = bdrv_reopen(parent, orig_flags | BDRV_O_RDWR, errp);
1026        if (ret < 0) {
1027            return ret;
1028        }
1029    }
1030
1031    ret = bdrv_change_backing_file(parent, filename,
1032                                   base->drv ? base->drv->format_name : "");
1033    if (ret < 0) {
1034        error_setg_errno(errp, -ret, "Could not update backing file link");
1035    }
1036
1037    if (!(orig_flags & BDRV_O_RDWR)) {
1038        bdrv_reopen(parent, orig_flags, NULL);
1039    }
1040
1041    return ret;
1042}
1043
1044const BdrvChildRole child_backing = {
1045    .get_parent_desc = bdrv_child_get_parent_desc,
1046    .attach          = bdrv_backing_attach,
1047    .detach          = bdrv_backing_detach,
1048    .inherit_options = bdrv_backing_options,
1049    .drained_begin   = bdrv_child_cb_drained_begin,
1050    .drained_end     = bdrv_child_cb_drained_end,
1051    .inactivate      = bdrv_child_cb_inactivate,
1052    .update_filename = bdrv_backing_update_filename,
1053};
1054
1055static int bdrv_open_flags(BlockDriverState *bs, int flags)
1056{
1057    int open_flags = flags;
1058
1059    /*
1060     * Clear flags that are internal to the block layer before opening the
1061     * image.
1062     */
1063    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
1064
1065    /*
1066     * Snapshots should be writable.
1067     */
1068    if (flags & BDRV_O_TEMPORARY) {
1069        open_flags |= BDRV_O_RDWR;
1070    }
1071
1072    return open_flags;
1073}
1074
1075static void update_flags_from_options(int *flags, QemuOpts *opts)
1076{
1077    *flags &= ~BDRV_O_CACHE_MASK;
1078
1079    assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH));
1080    if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
1081        *flags |= BDRV_O_NO_FLUSH;
1082    }
1083
1084    assert(qemu_opt_find(opts, BDRV_OPT_CACHE_DIRECT));
1085    if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
1086        *flags |= BDRV_O_NOCACHE;
1087    }
1088
1089    *flags &= ~BDRV_O_RDWR;
1090
1091    assert(qemu_opt_find(opts, BDRV_OPT_READ_ONLY));
1092    if (!qemu_opt_get_bool(opts, BDRV_OPT_READ_ONLY, false)) {
1093        *flags |= BDRV_O_RDWR;
1094    }
1095
1096}
1097
1098static void update_options_from_flags(QDict *options, int flags)
1099{
1100    if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
1101        qdict_put_bool(options, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE);
1102    }
1103    if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
1104        qdict_put_bool(options, BDRV_OPT_CACHE_NO_FLUSH,
1105                       flags & BDRV_O_NO_FLUSH);
1106    }
1107    if (!qdict_haskey(options, BDRV_OPT_READ_ONLY)) {
1108        qdict_put_bool(options, BDRV_OPT_READ_ONLY, !(flags & BDRV_O_RDWR));
1109    }
1110}
1111
1112static void bdrv_assign_node_name(BlockDriverState *bs,
1113                                  const char *node_name,
1114                                  Error **errp)
1115{
1116    char *gen_node_name = NULL;
1117
1118    if (!node_name) {
1119        node_name = gen_node_name = id_generate(ID_BLOCK);
1120    } else if (!id_wellformed(node_name)) {
1121        /*
1122         * Check for empty string or invalid characters, but not if it is
1123         * generated (generated names use characters not available to the user)
1124         */
1125        error_setg(errp, "Invalid node name");
1126        return;
1127    }
1128
1129    /* takes care of avoiding namespaces collisions */
1130    if (blk_by_name(node_name)) {
1131        error_setg(errp, "node-name=%s is conflicting with a device id",
1132                   node_name);
1133        goto out;
1134    }
1135
1136    /* takes care of avoiding duplicates node names */
1137    if (bdrv_find_node(node_name)) {
1138        error_setg(errp, "Duplicate node name");
1139        goto out;
1140    }
1141
1142    /* copy node name into the bs and insert it into the graph list */
1143    pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
1144    QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
1145out:
1146    g_free(gen_node_name);
1147}
1148
1149static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
1150                            const char *node_name, QDict *options,
1151                            int open_flags, Error **errp)
1152{
1153    Error *local_err = NULL;
1154    int ret;
1155
1156    bdrv_assign_node_name(bs, node_name, &local_err);
1157    if (local_err) {
1158        error_propagate(errp, local_err);
1159        return -EINVAL;
1160    }
1161
1162    bs->drv = drv;
1163    bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
1164    bs->opaque = g_malloc0(drv->instance_size);
1165
1166    if (drv->bdrv_file_open) {
1167        assert(!drv->bdrv_needs_filename || bs->filename[0]);
1168        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
1169    } else if (drv->bdrv_open) {
1170        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
1171    } else {
1172        ret = 0;
1173    }
1174
1175    if (ret < 0) {
1176        if (local_err) {
1177            error_propagate(errp, local_err);
1178        } else if (bs->filename[0]) {
1179            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
1180        } else {
1181            error_setg_errno(errp, -ret, "Could not open image");
1182        }
1183        goto open_failed;
1184    }
1185
1186    ret = refresh_total_sectors(bs, bs->total_sectors);
1187    if (ret < 0) {
1188        error_setg_errno(errp, -ret, "Could not refresh total sector count");
1189        return ret;
1190    }
1191
1192    bdrv_refresh_limits(bs, &local_err);
1193    if (local_err) {
1194        error_propagate(errp, local_err);
1195        return -EINVAL;
1196    }
1197
1198    assert(bdrv_opt_mem_align(bs) != 0);
1199    assert(bdrv_min_mem_align(bs) != 0);
1200    assert(is_power_of_2(bs->bl.request_alignment));
1201
1202    return 0;
1203open_failed:
1204    bs->drv = NULL;
1205    if (bs->file != NULL) {
1206        bdrv_unref_child(bs, bs->file);
1207        bs->file = NULL;
1208    }
1209    g_free(bs->opaque);
1210    bs->opaque = NULL;
1211    return ret;
1212}
1213
1214BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
1215                                       int flags, Error **errp)
1216{
1217    BlockDriverState *bs;
1218    int ret;
1219
1220    bs = bdrv_new();
1221    bs->open_flags = flags;
1222    bs->explicit_options = qdict_new();
1223    bs->options = qdict_new();
1224    bs->opaque = NULL;
1225
1226    update_options_from_flags(bs->options, flags);
1227
1228    ret = bdrv_open_driver(bs, drv, node_name, bs->options, flags, errp);
1229    if (ret < 0) {
1230        QDECREF(bs->explicit_options);
1231        bs->explicit_options = NULL;
1232        QDECREF(bs->options);
1233        bs->options = NULL;
1234        bdrv_unref(bs);
1235        return NULL;
1236    }
1237
1238    return bs;
1239}
1240
1241QemuOptsList bdrv_runtime_opts = {
1242    .name = "bdrv_common",
1243    .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
1244    .desc = {
1245        {
1246            .name = "node-name",
1247            .type = QEMU_OPT_STRING,
1248            .help = "Node name of the block device node",
1249        },
1250        {
1251            .name = "driver",
1252            .type = QEMU_OPT_STRING,
1253            .help = "Block driver to use for the node",
1254        },
1255        {
1256            .name = BDRV_OPT_CACHE_DIRECT,
1257            .type = QEMU_OPT_BOOL,
1258            .help = "Bypass software writeback cache on the host",
1259        },
1260        {
1261            .name = BDRV_OPT_CACHE_NO_FLUSH,
1262            .type = QEMU_OPT_BOOL,
1263            .help = "Ignore flush requests",
1264        },
1265        {
1266            .name = BDRV_OPT_READ_ONLY,
1267            .type = QEMU_OPT_BOOL,
1268            .help = "Node is opened in read-only mode",
1269        },
1270        {
1271            .name = "detect-zeroes",
1272            .type = QEMU_OPT_STRING,
1273            .help = "try to optimize zero writes (off, on, unmap)",
1274        },
1275        {
1276            .name = "discard",
1277            .type = QEMU_OPT_STRING,
1278            .help = "discard operation (ignore/off, unmap/on)",
1279        },
1280        {
1281            .name = BDRV_OPT_FORCE_SHARE,
1282            .type = QEMU_OPT_BOOL,
1283            .help = "always accept other writers (default: off)",
1284        },
1285        { /* end of list */ }
1286    },
1287};
1288
1289/*
1290 * Common part for opening disk images and files
1291 *
1292 * Removes all processed options from *options.
1293 */
1294static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
1295                            QDict *options, Error **errp)
1296{
1297    int ret, open_flags;
1298    const char *filename;
1299    const char *driver_name = NULL;
1300    const char *node_name = NULL;
1301    const char *discard;
1302    const char *detect_zeroes;
1303    QemuOpts *opts;
1304    BlockDriver *drv;
1305    Error *local_err = NULL;
1306
1307    assert(bs->file == NULL);
1308    assert(options != NULL && bs->options != options);
1309
1310    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
1311    qemu_opts_absorb_qdict(opts, options, &local_err);
1312    if (local_err) {
1313        error_propagate(errp, local_err);
1314        ret = -EINVAL;
1315        goto fail_opts;
1316    }
1317
1318    update_flags_from_options(&bs->open_flags, opts);
1319
1320    driver_name = qemu_opt_get(opts, "driver");
1321    drv = bdrv_find_format(driver_name);
1322    assert(drv != NULL);
1323
1324    bs->force_share = qemu_opt_get_bool(opts, BDRV_OPT_FORCE_SHARE, false);
1325
1326    if (bs->force_share && (bs->open_flags & BDRV_O_RDWR)) {
1327        error_setg(errp,
1328                   BDRV_OPT_FORCE_SHARE
1329                   "=on can only be used with read-only images");
1330        ret = -EINVAL;
1331        goto fail_opts;
1332    }
1333
1334    if (file != NULL) {
1335        filename = blk_bs(file)->filename;
1336    } else {
1337        /*
1338         * Caution: while qdict_get_try_str() is fine, getting
1339         * non-string types would require more care.  When @options
1340         * come from -blockdev or blockdev_add, its members are typed
1341         * according to the QAPI schema, but when they come from
1342         * -drive, they're all QString.
1343         */
1344        filename = qdict_get_try_str(options, "filename");
1345    }
1346
1347    if (drv->bdrv_needs_filename && (!filename || !filename[0])) {
1348        error_setg(errp, "The '%s' block driver requires a file name",
1349                   drv->format_name);
1350        ret = -EINVAL;
1351        goto fail_opts;
1352    }
1353
1354    trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
1355                           drv->format_name);
1356
1357    bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
1358
1359    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
1360        error_setg(errp,
1361                   !bs->read_only && bdrv_is_whitelisted(drv, true)
1362                        ? "Driver '%s' can only be used for read-only devices"
1363                        : "Driver '%s' is not whitelisted",
1364                   drv->format_name);
1365        ret = -ENOTSUP;
1366        goto fail_opts;
1367    }
1368
1369    /* bdrv_new() and bdrv_close() make it so */
1370    assert(atomic_read(&bs->copy_on_read) == 0);
1371
1372    if (bs->open_flags & BDRV_O_COPY_ON_READ) {
1373        if (!bs->read_only) {
1374            bdrv_enable_copy_on_read(bs);
1375        } else {
1376            error_setg(errp, "Can't use copy-on-read on read-only device");
1377            ret = -EINVAL;
1378            goto fail_opts;
1379        }
1380    }
1381
1382    discard = qemu_opt_get(opts, "discard");
1383    if (discard != NULL) {
1384        if (bdrv_parse_discard_flags(discard, &bs->open_flags) != 0) {
1385            error_setg(errp, "Invalid discard option");
1386            ret = -EINVAL;
1387            goto fail_opts;
1388        }
1389    }
1390
1391    detect_zeroes = qemu_opt_get(opts, "detect-zeroes");
1392    if (detect_zeroes) {
1393        BlockdevDetectZeroesOptions value =
1394            qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup,
1395                            detect_zeroes,
1396                            BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
1397                            &local_err);
1398        if (local_err) {
1399            error_propagate(errp, local_err);
1400            ret = -EINVAL;
1401            goto fail_opts;
1402        }
1403
1404        if (value == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
1405            !(bs->open_flags & BDRV_O_UNMAP))
1406        {
1407            error_setg(errp, "setting detect-zeroes to unmap is not allowed "
1408                             "without setting discard operation to unmap");
1409            ret = -EINVAL;
1410            goto fail_opts;
1411        }
1412
1413        bs->detect_zeroes = value;
1414    }
1415
1416    if (filename != NULL) {
1417        pstrcpy(bs->filename, sizeof(bs->filename), filename);
1418    } else {
1419        bs->filename[0] = '\0';
1420    }
1421    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
1422
1423    /* Open the image, either directly or using a protocol */
1424    open_flags = bdrv_open_flags(bs, bs->open_flags);
1425    node_name = qemu_opt_get(opts, "node-name");
1426
1427    assert(!drv->bdrv_file_open || file == NULL);
1428    ret = bdrv_open_driver(bs, drv, node_name, options, open_flags, errp);
1429    if (ret < 0) {
1430        goto fail_opts;
1431    }
1432
1433    qemu_opts_del(opts);
1434    return 0;
1435
1436fail_opts:
1437    qemu_opts_del(opts);
1438    return ret;
1439}
1440
1441static QDict *parse_json_filename(const char *filename, Error **errp)
1442{
1443    QObject *options_obj;
1444    QDict *options;
1445    int ret;
1446
1447    ret = strstart(filename, "json:", &filename);
1448    assert(ret);
1449
1450    options_obj = qobject_from_json(filename, errp);
1451    if (!options_obj) {
1452        /* Work around qobject_from_json() lossage TODO fix that */
1453        if (errp && !*errp) {
1454            error_setg(errp, "Could not parse the JSON options");
1455            return NULL;
1456        }
1457        error_prepend(errp, "Could not parse the JSON options: ");
1458        return NULL;
1459    }
1460
1461    options = qobject_to(QDict, options_obj);
1462    if (!options) {
1463        qobject_decref(options_obj);
1464        error_setg(errp, "Invalid JSON object given");
1465        return NULL;
1466    }
1467
1468    qdict_flatten(options);
1469
1470    return options;
1471}
1472
1473static void parse_json_protocol(QDict *options, const char **pfilename,
1474                                Error **errp)
1475{
1476    QDict *json_options;
1477    Error *local_err = NULL;
1478
1479    /* Parse json: pseudo-protocol */
1480    if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
1481        return;
1482    }
1483
1484    json_options = parse_json_filename(*pfilename, &local_err);
1485    if (local_err) {
1486        error_propagate(errp, local_err);
1487        return;
1488    }
1489
1490    /* Options given in the filename have lower priority than options
1491     * specified directly */
1492    qdict_join(options, json_options, false);
1493    QDECREF(json_options);
1494    *pfilename = NULL;
1495}
1496
1497/*
1498 * Fills in default options for opening images and converts the legacy
1499 * filename/flags pair to option QDict entries.
1500 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
1501 * block driver has been specified explicitly.
1502 */
1503static int bdrv_fill_options(QDict **options, const char *filename,
1504                             int *flags, Error **errp)
1505{
1506    const char *drvname;
1507    bool protocol = *flags & BDRV_O_PROTOCOL;
1508    bool parse_filename = false;
1509    BlockDriver *drv = NULL;
1510    Error *local_err = NULL;
1511
1512    /*
1513     * Caution: while qdict_get_try_str() is fine, getting non-string
1514     * types would require more care.  When @options come from
1515     * -blockdev or blockdev_add, its members are typed according to
1516     * the QAPI schema, but when they come from -drive, they're all
1517     * QString.
1518     */
1519    drvname = qdict_get_try_str(*options, "driver");
1520    if (drvname) {
1521        drv = bdrv_find_format(drvname);
1522        if (!drv) {
1523            error_setg(errp, "Unknown driver '%s'", drvname);
1524            return -ENOENT;
1525        }
1526        /* If the user has explicitly specified the driver, this choice should
1527         * override the BDRV_O_PROTOCOL flag */
1528        protocol = drv->bdrv_file_open;
1529    }
1530
1531    if (protocol) {
1532        *flags |= BDRV_O_PROTOCOL;
1533    } else {
1534        *flags &= ~BDRV_O_PROTOCOL;
1535    }
1536
1537    /* Translate cache options from flags into options */
1538    update_options_from_flags(*options, *flags);
1539
1540    /* Fetch the file name from the options QDict if necessary */
1541    if (protocol && filename) {
1542        if (!qdict_haskey(*options, "filename")) {
1543            qdict_put_str(*options, "filename", filename);
1544            parse_filename = true;
1545        } else {
1546            error_setg(errp, "Can't specify 'file' and 'filename' options at "
1547                             "the same time");
1548            return -EINVAL;
1549        }
1550    }
1551
1552    /* Find the right block driver */
1553    /* See cautionary note on accessing @options above */
1554    filename = qdict_get_try_str(*options, "filename");
1555
1556    if (!drvname && protocol) {
1557        if (filename) {
1558            drv = bdrv_find_protocol(filename, parse_filename, errp);
1559            if (!drv) {
1560                return -EINVAL;
1561            }
1562
1563            drvname = drv->format_name;
1564            qdict_put_str(*options, "driver", drvname);
1565        } else {
1566            error_setg(errp, "Must specify either driver or file");
1567            return -EINVAL;
1568        }
1569    }
1570
1571    assert(drv || !protocol);
1572
1573    /* Driver-specific filename parsing */
1574    if (drv && drv->bdrv_parse_filename && parse_filename) {
1575        drv->bdrv_parse_filename(filename, *options, &local_err);
1576        if (local_err) {
1577            error_propagate(errp, local_err);
1578            return -EINVAL;
1579        }
1580
1581        if (!drv->bdrv_needs_filename) {
1582            qdict_del(*options, "filename");
1583        }
1584    }
1585
1586    return 0;
1587}
1588
1589static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q,
1590                                 uint64_t perm, uint64_t shared,
1591                                 GSList *ignore_children, Error **errp);
1592static void bdrv_child_abort_perm_update(BdrvChild *c);
1593static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared);
1594
1595typedef struct BlockReopenQueueEntry {
1596     bool prepared;
1597     BDRVReopenState state;
1598     QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1599} BlockReopenQueueEntry;
1600
1601/*
1602 * Return the flags that @bs will have after the reopens in @q have
1603 * successfully completed. If @q is NULL (or @bs is not contained in @q),
1604 * return the current flags.
1605 */
1606static int bdrv_reopen_get_flags(BlockReopenQueue *q, BlockDriverState *bs)
1607{
1608    BlockReopenQueueEntry *entry;
1609
1610    if (q != NULL) {
1611        QSIMPLEQ_FOREACH(entry, q, entry) {
1612            if (entry->state.bs == bs) {
1613                return entry->state.flags;
1614            }
1615        }
1616    }
1617
1618    return bs->open_flags;
1619}
1620
1621/* Returns whether the image file can be written to after the reopen queue @q
1622 * has been successfully applied, or right now if @q is NULL. */
1623static bool bdrv_is_writable_after_reopen(BlockDriverState *bs,
1624                                          BlockReopenQueue *q)
1625{
1626    int flags = bdrv_reopen_get_flags(q, bs);
1627
1628    return (flags & (BDRV_O_RDWR | BDRV_O_INACTIVE)) == BDRV_O_RDWR;
1629}
1630
1631/*
1632 * Return whether the BDS can be written to.  This is not necessarily
1633 * the same as !bdrv_is_read_only(bs), as inactivated images may not
1634 * be written to but do not count as read-only images.
1635 */
1636bool bdrv_is_writable(BlockDriverState *bs)
1637{
1638    return bdrv_is_writable_after_reopen(bs, NULL);
1639}
1640
1641static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs,
1642                            BdrvChild *c, const BdrvChildRole *role,
1643                            BlockReopenQueue *reopen_queue,
1644                            uint64_t parent_perm, uint64_t parent_shared,
1645                            uint64_t *nperm, uint64_t *nshared)
1646{
1647    if (bs->drv && bs->drv->bdrv_child_perm) {
1648        bs->drv->bdrv_child_perm(bs, c, role, reopen_queue,
1649                                 parent_perm, parent_shared,
1650                                 nperm, nshared);
1651    }
1652    /* TODO Take force_share from reopen_queue */
1653    if (child_bs && child_bs->force_share) {
1654        *nshared = BLK_PERM_ALL;
1655    }
1656}
1657
1658/*
1659 * Check whether permissions on this node can be changed in a way that
1660 * @cumulative_perms and @cumulative_shared_perms are the new cumulative
1661 * permissions of all its parents. This involves checking whether all necessary
1662 * permission changes to child nodes can be performed.
1663 *
1664 * A call to this function must always be followed by a call to bdrv_set_perm()
1665 * or bdrv_abort_perm_update().
1666 */
1667static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q,
1668                           uint64_t cumulative_perms,
1669                           uint64_t cumulative_shared_perms,
1670                           GSList *ignore_children, Error **errp)
1671{
1672    BlockDriver *drv = bs->drv;
1673    BdrvChild *c;
1674    int ret;
1675
1676    /* Write permissions never work with read-only images */
1677    if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
1678        !bdrv_is_writable_after_reopen(bs, q))
1679    {
1680        error_setg(errp, "Block node is read-only");
1681        return -EPERM;
1682    }
1683
1684    /* Check this node */
1685    if (!drv) {
1686        return 0;
1687    }
1688
1689    if (drv->bdrv_check_perm) {
1690        return drv->bdrv_check_perm(bs, cumulative_perms,
1691                                    cumulative_shared_perms, errp);
1692    }
1693
1694    /* Drivers that never have children can omit .bdrv_child_perm() */
1695    if (!drv->bdrv_child_perm) {
1696        assert(QLIST_EMPTY(&bs->children));
1697        return 0;
1698    }
1699
1700    /* Check all children */
1701    QLIST_FOREACH(c, &bs->children, next) {
1702        uint64_t cur_perm, cur_shared;
1703        bdrv_child_perm(bs, c->bs, c, c->role, q,
1704                        cumulative_perms, cumulative_shared_perms,
1705                        &cur_perm, &cur_shared);
1706        ret = bdrv_child_check_perm(c, q, cur_perm, cur_shared,
1707                                    ignore_children, errp);
1708        if (ret < 0) {
1709            return ret;
1710        }
1711    }
1712
1713    return 0;
1714}
1715
1716/*
1717 * Notifies drivers that after a previous bdrv_check_perm() call, the
1718 * permission update is not performed and any preparations made for it (e.g.
1719 * taken file locks) need to be undone.
1720 *
1721 * This function recursively notifies all child nodes.
1722 */
1723static void bdrv_abort_perm_update(BlockDriverState *bs)
1724{
1725    BlockDriver *drv = bs->drv;
1726    BdrvChild *c;
1727
1728    if (!drv) {
1729        return;
1730    }
1731
1732    if (drv->bdrv_abort_perm_update) {
1733        drv->bdrv_abort_perm_update(bs);
1734    }
1735
1736    QLIST_FOREACH(c, &bs->children, next) {
1737        bdrv_child_abort_perm_update(c);
1738    }
1739}
1740
1741static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms,
1742                          uint64_t cumulative_shared_perms)
1743{
1744    BlockDriver *drv = bs->drv;
1745    BdrvChild *c;
1746
1747    if (!drv) {
1748        return;
1749    }
1750
1751    /* Update this node */
1752    if (drv->bdrv_set_perm) {
1753        drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms);
1754    }
1755
1756    /* Drivers that never have children can omit .bdrv_child_perm() */
1757    if (!drv->bdrv_child_perm) {
1758        assert(QLIST_EMPTY(&bs->children));
1759        return;
1760    }
1761
1762    /* Update all children */
1763    QLIST_FOREACH(c, &bs->children, next) {
1764        uint64_t cur_perm, cur_shared;
1765        bdrv_child_perm(bs, c->bs, c, c->role, NULL,
1766                        cumulative_perms, cumulative_shared_perms,
1767                        &cur_perm, &cur_shared);
1768        bdrv_child_set_perm(c, cur_perm, cur_shared);
1769    }
1770}
1771
1772static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
1773                                     uint64_t *shared_perm)
1774{
1775    BdrvChild *c;
1776    uint64_t cumulative_perms = 0;
1777    uint64_t cumulative_shared_perms = BLK_PERM_ALL;
1778
1779    QLIST_FOREACH(c, &bs->parents, next_parent) {
1780        cumulative_perms |= c->perm;
1781        cumulative_shared_perms &= c->shared_perm;
1782    }
1783
1784    *perm = cumulative_perms;
1785    *shared_perm = cumulative_shared_perms;
1786}
1787
1788static char *bdrv_child_user_desc(BdrvChild *c)
1789{
1790    if (c->role->get_parent_desc) {
1791        return c->role->get_parent_desc(c);
1792    }
1793
1794    return g_strdup("another user");
1795}
1796
1797char *bdrv_perm_names(uint64_t perm)
1798{
1799    struct perm_name {
1800        uint64_t perm;
1801        const char *name;
1802    } permissions[] = {
1803        { BLK_PERM_CONSISTENT_READ, "consistent read" },
1804        { BLK_PERM_WRITE,           "write" },
1805        { BLK_PERM_WRITE_UNCHANGED, "write unchanged" },
1806        { BLK_PERM_RESIZE,          "resize" },
1807        { BLK_PERM_GRAPH_MOD,       "change children" },
1808        { 0, NULL }
1809    };
1810
1811    char *result = g_strdup("");
1812    struct perm_name *p;
1813
1814    for (p = permissions; p->name; p++) {
1815        if (perm & p->perm) {
1816            char *old = result;
1817            result = g_strdup_printf("%s%s%s", old, *old ? ", " : "", p->name);
1818            g_free(old);
1819        }
1820    }
1821
1822    return result;
1823}
1824
1825/*
1826 * Checks whether a new reference to @bs can be added if the new user requires
1827 * @new_used_perm/@new_shared_perm as its permissions. If @ignore_children is
1828 * set, the BdrvChild objects in this list are ignored in the calculations;
1829 * this allows checking permission updates for an existing reference.
1830 *
1831 * Needs to be followed by a call to either bdrv_set_perm() or
1832 * bdrv_abort_perm_update(). */
1833static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q,
1834                                  uint64_t new_used_perm,
1835                                  uint64_t new_shared_perm,
1836                                  GSList *ignore_children, Error **errp)
1837{
1838    BdrvChild *c;
1839    uint64_t cumulative_perms = new_used_perm;
1840    uint64_t cumulative_shared_perms = new_shared_perm;
1841
1842    /* There is no reason why anyone couldn't tolerate write_unchanged */
1843    assert(new_shared_perm & BLK_PERM_WRITE_UNCHANGED);
1844
1845    QLIST_FOREACH(c, &bs->parents, next_parent) {
1846        if (g_slist_find(ignore_children, c)) {
1847            continue;
1848        }
1849
1850        if ((new_used_perm & c->shared_perm) != new_used_perm) {
1851            char *user = bdrv_child_user_desc(c);
1852            char *perm_names = bdrv_perm_names(new_used_perm & ~c->shared_perm);
1853            error_setg(errp, "Conflicts with use by %s as '%s', which does not "
1854                             "allow '%s' on %s",
1855                       user, c->name, perm_names, bdrv_get_node_name(c->bs));
1856            g_free(user);
1857            g_free(perm_names);
1858            return -EPERM;
1859        }
1860
1861        if ((c->perm & new_shared_perm) != c->perm) {
1862            char *user = bdrv_child_user_desc(c);
1863            char *perm_names = bdrv_perm_names(c->perm & ~new_shared_perm);
1864            error_setg(errp, "Conflicts with use by %s as '%s', which uses "
1865                             "'%s' on %s",
1866                       user, c->name, perm_names, bdrv_get_node_name(c->bs));
1867            g_free(user);
1868            g_free(perm_names);
1869            return -EPERM;
1870        }
1871
1872        cumulative_perms |= c->perm;
1873        cumulative_shared_perms &= c->shared_perm;
1874    }
1875
1876    return bdrv_check_perm(bs, q, cumulative_perms, cumulative_shared_perms,
1877                           ignore_children, errp);
1878}
1879
1880/* Needs to be followed by a call to either bdrv_child_set_perm() or
1881 * bdrv_child_abort_perm_update(). */
1882static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q,
1883                                 uint64_t perm, uint64_t shared,
1884                                 GSList *ignore_children, Error **errp)
1885{
1886    int ret;
1887
1888    ignore_children = g_slist_prepend(g_slist_copy(ignore_children), c);
1889    ret = bdrv_check_update_perm(c->bs, q, perm, shared, ignore_children, errp);
1890    g_slist_free(ignore_children);
1891
1892    return ret;
1893}
1894
1895static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared)
1896{
1897    uint64_t cumulative_perms, cumulative_shared_perms;
1898
1899    c->perm = perm;
1900    c->shared_perm = shared;
1901
1902    bdrv_get_cumulative_perm(c->bs, &cumulative_perms,
1903                             &cumulative_shared_perms);
1904    bdrv_set_perm(c->bs, cumulative_perms, cumulative_shared_perms);
1905}
1906
1907static void bdrv_child_abort_perm_update(BdrvChild *c)
1908{
1909    bdrv_abort_perm_update(c->bs);
1910}
1911
1912int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
1913                            Error **errp)
1914{
1915    int ret;
1916
1917    ret = bdrv_child_check_perm(c, NULL, perm, shared, NULL, errp);
1918    if (ret < 0) {
1919        bdrv_child_abort_perm_update(c);
1920        return ret;
1921    }
1922
1923    bdrv_child_set_perm(c, perm, shared);
1924
1925    return 0;
1926}
1927
/*
 * Permission bits that bdrv_filter_default_perms() takes over from the
 * values requested for the parent node.
 */
#define DEFAULT_PERM_PASSTHROUGH \
    (BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | \
     BLK_PERM_WRITE_UNCHANGED | BLK_PERM_RESIZE)

/* All remaining permission bits, i.e. those that are not passed through. */
#define DEFAULT_PERM_UNCHANGED (BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH)
1933
1934void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
1935                               const BdrvChildRole *role,
1936                               BlockReopenQueue *reopen_queue,
1937                               uint64_t perm, uint64_t shared,
1938                               uint64_t *nperm, uint64_t *nshared)
1939{
1940    if (c == NULL) {
1941        *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
1942        *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
1943        return;
1944    }
1945
1946    *nperm = (perm & DEFAULT_PERM_PASSTHROUGH) |
1947             (c->perm & DEFAULT_PERM_UNCHANGED);
1948    *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) |
1949               (c->shared_perm & DEFAULT_PERM_UNCHANGED);
1950}
1951
1952void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
1953                               const BdrvChildRole *role,
1954                               BlockReopenQueue *reopen_queue,
1955                               uint64_t perm, uint64_t shared,
1956                               uint64_t *nperm, uint64_t *nshared)
1957{
1958    bool backing = (role == &child_backing);
1959    assert(role == &child_backing || role == &child_file);
1960
1961    if (!backing) {
1962        int flags = bdrv_reopen_get_flags(reopen_queue, bs);
1963
1964        /* Apart from the modifications below, the same permissions are
1965         * forwarded and left alone as for filters */
1966        bdrv_filter_default_perms(bs, c, role, reopen_queue, perm, shared,
1967                                  &perm, &shared);
1968
1969        /* Format drivers may touch metadata even if the guest doesn't write */
1970        if (bdrv_is_writable_after_reopen(bs, reopen_queue)) {
1971            perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
1972        }
1973
1974        /* bs->file always needs to be consistent because of the metadata. We
1975         * can never allow other users to resize or write to it. */
1976        if (!(flags & BDRV_O_NO_IO)) {
1977            perm |= BLK_PERM_CONSISTENT_READ;
1978        }
1979        shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
1980    } else {
1981        /* We want consistent read from backing files if the parent needs it.
1982         * No other operations are performed on backing files. */
1983        perm &= BLK_PERM_CONSISTENT_READ;
1984
1985        /* If the parent can deal with changing data, we're okay with a
1986         * writable and resizable backing file. */
1987        /* TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too? */
1988        if (shared & BLK_PERM_WRITE) {
1989            shared = BLK_PERM_WRITE | BLK_PERM_RESIZE;
1990        } else {
1991            shared = 0;
1992        }
1993
1994        shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD |
1995                  BLK_PERM_WRITE_UNCHANGED;
1996    }
1997
1998    if (bs->open_flags & BDRV_O_INACTIVE) {
1999        shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
2000    }
2001
2002    *nperm = perm;
2003    *nshared = shared;
2004}
2005
/*
 * Makes @child point to @new_bs instead of its current node, without touching
 * any permissions.
 *
 * Either the old node (child->bs) or @new_bs may be NULL. If both are set,
 * they must be in the same AioContext (asserted).
 *
 * For the old node, the role's .detach callback is invoked, then
 * .drained_end is called once per quiesce_counter level of the old node, and
 * @child is removed from the old node's list of parents. For the new node,
 * @child is inserted into its list of parents, .drained_begin is called once
 * per quiesce_counter level of the new node, and finally the role's .attach
 * callback is invoked. All role callbacks are optional.
 */
static void bdrv_replace_child_noperm(BdrvChild *child,
                                      BlockDriverState *new_bs)
{
    BlockDriverState *old_bs = child->bs;
    int i;

    if (old_bs && new_bs) {
        assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
    }
    if (old_bs) {
        /* Detach first so that the recursive drain sections coming from @child
         * are already gone and we only end the drain sections that came from
         * elsewhere. */
        if (child->role->detach) {
            child->role->detach(child);
        }
        /* End one drained section in the parent for every level at which the
         * old node is currently quiesced. */
        if (old_bs->quiesce_counter && child->role->drained_end) {
            for (i = 0; i < old_bs->quiesce_counter; i++) {
                child->role->drained_end(child);
            }
        }
        QLIST_REMOVE(child, next_parent);
    }

    child->bs = new_bs;

    if (new_bs) {
        QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
        /* Begin one drained section in the parent for every level at which
         * the new node is currently quiesced. */
        if (new_bs->quiesce_counter && child->role->drained_begin) {
            for (i = 0; i < new_bs->quiesce_counter; i++) {
                child->role->drained_begin(child);
            }
        }

        /* Attach only after starting new drained sections, so that recursive
         * drain sections coming from @child don't get an extra .drained_begin
         * callback. */
        if (child->role->attach) {
            child->role->attach(child);
        }
    }
}
2048
2049/*
2050 * Updates @child to change its reference to point to @new_bs, including
2051 * checking and applying the necessary permisson updates both to the old node
2052 * and to @new_bs.
2053 *
2054 * NULL is passed as @new_bs for removing the reference before freeing @child.
2055 *
2056 * If @new_bs is not NULL, bdrv_check_perm() must be called beforehand, as this
2057 * function uses bdrv_set_perm() to update the permissions according to the new
2058 * reference that @new_bs gets.
2059 */
2060static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
2061{
2062    BlockDriverState *old_bs = child->bs;
2063    uint64_t perm, shared_perm;
2064
2065    bdrv_replace_child_noperm(child, new_bs);
2066
2067    if (old_bs) {
2068        /* Update permissions for old node. This is guaranteed to succeed
2069         * because we're just taking a parent away, so we're loosening
2070         * restrictions. */
2071        bdrv_get_cumulative_perm(old_bs, &perm, &shared_perm);
2072        bdrv_check_perm(old_bs, NULL, perm, shared_perm, NULL, &error_abort);
2073        bdrv_set_perm(old_bs, perm, shared_perm);
2074    }
2075
2076    if (new_bs) {
2077        bdrv_get_cumulative_perm(new_bs, &perm, &shared_perm);
2078        bdrv_set_perm(new_bs, perm, shared_perm);
2079    }
2080}
2081
2082BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
2083                                  const char *child_name,
2084                                  const BdrvChildRole *child_role,
2085                                  uint64_t perm, uint64_t shared_perm,
2086                                  void *opaque, Error **errp)
2087{
2088    BdrvChild *child;
2089    int ret;
2090
2091    ret = bdrv_check_update_perm(child_bs, NULL, perm, shared_perm, NULL, errp);
2092    if (ret < 0) {
2093        bdrv_abort_perm_update(child_bs);
2094        return NULL;
2095    }
2096
2097    child = g_new(BdrvChild, 1);
2098    *child = (BdrvChild) {
2099        .bs             = NULL,
2100        .name           = g_strdup(child_name),
2101        .role           = child_role,
2102        .perm           = perm,
2103        .shared_perm    = shared_perm,
2104        .opaque         = opaque,
2105    };
2106
2107    /* This performs the matching bdrv_set_perm() for the above check. */
2108    bdrv_replace_child(child, child_bs);
2109
2110    return child;
2111}
2112
2113BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
2114                             BlockDriverState *child_bs,
2115                             const char *child_name,
2116                             const BdrvChildRole *child_role,
2117                             Error **errp)
2118{
2119    BdrvChild *child;
2120    uint64_t perm, shared_perm;
2121
2122    bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
2123
2124    assert(parent_bs->drv);
2125    assert(bdrv_get_aio_context(parent_bs) == bdrv_get_aio_context(child_bs));
2126    bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL,
2127                    perm, shared_perm, &perm, &shared_perm);
2128
2129    child = bdrv_root_attach_child(child_bs, child_name, child_role,
2130                                   perm, shared_perm, parent_bs, errp);
2131    if (child == NULL) {
2132        return NULL;
2133    }
2134
2135    QLIST_INSERT_HEAD(&parent_bs->children, child, next);
2136    return child;
2137}
2138
2139static void bdrv_detach_child(BdrvChild *child)
2140{
2141    if (child->next.le_prev) {
2142        QLIST_REMOVE(child, next);
2143        child->next.le_prev = NULL;
2144    }
2145
2146    bdrv_replace_child(child, NULL);
2147
2148    g_free(child->name);
2149    g_free(child);
2150}
2151
2152void bdrv_root_unref_child(BdrvChild *child)
2153{
2154    BlockDriverState *child_bs;
2155
2156    child_bs = child->bs;
2157    bdrv_detach_child(child);
2158    bdrv_unref(child_bs);
2159}
2160
2161void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
2162{
2163    if (child == NULL) {
2164        return;
2165    }
2166
2167    if (child->bs->inherits_from == parent) {
2168        BdrvChild *c;
2169
2170        /* Remove inherits_from only when the last reference between parent and
2171         * child->bs goes away. */
2172        QLIST_FOREACH(c, &parent->children, next) {
2173            if (c != child && c->bs == child->bs) {
2174                break;
2175            }
2176        }
2177        if (c == NULL) {
2178            child->bs->inherits_from = NULL;
2179        }
2180    }
2181
2182    bdrv_root_unref_child(child);
2183}
2184
2185
2186static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load)
2187{
2188    BdrvChild *c;
2189    QLIST_FOREACH(c, &bs->parents, next_parent) {
2190        if (c->role->change_media) {
2191            c->role->change_media(c, load);
2192        }
2193    }
2194}
2195
2196static void bdrv_parent_cb_resize(BlockDriverState *bs)
2197{
2198    BdrvChild *c;
2199    QLIST_FOREACH(c, &bs->parents, next_parent) {
2200        if (c->role->resize) {
2201            c->role->resize(c);
2202        }
2203    }
2204}
2205
2206/*
2207 * Sets the backing file link of a BDS. A new reference is created; callers
2208 * which don't need their own reference any more must call bdrv_unref().
2209 */
2210void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
2211                         Error **errp)
2212{
2213    if (backing_hd) {
2214        bdrv_ref(backing_hd);
2215    }
2216
2217    if (bs->backing) {
2218        bdrv_unref_child(bs, bs->backing);
2219    }
2220
2221    if (!backing_hd) {
2222        bs->backing = NULL;
2223        goto out;
2224    }
2225
2226    bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing,
2227                                    errp);
2228    if (!bs->backing) {
2229        bdrv_unref(backing_hd);
2230    }
2231
2232    bdrv_refresh_filename(bs);
2233
2234out:
2235    bdrv_refresh_limits(bs, NULL);
2236}
2237
2238/*
2239 * Opens the backing file for a BlockDriverState if not yet open
2240 *
2241 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
2242 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
2243 * itself, all options starting with "${bdref_key}." are considered part of the
2244 * BlockdevRef.
2245 *
2246 * TODO Can this be unified with bdrv_open_image()?
2247 */
2248int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
2249                           const char *bdref_key, Error **errp)
2250{
2251    char *backing_filename = g_malloc0(PATH_MAX);
2252    char *bdref_key_dot;
2253    const char *reference = NULL;
2254    int ret = 0;
2255    BlockDriverState *backing_hd;
2256    QDict *options;
2257    QDict *tmp_parent_options = NULL;
2258    Error *local_err = NULL;
2259
2260    if (bs->backing != NULL) {
2261        goto free_exit;
2262    }
2263
2264    /* NULL means an empty set of options */
2265    if (parent_options == NULL) {
2266        tmp_parent_options = qdict_new();
2267        parent_options = tmp_parent_options;
2268    }
2269
2270    bs->open_flags &= ~BDRV_O_NO_BACKING;
2271
2272    bdref_key_dot = g_strdup_printf("%s.", bdref_key);
2273    qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
2274    g_free(bdref_key_dot);
2275
2276    /*
2277     * Caution: while qdict_get_try_str() is fine, getting non-string
2278     * types would require more care.  When @parent_options come from
2279     * -blockdev or blockdev_add, its members are typed according to
2280     * the QAPI schema, but when they come from -drive, they're all
2281     * QString.
2282     */
2283    reference = qdict_get_try_str(parent_options, bdref_key);
2284    if (reference || qdict_haskey(options, "file.filename")) {
2285        backing_filename[0] = '\0';
2286    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
2287        QDECREF(options);
2288        goto free_exit;
2289    } else {
2290        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
2291                                       &local_err);
2292        if (local_err) {
2293            ret = -EINVAL;
2294            error_propagate(errp, local_err);
2295            QDECREF(options);
2296            goto free_exit;
2297        }
2298    }
2299
2300    if (!bs->drv || !bs->drv->supports_backing) {
2301        ret = -EINVAL;
2302        error_setg(errp, "Driver doesn't support backing files");
2303        QDECREF(options);
2304        goto free_exit;
2305    }
2306
2307    if (!reference &&
2308        bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
2309        qdict_put_str(options, "driver", bs->backing_format);
2310    }
2311
2312    backing_hd = bdrv_open_inherit(*backing_filename ? backing_filename : NULL,
2313                                   reference, options, 0, bs, &child_backing,
2314                                   errp);
2315    if (!backing_hd) {
2316        bs->open_flags |= BDRV_O_NO_BACKING;
2317        error_prepend(errp, "Could not open backing file: ");
2318        ret = -EINVAL;
2319        goto free_exit;
2320    }
2321    bdrv_set_aio_context(backing_hd, bdrv_get_aio_context(bs));
2322
2323    /* Hook up the backing file link; drop our reference, bs owns the
2324     * backing_hd reference now */
2325    bdrv_set_backing_hd(bs, backing_hd, &local_err);
2326    bdrv_unref(backing_hd);
2327    if (local_err) {
2328        error_propagate(errp, local_err);
2329        ret = -EINVAL;
2330        goto free_exit;
2331    }
2332
2333    qdict_del(parent_options, bdref_key);
2334
2335free_exit:
2336    g_free(backing_filename);
2337    QDECREF(tmp_parent_options);
2338    return ret;
2339}
2340
2341static BlockDriverState *
2342bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key,
2343                   BlockDriverState *parent, const BdrvChildRole *child_role,
2344                   bool allow_none, Error **errp)
2345{
2346    BlockDriverState *bs = NULL;
2347    QDict *image_options;
2348    char *bdref_key_dot;
2349    const char *reference;
2350
2351    assert(child_role != NULL);
2352
2353    bdref_key_dot = g_strdup_printf("%s.", bdref_key);
2354    qdict_extract_subqdict(options, &image_options, bdref_key_dot);
2355    g_free(bdref_key_dot);
2356
2357    /*
2358     * Caution: while qdict_get_try_str() is fine, getting non-string
2359     * types would require more care.  When @options come from
2360     * -blockdev or blockdev_add, its members are typed according to
2361     * the QAPI schema, but when they come from -drive, they're all
2362     * QString.
2363     */
2364    reference = qdict_get_try_str(options, bdref_key);
2365    if (!filename && !reference && !qdict_size(image_options)) {
2366        if (!allow_none) {
2367            error_setg(errp, "A block device must be specified for \"%s\"",
2368                       bdref_key);
2369        }
2370        QDECREF(image_options);
2371        goto done;
2372    }
2373
2374    bs = bdrv_open_inherit(filename, reference, image_options, 0,
2375                           parent, child_role, errp);
2376    if (!bs) {
2377        goto done;
2378    }
2379
2380done:
2381    qdict_del(options, bdref_key);
2382    return bs;
2383}
2384
2385/*
2386 * Opens a disk image whose options are given as BlockdevRef in another block
2387 * device's options.
2388 *
2389 * If allow_none is true, no image will be opened if filename is false and no
2390 * BlockdevRef is given. NULL will be returned, but errp remains unset.
2391 *
2392 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
2393 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
2394 * itself, all options starting with "${bdref_key}." are considered part of the
2395 * BlockdevRef.
2396 *
2397 * The BlockdevRef will be removed from the options QDict.
2398 */
2399BdrvChild *bdrv_open_child(const char *filename,
2400                           QDict *options, const char *bdref_key,
2401                           BlockDriverState *parent,
2402                           const BdrvChildRole *child_role,
2403                           bool allow_none, Error **errp)
2404{
2405    BdrvChild *c;
2406    BlockDriverState *bs;
2407
2408    bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_role,
2409                            allow_none, errp);
2410    if (bs == NULL) {
2411        return NULL;
2412    }
2413
2414    c = bdrv_attach_child(parent, bs, bdref_key, child_role, errp);
2415    if (!c) {
2416        bdrv_unref(bs);
2417        return NULL;
2418    }
2419
2420    return c;
2421}
2422
2423/* TODO Future callers may need to specify parent/child_role in order for
2424 * option inheritance to work. Existing callers use it for the root node. */
2425BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp)
2426{
2427    BlockDriverState *bs = NULL;
2428    Error *local_err = NULL;
2429    QObject *obj = NULL;
2430    QDict *qdict = NULL;
2431    const char *reference = NULL;
2432    Visitor *v = NULL;
2433
2434    if (ref->type == QTYPE_QSTRING) {
2435        reference = ref->u.reference;
2436    } else {
2437        BlockdevOptions *options = &ref->u.definition;
2438        assert(ref->type == QTYPE_QDICT);
2439
2440        v = qobject_output_visitor_new(&obj);
2441        visit_type_BlockdevOptions(v, NULL, &options, &local_err);
2442        if (local_err) {
2443            error_propagate(errp, local_err);
2444            goto fail;
2445        }
2446        visit_complete(v, &obj);
2447
2448        qdict = qobject_to(QDict, obj);
2449        qdict_flatten(qdict);
2450
2451        /* bdrv_open_inherit() defaults to the values in bdrv_flags (for
2452         * compatibility with other callers) rather than what we want as the
2453         * real defaults. Apply the defaults here instead. */
2454        qdict_set_default_str(qdict, BDRV_OPT_CACHE_DIRECT, "off");
2455        qdict_set_default_str(qdict, BDRV_OPT_CACHE_NO_FLUSH, "off");
2456        qdict_set_default_str(qdict, BDRV_OPT_READ_ONLY, "off");
2457    }
2458
2459    bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, errp);
2460    obj = NULL;
2461
2462fail:
2463    qobject_decref(obj);
2464    visit_free(v);
2465    return bs;
2466}
2467
2468static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
2469                                                   int flags,
2470                                                   QDict *snapshot_options,
2471                                                   Error **errp)
2472{
2473    /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
2474    char *tmp_filename = g_malloc0(PATH_MAX + 1);
2475    int64_t total_size;
2476    QemuOpts *opts = NULL;
2477    BlockDriverState *bs_snapshot = NULL;
2478    Error *local_err = NULL;
2479    int ret;
2480
2481    /* if snapshot, we create a temporary backing file and open it
2482       instead of opening 'filename' directly */
2483
2484    /* Get the required size from the image */
2485    total_size = bdrv_getlength(bs);
2486    if (total_size < 0) {
2487        error_setg_errno(errp, -total_size, "Could not get image size");
2488        goto out;
2489    }
2490
2491    /* Create the temporary image */
2492    ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
2493    if (ret < 0) {
2494        error_setg_errno(errp, -ret, "Could not get temporary filename");
2495        goto out;
2496    }
2497
2498    opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
2499                            &error_abort);
2500    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
2501    ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
2502    qemu_opts_del(opts);
2503    if (ret < 0) {
2504        error_prepend(errp, "Could not create temporary overlay '%s': ",
2505                      tmp_filename);
2506        goto out;
2507    }
2508
2509    /* Prepare options QDict for the temporary file */
2510    qdict_put_str(snapshot_options, "file.driver", "file");
2511    qdict_put_str(snapshot_options, "file.filename", tmp_filename);
2512    qdict_put_str(snapshot_options, "driver", "qcow2");
2513
2514    bs_snapshot = bdrv_open(NULL, NULL, snapshot_options, flags, errp);
2515    snapshot_options = NULL;
2516    if (!bs_snapshot) {
2517        goto out;
2518    }
2519
2520    /* bdrv_append() consumes a strong reference to bs_snapshot
2521     * (i.e. it will call bdrv_unref() on it) even on error, so in
2522     * order to be able to return one, we have to increase
2523     * bs_snapshot's refcount here */
2524    bdrv_ref(bs_snapshot);
2525    bdrv_append(bs_snapshot, bs, &local_err);
2526    if (local_err) {
2527        error_propagate(errp, local_err);
2528        bs_snapshot = NULL;
2529        goto out;
2530    }
2531
2532out:
2533    QDECREF(snapshot_options);
2534    g_free(tmp_filename);
2535    return bs_snapshot;
2536}
2537
2538/*
2539 * Opens a disk image (raw, qcow2, vmdk, ...)
2540 *
2541 * options is a QDict of options to pass to the block drivers, or NULL for an
2542 * empty set of options. The reference to the QDict belongs to the block layer
2543 * after the call (even on failure), so if the caller intends to reuse the
2544 * dictionary, it needs to use QINCREF() before calling bdrv_open.
2545 *
2546 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
2547 * If it is not NULL, the referenced BDS will be reused.
2548 *
2549 * The reference parameter may be used to specify an existing block device which
2550 * should be opened. If specified, neither options nor a filename may be given,
2551 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
2552 */
2553static BlockDriverState *bdrv_open_inherit(const char *filename,
2554                                           const char *reference,
2555                                           QDict *options, int flags,
2556                                           BlockDriverState *parent,
2557                                           const BdrvChildRole *child_role,
2558                                           Error **errp)
2559{
2560    int ret;
2561    BlockBackend *file = NULL;
2562    BlockDriverState *bs;
2563    BlockDriver *drv = NULL;
2564    const char *drvname;
2565    const char *backing;
2566    Error *local_err = NULL;
2567    QDict *snapshot_options = NULL;
2568    int snapshot_flags = 0;
2569
2570    assert(!child_role || !flags);
2571    assert(!child_role == !parent);
2572
2573    if (reference) {
2574        bool options_non_empty = options ? qdict_size(options) : false;
2575        QDECREF(options);
2576
2577        if (filename || options_non_empty) {
2578            error_setg(errp, "Cannot reference an existing block device with "
2579                       "additional options or a new filename");
2580            return NULL;
2581        }
2582
2583        bs = bdrv_lookup_bs(reference, reference, errp);
2584        if (!bs) {
2585            return NULL;
2586        }
2587
2588        bdrv_ref(bs);
2589        return bs;
2590    }
2591
2592    bs = bdrv_new();
2593
2594    /* NULL means an empty set of options */
2595    if (options == NULL) {
2596        options = qdict_new();
2597    }
2598
2599    /* json: syntax counts as explicit options, as if in the QDict */
2600    parse_json_protocol(options, &filename, &local_err);
2601    if (local_err) {
2602        goto fail;
2603    }
2604
2605    bs->explicit_options = qdict_clone_shallow(options);
2606
2607    if (child_role) {
2608        bs->inherits_from = parent;
2609        child_role->inherit_options(&flags, options,
2610                                    parent->open_flags, parent->options);
2611    }
2612
2613    ret = bdrv_fill_options(&options, filename, &flags, &local_err);
2614    if (local_err) {
2615        goto fail;
2616    }
2617
2618    /*
2619     * Set the BDRV_O_RDWR and BDRV_O_ALLOW_RDWR flags.
2620     * Caution: getting a boolean member of @options requires care.
2621     * When @options come from -blockdev or blockdev_add, members are
2622     * typed according to the QAPI schema, but when they come from
2623     * -drive, they're all QString.
2624     */
2625    if (g_strcmp0(qdict_get_try_str(options, BDRV_OPT_READ_ONLY), "on") &&
2626        !qdict_get_try_bool(options, BDRV_OPT_READ_ONLY, false)) {
2627        flags |= (BDRV_O_RDWR | BDRV_O_ALLOW_RDWR);
2628    } else {
2629        flags &= ~BDRV_O_RDWR;
2630    }
2631
2632    if (flags & BDRV_O_SNAPSHOT) {
2633        snapshot_options = qdict_new();
2634        bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
2635                                   flags, options);
2636        /* Let bdrv_backing_options() override "read-only" */
2637        qdict_del(options, BDRV_OPT_READ_ONLY);
2638        bdrv_backing_options(&flags, options, flags, options);
2639    }
2640
2641    bs->open_flags = flags;
2642    bs->options = options;
2643    options = qdict_clone_shallow(options);
2644
2645    /* Find the right image format driver */
2646    /* See cautionary note on accessing @options above */
2647    drvname = qdict_get_try_str(options, "driver");
2648    if (drvname) {
2649        drv = bdrv_find_format(drvname);
2650        if (!drv) {
2651            error_setg(errp, "Unknown driver: '%s'", drvname);
2652            goto fail;
2653        }
2654    }
2655
2656    assert(drvname || !(flags & BDRV_O_PROTOCOL));
2657
2658    /* See cautionary note on accessing @options above */
2659    backing = qdict_get_try_str(options, "backing");
2660    if (qobject_to(QNull, qdict_get(options, "backing")) != NULL ||
2661        (backing && *backing == '\0'))
2662    {
2663        if (backing) {
2664            warn_report("Use of \"backing\": \"\" is deprecated; "
2665                        "use \"backing\": null instead");
2666        }
2667        flags |= BDRV_O_NO_BACKING;
2668        qdict_del(options, "backing");
2669    }
2670
2671    /* Open image file without format layer. This BlockBackend is only used for
2672     * probing, the block drivers will do their own bdrv_open_child() for the
2673     * same BDS, which is why we put the node name back into options. */
2674    if ((flags & BDRV_O_PROTOCOL) == 0) {
2675        BlockDriverState *file_bs;
2676
2677        file_bs = bdrv_open_child_bs(filename, options, "file", bs,
2678                                     &child_file, true, &local_err);
2679        if (local_err) {
2680            goto fail;
2681        }
2682        if (file_bs != NULL) {
2683            /* Not requesting BLK_PERM_CONSISTENT_READ because we're only
2684             * looking at the header to guess the image format. This works even
2685             * in cases where a guest would not see a consistent state. */
2686            file = blk_new(0, BLK_PERM_ALL);
2687            blk_insert_bs(file, file_bs, &local_err);
2688            bdrv_unref(file_bs);
2689            if (local_err) {
2690                goto fail;
2691            }
2692
2693            qdict_put_str(options, "file", bdrv_get_node_name(file_bs));
2694        }
2695    }
2696
2697    /* Image format probing */
2698    bs->probed = !drv;
2699    if (!drv && file) {
2700        ret = find_image_format(file, filename, &drv, &local_err);
2701        if (ret < 0) {
2702            goto fail;
2703        }
2704        /*
2705         * This option update would logically belong in bdrv_fill_options(),
2706         * but we first need to open bs->file for the probing to work, while
2707         * opening bs->file already requires the (mostly) final set of options
2708         * so that cache mode etc. can be inherited.
2709         *
2710         * Adding the driver later is somewhat ugly, but it's not an option
2711         * that would ever be inherited, so it's correct. We just need to make
2712         * sure to update both bs->options (which has the full effective
2713         * options for bs) and options (which has file.* already removed).
2714         */
2715        qdict_put_str(bs->options, "driver", drv->format_name);
2716        qdict_put_str(options, "driver", drv->format_name);
2717    } else if (!drv) {
2718        error_setg(errp, "Must specify either driver or file");
2719        goto fail;
2720    }
2721
2722    /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
2723    assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
2724    /* file must be NULL if a protocol BDS is about to be created
2725     * (the inverse results in an error message from bdrv_open_common()) */
2726    assert(!(flags & BDRV_O_PROTOCOL) || !file);
2727
2728    /* Open the image */
2729    ret = bdrv_open_common(bs, file, options, &local_err);
2730    if (ret < 0) {
2731        goto fail;
2732    }
2733
2734    if (file) {
2735        blk_unref(file);
2736        file = NULL;
2737    }
2738
2739    /* If there is a backing file, use it */
2740    if ((flags & BDRV_O_NO_BACKING) == 0) {
2741        ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
2742        if (ret < 0) {
2743            goto close_and_fail;
2744        }
2745    }
2746
2747    bdrv_refresh_filename(bs);
2748
2749    /* Check if any unknown options were used */
2750    if (qdict_size(options) != 0) {
2751        const QDictEntry *entry = qdict_first(options);
2752        if (flags & BDRV_O_PROTOCOL) {
2753            error_setg(errp, "Block protocol '%s' doesn't support the option "
2754                       "'%s'", drv->format_name, entry->key);
2755        } else {
2756            error_setg(errp,
2757                       "Block format '%s' does not support the option '%s'",
2758                       drv->format_name, entry->key);
2759        }
2760
2761        goto close_and_fail;
2762    }
2763
2764    bdrv_parent_cb_change_media(bs, true);
2765
2766    QDECREF(options);
2767
2768    /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
2769     * temporary snapshot afterwards. */
2770    if (snapshot_flags) {
2771        BlockDriverState *snapshot_bs;
2772        snapshot_bs = bdrv_append_temp_snapshot(bs, snapshot_flags,
2773                                                snapshot_options, &local_err);
2774        snapshot_options = NULL;
2775        if (local_err) {
2776            goto close_and_fail;
2777        }
2778        /* We are not going to return bs but the overlay on top of it
2779         * (snapshot_bs); thus, we have to drop the strong reference to bs
2780         * (which we obtained by calling bdrv_new()). bs will not be deleted,
2781         * though, because the overlay still has a reference to it. */
2782        bdrv_unref(bs);
2783        bs = snapshot_bs;
2784    }
2785
2786    return bs;
2787
2788fail:
2789    blk_unref(file);
2790    QDECREF(snapshot_options);
2791    QDECREF(bs->explicit_options);
2792    QDECREF(bs->options);
2793    QDECREF(options);
2794    bs->options = NULL;
2795    bs->explicit_options = NULL;
2796    bdrv_unref(bs);
2797    error_propagate(errp, local_err);
2798    return NULL;
2799
2800close_and_fail:
2801    bdrv_unref(bs);
2802    QDECREF(snapshot_options);
2803    QDECREF(options);
2804    error_propagate(errp, local_err);
2805    return NULL;
2806}
2807
2808BlockDriverState *bdrv_open(const char *filename, const char *reference,
2809                            QDict *options, int flags, Error **errp)
2810{
2811    return bdrv_open_inherit(filename, reference, options, flags, NULL,
2812                             NULL, errp);
2813}
2814
2815/*
2816 * Adds a BlockDriverState to a simple queue for an atomic, transactional
2817 * reopen of multiple devices.
2818 *
2819 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
2820 * already performed, or alternatively may be NULL a new BlockReopenQueue will
2821 * be created and initialized. This newly created BlockReopenQueue should be
2822 * passed back in for subsequent calls that are intended to be of the same
2823 * atomic 'set'.
2824 *
2825 * bs is the BlockDriverState to add to the reopen queue.
2826 *
2827 * options contains the changed options for the associated bs
2828 * (the BlockReopenQueue takes ownership)
2829 *
2830 * flags contains the open flags for the associated bs
2831 *
2832 * returns a pointer to bs_queue, which is either the newly allocated
2833 * bs_queue, or the existing bs_queue being used.
2834 *
2835 * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple().
2836 */
2837static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
2838                                                 BlockDriverState *bs,
2839                                                 QDict *options,
2840                                                 int flags,
2841                                                 const BdrvChildRole *role,
2842                                                 QDict *parent_options,
2843                                                 int parent_flags)
2844{
2845    assert(bs != NULL);
2846
2847    BlockReopenQueueEntry *bs_entry;
2848    BdrvChild *child;
2849    QDict *old_options, *explicit_options;
2850
2851    /* Make sure that the caller remembered to use a drained section. This is
2852     * important to avoid graph changes between the recursive queuing here and
2853     * bdrv_reopen_multiple(). */
2854    assert(bs->quiesce_counter > 0);
2855
2856    if (bs_queue == NULL) {
2857        bs_queue = g_new0(BlockReopenQueue, 1);
2858        QSIMPLEQ_INIT(bs_queue);
2859    }
2860
2861    if (!options) {
2862        options = qdict_new();
2863    }
2864
2865    /* Check if this BlockDriverState is already in the queue */
2866    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
2867        if (bs == bs_entry->state.bs) {
2868            break;
2869        }
2870    }
2871
2872    /*
2873     * Precedence of options:
2874     * 1. Explicitly passed in options (highest)
2875     * 2. Set in flags (only for top level)
2876     * 3. Retained from explicitly set options of bs
2877     * 4. Inherited from parent node
2878     * 5. Retained from effective options of bs
2879     */
2880
2881    if (!parent_options) {
2882        /*
2883         * Any setting represented by flags is always updated. If the
2884         * corresponding QDict option is set, it takes precedence. Otherwise
2885         * the flag is translated into a QDict option. The old setting of bs is
2886         * not considered.
2887         */
2888        update_options_from_flags(options, flags);
2889    }
2890
2891    /* Old explicitly set values (don't overwrite by inherited value) */
2892    if (bs_entry) {
2893        old_options = qdict_clone_shallow(bs_entry->state.explicit_options);
2894    } else {
2895        old_options = qdict_clone_shallow(bs->explicit_options);
2896    }
2897    bdrv_join_options(bs, options, old_options);
2898    QDECREF(old_options);
2899
2900    explicit_options = qdict_clone_shallow(options);
2901
2902    /* Inherit from parent node */
2903    if (parent_options) {
2904        QemuOpts *opts;
2905        QDict *options_copy;
2906        assert(!flags);
2907        role->inherit_options(&flags, options, parent_flags, parent_options);
2908        options_copy = qdict_clone_shallow(options);
2909        opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
2910        qemu_opts_absorb_qdict(opts, options_copy, NULL);
2911        update_flags_from_options(&flags, opts);
2912        qemu_opts_del(opts);
2913        QDECREF(options_copy);
2914    }
2915
2916    /* Old values are used for options that aren't set yet */
2917    old_options = qdict_clone_shallow(bs->options);
2918    bdrv_join_options(bs, options, old_options);
2919    QDECREF(old_options);
2920
2921    /* bdrv_open_inherit() sets and clears some additional flags internally */
2922    flags &= ~BDRV_O_PROTOCOL;
2923    if (flags & BDRV_O_RDWR) {
2924        flags |= BDRV_O_ALLOW_RDWR;
2925    }
2926
2927    if (!bs_entry) {
2928        bs_entry = g_new0(BlockReopenQueueEntry, 1);
2929        QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
2930    } else {
2931        QDECREF(bs_entry->state.options);
2932        QDECREF(bs_entry->state.explicit_options);
2933    }
2934
2935    bs_entry->state.bs = bs;
2936    bs_entry->state.options = options;
2937    bs_entry->state.explicit_options = explicit_options;
2938    bs_entry->state.flags = flags;
2939
2940    /* This needs to be overwritten in bdrv_reopen_prepare() */
2941    bs_entry->state.perm = UINT64_MAX;
2942    bs_entry->state.shared_perm = 0;
2943
2944    QLIST_FOREACH(child, &bs->children, next) {
2945        QDict *new_child_options;
2946        char *child_key_dot;
2947
2948        /* reopen can only change the options of block devices that were
2949         * implicitly created and inherited options. For other (referenced)
2950         * block devices, a syntax like "backing.foo" results in an error. */
2951        if (child->bs->inherits_from != bs) {
2952            continue;
2953        }
2954
2955        child_key_dot = g_strdup_printf("%s.", child->name);
2956        qdict_extract_subqdict(options, &new_child_options, child_key_dot);
2957        g_free(child_key_dot);
2958
2959        bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 0,
2960                                child->role, options, flags);
2961    }
2962
2963    return bs_queue;
2964}
2965
2966BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
2967                                    BlockDriverState *bs,
2968                                    QDict *options, int flags)
2969{
2970    return bdrv_reopen_queue_child(bs_queue, bs, options, flags,
2971                                   NULL, NULL, 0);
2972}
2973
2974/*
2975 * Reopen multiple BlockDriverStates atomically & transactionally.
2976 *
2977 * The queue passed in (bs_queue) must have been built up previous
2978 * via bdrv_reopen_queue().
2979 *
2980 * Reopens all BDS specified in the queue, with the appropriate
2981 * flags.  All devices are prepared for reopen, and failure of any
2982 * device will cause all device changes to be abandonded, and intermediate
2983 * data cleaned up.
2984 *
2985 * If all devices prepare successfully, then the changes are committed
2986 * to all devices.
2987 *
2988 * All affected nodes must be drained between bdrv_reopen_queue() and
2989 * bdrv_reopen_multiple().
2990 */
2991int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **errp)
2992{
2993    int ret = -1;
2994    BlockReopenQueueEntry *bs_entry, *next;
2995    Error *local_err = NULL;
2996
2997    assert(bs_queue != NULL);
2998
2999    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
3000        assert(bs_entry->state.bs->quiesce_counter > 0);
3001        if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
3002            error_propagate(errp, local_err);
3003            goto cleanup;
3004        }
3005        bs_entry->prepared = true;
3006    }
3007
3008    /* If we reach this point, we have success and just need to apply the
3009     * changes
3010     */
3011    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
3012        bdrv_reopen_commit(&bs_entry->state);
3013    }
3014
3015    ret = 0;
3016
3017cleanup:
3018    QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
3019        if (ret && bs_entry->prepared) {
3020            bdrv_reopen_abort(&bs_entry->state);
3021        } else if (ret) {
3022            QDECREF(bs_entry->state.explicit_options);
3023        }
3024        QDECREF(bs_entry->state.options);
3025        g_free(bs_entry);
3026    }
3027    g_free(bs_queue);
3028
3029    return ret;
3030}
3031
3032
3033/* Reopen a single BlockDriverState with the specified flags. */
3034int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
3035{
3036    int ret = -1;
3037    Error *local_err = NULL;
3038    BlockReopenQueue *queue;
3039
3040    bdrv_subtree_drained_begin(bs);
3041
3042    queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
3043    ret = bdrv_reopen_multiple(bdrv_get_aio_context(bs), queue, &local_err);
3044    if (local_err != NULL) {
3045        error_propagate(errp, local_err);
3046    }
3047
3048    bdrv_subtree_drained_end(bs);
3049
3050    return ret;
3051}
3052
3053static BlockReopenQueueEntry *find_parent_in_reopen_queue(BlockReopenQueue *q,
3054                                                          BdrvChild *c)
3055{
3056    BlockReopenQueueEntry *entry;
3057
3058    QSIMPLEQ_FOREACH(entry, q, entry) {
3059        BlockDriverState *bs = entry->state.bs;
3060        BdrvChild *child;
3061
3062        QLIST_FOREACH(child, &bs->children, next) {
3063            if (child == c) {
3064                return entry;
3065            }
3066        }
3067    }
3068
3069    return NULL;
3070}
3071
3072static void bdrv_reopen_perm(BlockReopenQueue *q, BlockDriverState *bs,
3073                             uint64_t *perm, uint64_t *shared)
3074{
3075    BdrvChild *c;
3076    BlockReopenQueueEntry *parent;
3077    uint64_t cumulative_perms = 0;
3078    uint64_t cumulative_shared_perms = BLK_PERM_ALL;
3079
3080    QLIST_FOREACH(c, &bs->parents, next_parent) {
3081        parent = find_parent_in_reopen_queue(q, c);
3082        if (!parent) {
3083            cumulative_perms |= c->perm;
3084            cumulative_shared_perms &= c->shared_perm;
3085        } else {
3086            uint64_t nperm, nshared;
3087
3088            bdrv_child_perm(parent->state.bs, bs, c, c->role, q,
3089                            parent->state.perm, parent->state.shared_perm,
3090                            &nperm, &nshared);
3091
3092            cumulative_perms |= nperm;
3093            cumulative_shared_perms &= nshared;
3094        }
3095    }
3096    *perm = cumulative_perms;
3097    *shared = cumulative_shared_perms;
3098}
3099
3100/*
3101 * Prepares a BlockDriverState for reopen. All changes are staged in the
3102 * 'opaque' field of the BDRVReopenState, which is used and allocated by
3103 * the block driver layer .bdrv_reopen_prepare()
3104 *
3105 * bs is the BlockDriverState to reopen
3106 * flags are the new open flags
3107 * queue is the reopen queue
3108 *
3109 * Returns 0 on success, non-zero on error.  On error errp will be set
3110 * as well.
3111 *
3112 * On failure, bdrv_reopen_abort() will be called to clean up any data.
3113 * It is the responsibility of the caller to then call the abort() or
3114 * commit() for any other BDS that have been left in a prepare() state
3115 *
3116 */
3117int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
3118                        Error **errp)
3119{
3120    int ret = -1;
3121    Error *local_err = NULL;
3122    BlockDriver *drv;
3123    QemuOpts *opts;
3124    const char *value;
3125    bool read_only;
3126
3127    assert(reopen_state != NULL);
3128    assert(reopen_state->bs->drv != NULL);
3129    drv = reopen_state->bs->drv;
3130
3131    /* Process generic block layer options */
3132    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
3133    qemu_opts_absorb_qdict(opts, reopen_state->options, &local_err);
3134    if (local_err) {
3135        error_propagate(errp, local_err);
3136        ret = -EINVAL;
3137        goto error;
3138    }
3139
3140    update_flags_from_options(&reopen_state->flags, opts);
3141
3142    /* node-name and driver must be unchanged. Put them back into the QDict, so
3143     * that they are checked at the end of this function. */
3144    value = qemu_opt_get(opts, "node-name");
3145    if (value) {
3146        qdict_put_str(reopen_state->options, "node-name", value);
3147    }
3148
3149    value = qemu_opt_get(opts, "driver");
3150    if (value) {
3151        qdict_put_str(reopen_state->options, "driver", value);
3152    }
3153
3154    /* If we are to stay read-only, do not allow permission change
3155     * to r/w. Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is
3156     * not set, or if the BDS still has copy_on_read enabled */
3157    read_only = !(reopen_state->flags & BDRV_O_RDWR);
3158    ret = bdrv_can_set_read_only(reopen_state->bs, read_only, true, &local_err);
3159    if (local_err) {
3160        error_propagate(errp, local_err);
3161        goto error;
3162    }
3163
3164    /* Calculate required permissions after reopening */
3165    bdrv_reopen_perm(queue, reopen_state->bs,
3166                     &reopen_state->perm, &reopen_state->shared_perm);
3167
3168    ret = bdrv_flush(reopen_state->bs);
3169    if (ret) {
3170        error_setg_errno(errp, -ret, "Error flushing drive");
3171        goto error;
3172    }
3173
3174    if (drv->bdrv_reopen_prepare) {
3175        ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
3176        if (ret) {
3177            if (local_err != NULL) {
3178                error_propagate(errp, local_err);
3179            } else {
3180                error_setg(errp, "failed while preparing to reopen image '%s'",
3181                           reopen_state->bs->filename);
3182            }
3183            goto error;
3184        }
3185    } else {
3186        /* It is currently mandatory to have a bdrv_reopen_prepare()
3187         * handler for each supported drv. */
3188        error_setg(errp, "Block format '%s' used by node '%s' "
3189                   "does not support reopening files", drv->format_name,
3190                   bdrv_get_device_or_node_name(reopen_state->bs));
3191        ret = -1;
3192        goto error;
3193    }
3194
3195    /* Options that are not handled are only okay if they are unchanged
3196     * compared to the old state. It is expected that some options are only
3197     * used for the initial open, but not reopen (e.g. filename) */
3198    if (qdict_size(reopen_state->options)) {
3199        const QDictEntry *entry = qdict_first(reopen_state->options);
3200
3201        do {
3202            QObject *new = entry->value;
3203            QObject *old = qdict_get(reopen_state->bs->options, entry->key);
3204
3205            /*
3206             * TODO: When using -drive to specify blockdev options, all values
3207             * will be strings; however, when using -blockdev, blockdev-add or
3208             * filenames using the json:{} pseudo-protocol, they will be
3209             * correctly typed.
3210             * In contrast, reopening options are (currently) always strings
3211             * (because you can only specify them through qemu-io; all other
3212             * callers do not specify any options).
3213             * Therefore, when using anything other than -drive to create a BDS,
3214             * this cannot detect non-string options as unchanged, because
3215             * qobject_is_equal() always returns false for objects of different
3216             * type.  In the future, this should be remedied by correctly typing
3217             * all options.  For now, this is not too big of an issue because
3218             * the user can simply omit options which cannot be changed anyway,
3219             * so they will stay unchanged.
3220             */
3221            if (!qobject_is_equal(new, old)) {
3222                error_setg(errp, "Cannot change the option '%s'", entry->key);
3223                ret = -EINVAL;
3224                goto error;
3225            }
3226        } while ((entry = qdict_next(reopen_state->options, entry)));
3227    }
3228
3229    ret = bdrv_check_perm(reopen_state->bs, queue, reopen_state->perm,
3230                          reopen_state->shared_perm, NULL, errp);
3231    if (ret < 0) {
3232        goto error;
3233    }
3234
3235    ret = 0;
3236
3237error:
3238    qemu_opts_del(opts);
3239    return ret;
3240}
3241
3242/*
3243 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
3244 * makes them final by swapping the staging BlockDriverState contents into
3245 * the active BlockDriverState contents.
3246 */
3247void bdrv_reopen_commit(BDRVReopenState *reopen_state)
3248{
3249    BlockDriver *drv;
3250    BlockDriverState *bs;
3251    bool old_can_write, new_can_write;
3252
3253    assert(reopen_state != NULL);
3254    bs = reopen_state->bs;
3255    drv = bs->drv;
3256    assert(drv != NULL);
3257
3258    old_can_write =
3259        !bdrv_is_read_only(bs) && !(bdrv_get_flags(bs) & BDRV_O_INACTIVE);
3260
3261    /* If there are any driver level actions to take */
3262    if (drv->bdrv_reopen_commit) {
3263        drv->bdrv_reopen_commit(reopen_state);
3264    }
3265
3266    /* set BDS specific flags now */
3267    QDECREF(bs->explicit_options);
3268
3269    bs->explicit_options   = reopen_state->explicit_options;
3270    bs->open_flags         = reopen_state->flags;
3271    bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
3272
3273    bdrv_refresh_limits(bs, NULL);
3274
3275    bdrv_set_perm(reopen_state->bs, reopen_state->perm,
3276                  reopen_state->shared_perm);
3277
3278    new_can_write =
3279        !bdrv_is_read_only(bs) && !(bdrv_get_flags(bs) & BDRV_O_INACTIVE);
3280    if (!old_can_write && new_can_write && drv->bdrv_reopen_bitmaps_rw) {
3281        Error *local_err = NULL;
3282        if (drv->bdrv_reopen_bitmaps_rw(bs, &local_err) < 0) {
3283            /* This is not fatal, bitmaps just left read-only, so all following
3284             * writes will fail. User can remove read-only bitmaps to unblock
3285             * writes.
3286             */
3287            error_reportf_err(local_err,
3288                              "%s: Failed to make dirty bitmaps writable: ",
3289                              bdrv_get_node_name(bs));
3290        }
3291    }
3292}
3293
3294/*
3295 * Abort the reopen, and delete and free the staged changes in
3296 * reopen_state
3297 */
3298void bdrv_reopen_abort(BDRVReopenState *reopen_state)
3299{
3300    BlockDriver *drv;
3301
3302    assert(reopen_state != NULL);
3303    drv = reopen_state->bs->drv;
3304    assert(drv != NULL);
3305
3306    if (drv->bdrv_reopen_abort) {
3307        drv->bdrv_reopen_abort(reopen_state);
3308    }
3309
3310    QDECREF(reopen_state->explicit_options);
3311
3312    bdrv_abort_perm_update(reopen_state->bs);
3313}
3314
3315
3316static void bdrv_close(BlockDriverState *bs)
3317{
3318    BdrvAioNotifier *ban, *ban_next;
3319    BdrvChild *child, *next;
3320
3321    assert(!bs->job);
3322    assert(!bs->refcnt);
3323
3324    bdrv_drained_begin(bs); /* complete I/O */
3325    bdrv_flush(bs);
3326    bdrv_drain(bs); /* in case flush left pending I/O */
3327
3328    if (bs->drv) {
3329        bs->drv->bdrv_close(bs);
3330        bs->drv = NULL;
3331    }
3332
3333    bdrv_set_backing_hd(bs, NULL, &error_abort);
3334
3335    if (bs->file != NULL) {
3336        bdrv_unref_child(bs, bs->file);
3337        bs->file = NULL;
3338    }
3339
3340    QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
3341        /* TODO Remove bdrv_unref() from drivers' close function and use
3342         * bdrv_unref_child() here */
3343        if (child->bs->inherits_from == bs) {
3344            child->bs->inherits_from = NULL;
3345        }
3346        bdrv_detach_child(child);
3347    }
3348
3349    g_free(bs->opaque);
3350    bs->opaque = NULL;
3351    atomic_set(&bs->copy_on_read, 0);
3352    bs->backing_file[0] = '\0';
3353    bs->backing_format[0] = '\0';
3354    bs->total_sectors = 0;
3355    bs->encrypted = false;
3356    bs->sg = false;
3357    QDECREF(bs->options);
3358    QDECREF(bs->explicit_options);
3359    bs->options = NULL;
3360    bs->explicit_options = NULL;
3361    QDECREF(bs->full_open_options);
3362    bs->full_open_options = NULL;
3363
3364    bdrv_release_named_dirty_bitmaps(bs);
3365    assert(QLIST_EMPTY(&bs->dirty_bitmaps));
3366
3367    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3368        g_free(ban);
3369    }
3370    QLIST_INIT(&bs->aio_notifiers);
3371    bdrv_drained_end(bs);
3372}
3373
3374void bdrv_close_all(void)
3375{
3376    block_job_cancel_sync_all();
3377    nbd_export_close_all();
3378
3379    /* Drop references from requests still in flight, such as canceled block
3380     * jobs whose AIO context has not been polled yet */
3381    bdrv_drain_all();
3382
3383    blk_remove_all_bs();
3384    blockdev_close_all_bdrv_states();
3385
3386    assert(QTAILQ_EMPTY(&all_bdrv_states));
3387}
3388
3389static bool should_update_child(BdrvChild *c, BlockDriverState *to)
3390{
3391    BdrvChild *to_c;
3392
3393    if (c->role->stay_at_node) {
3394        return false;
3395    }
3396
3397    if (c->role == &child_backing) {
3398        /* If @from is a backing file of @to, ignore the child to avoid
3399         * creating a loop. We only want to change the pointer of other
3400         * parents. */
3401        QLIST_FOREACH(to_c, &to->children, next) {
3402            if (to_c == c) {
3403                break;
3404            }
3405        }
3406        if (to_c) {
3407            return false;
3408        }
3409    }
3410
3411    return true;
3412}
3413
3414void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
3415                       Error **errp)
3416{
3417    BdrvChild *c, *next;
3418    GSList *list = NULL, *p;
3419    uint64_t old_perm, old_shared;
3420    uint64_t perm = 0, shared = BLK_PERM_ALL;
3421    int ret;
3422
3423    assert(!atomic_read(&from->in_flight));
3424    assert(!atomic_read(&to->in_flight));
3425
3426    /* Make sure that @from doesn't go away until we have successfully attached
3427     * all of its parents to @to. */
3428    bdrv_ref(from);
3429
3430    /* Put all parents into @list and calculate their cumulative permissions */
3431    QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
3432        if (!should_update_child(c, to)) {
3433            continue;
3434        }
3435        list = g_slist_prepend(list, c);
3436        perm |= c->perm;
3437        shared &= c->shared_perm;
3438    }
3439
3440    /* Check whether the required permissions can be granted on @to, ignoring
3441     * all BdrvChild in @list so that they can't block themselves. */
3442    ret = bdrv_check_update_perm(to, NULL, perm, shared, list, errp);
3443    if (ret < 0) {
3444        bdrv_abort_perm_update(to);
3445        goto out;
3446    }
3447
3448    /* Now actually perform the change. We performed the permission check for
3449     * all elements of @list at once, so set the permissions all at once at the
3450     * very end. */
3451    for (p = list; p != NULL; p = p->next) {
3452        c = p->data;
3453
3454        bdrv_ref(to);
3455        bdrv_replace_child_noperm(c, to);
3456        bdrv_unref(from);
3457    }
3458
3459    bdrv_get_cumulative_perm(to, &old_perm, &old_shared);
3460    bdrv_set_perm(to, old_perm | perm, old_shared | shared);
3461
3462out:
3463    g_slist_free(list);
3464    bdrv_unref(from);
3465}
3466
3467/*
3468 * Add new bs contents at the top of an image chain while the chain is
3469 * live, while keeping required fields on the top layer.
3470 *
3471 * This will modify the BlockDriverState fields, and swap contents
3472 * between bs_new and bs_top. Both bs_new and bs_top are modified.
3473 *
3474 * bs_new must not be attached to a BlockBackend.
3475 *
3476 * This function does not create any image files.
3477 *
3478 * bdrv_append() takes ownership of a bs_new reference and unrefs it because
3479 * that's what the callers commonly need. bs_new will be referenced by the old
3480 * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
3481 * reference of its own, it must call bdrv_ref().
3482 */
3483void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
3484                 Error **errp)
3485{
3486    Error *local_err = NULL;
3487
3488    bdrv_set_backing_hd(bs_new, bs_top, &local_err);
3489    if (local_err) {
3490        error_propagate(errp, local_err);
3491        goto out;
3492    }
3493
3494    bdrv_replace_node(bs_top, bs_new, &local_err);
3495    if (local_err) {
3496        error_propagate(errp, local_err);
3497        bdrv_set_backing_hd(bs_new, NULL, &error_abort);
3498        goto out;
3499    }
3500
3501    /* bs_new is now referenced by its new parents, we don't need the
3502     * additional reference any more. */
3503out:
3504    bdrv_unref(bs_new);
3505}
3506
3507static void bdrv_delete(BlockDriverState *bs)
3508{
3509    assert(!bs->job);
3510    assert(bdrv_op_blocker_is_empty(bs));
3511    assert(!bs->refcnt);
3512
3513    bdrv_close(bs);
3514
3515    /* remove from list, if necessary */
3516    if (bs->node_name[0] != '\0') {
3517        QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
3518    }
3519    QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);
3520
3521    g_free(bs);
3522}
3523
3524/*
3525 * Run consistency checks on an image
3526 *
3527 * Returns 0 if the check could be completed (it doesn't mean that the image is
3528 * free of errors) or -errno when an internal error occurred. The results of the
3529 * check are stored in res.
3530 */
3531static int coroutine_fn bdrv_co_check(BlockDriverState *bs,
3532                                      BdrvCheckResult *res, BdrvCheckMode fix)
3533{
3534    if (bs->drv == NULL) {
3535        return -ENOMEDIUM;
3536    }
3537    if (bs->drv->bdrv_co_check == NULL) {
3538        return -ENOTSUP;
3539    }
3540
3541    memset(res, 0, sizeof(*res));
3542    return bs->drv->bdrv_co_check(bs, res, fix);
3543}
3544
3545typedef struct CheckCo {
3546    BlockDriverState *bs;
3547    BdrvCheckResult *res;
3548    BdrvCheckMode fix;
3549    int ret;
3550} CheckCo;
3551
3552static void bdrv_check_co_entry(void *opaque)
3553{
3554    CheckCo *cco = opaque;
3555    cco->ret = bdrv_co_check(cco->bs, cco->res, cco->fix);
3556}
3557
3558int bdrv_check(BlockDriverState *bs,
3559               BdrvCheckResult *res, BdrvCheckMode fix)
3560{
3561    Coroutine *co;
3562    CheckCo cco = {
3563        .bs = bs,
3564        .res = res,
3565        .ret = -EINPROGRESS,
3566        .fix = fix,
3567    };
3568
3569    if (qemu_in_coroutine()) {
3570        /* Fast-path if already in coroutine context */
3571        bdrv_check_co_entry(&cco);
3572    } else {
3573        co = qemu_coroutine_create(bdrv_check_co_entry, &cco);
3574        qemu_coroutine_enter(co);
3575        BDRV_POLL_WHILE(bs, cco.ret == -EINPROGRESS);
3576    }
3577
3578    return cco.ret;
3579}
3580
3581/*
3582 * Return values:
3583 * 0        - success
3584 * -EINVAL  - backing format specified, but no file
3585 * -ENOSPC  - can't update the backing file because no space is left in the
3586 *            image file header
3587 * -ENOTSUP - format driver doesn't support changing the backing file
3588 */
3589int bdrv_change_backing_file(BlockDriverState *bs,
3590    const char *backing_file, const char *backing_fmt)
3591{
3592    BlockDriver *drv = bs->drv;
3593    int ret;
3594
3595    if (!drv) {
3596        return -ENOMEDIUM;
3597    }
3598
3599    /* Backing file format doesn't make sense without a backing file */
3600    if (backing_fmt && !backing_file) {
3601        return -EINVAL;
3602    }
3603
3604    if (drv->bdrv_change_backing_file != NULL) {
3605        ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
3606    } else {
3607        ret = -ENOTSUP;
3608    }
3609
3610    if (ret == 0) {
3611        pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
3612        pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
3613    }
3614    return ret;
3615}
3616
3617/*
3618 * Finds the image layer in the chain that has 'bs' as its backing file.
3619 *
3620 * active is the current topmost image.
3621 *
3622 * Returns NULL if bs is not found in active's image chain,
3623 * or if active == bs.
3624 *
3625 * Returns the bottommost base image if bs == NULL.
3626 */
3627BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
3628                                    BlockDriverState *bs)
3629{
3630    while (active && bs != backing_bs(active)) {
3631        active = backing_bs(active);
3632    }
3633
3634    return active;
3635}
3636
3637/* Given a BDS, searches for the base layer. */
3638BlockDriverState *bdrv_find_base(BlockDriverState *bs)
3639{
3640    return bdrv_find_overlay(bs, NULL);
3641}
3642
3643/*
3644 * Drops images above 'base' up to and including 'top', and sets the image
3645 * above 'top' to have base as its backing file.
3646 *
3647 * Requires that the overlay to 'top' is opened r/w, so that the backing file
3648 * information in 'bs' can be properly updated.
3649 *
3650 * E.g., this will convert the following chain:
3651 * bottom <- base <- intermediate <- top <- active
3652 *
3653 * to
3654 *
3655 * bottom <- base <- active
3656 *
3657 * It is allowed for bottom==base, in which case it converts:
3658 *
3659 * base <- intermediate <- top <- active
3660 *
3661 * to
3662 *
3663 * base <- active
3664 *
3665 * If backing_file_str is non-NULL, it will be used when modifying top's
3666 * overlay image metadata.
3667 *
3668 * Error conditions:
3669 *  if active == top, that is considered an error
3670 *
3671 */
3672int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
3673                           const char *backing_file_str)
3674{
3675    BdrvChild *c, *next;
3676    Error *local_err = NULL;
3677    int ret = -EIO;
3678
3679    bdrv_ref(top);
3680
3681    if (!top->drv || !base->drv) {
3682        goto exit;
3683    }
3684
3685    /* Make sure that base is in the backing chain of top */
3686    if (!bdrv_chain_contains(top, base)) {
3687        goto exit;
3688    }
3689
3690    /* success - we can delete the intermediate states, and link top->base */
3691    /* TODO Check graph modification op blockers (BLK_PERM_GRAPH_MOD) once
3692     * we've figured out how they should work. */
3693    backing_file_str = backing_file_str ? backing_file_str : base->filename;
3694
3695    QLIST_FOREACH_SAFE(c, &top->parents, next_parent, next) {
3696        /* Check whether we are allowed to switch c from top to base */
3697        GSList *ignore_children = g_slist_prepend(NULL, c);
3698        bdrv_check_update_perm(base, NULL, c->perm, c->shared_perm,
3699                               ignore_children, &local_err);
3700        g_slist_free(ignore_children);
3701        if (local_err) {
3702            ret = -EPERM;
3703            error_report_err(local_err);
3704            goto exit;
3705        }
3706
3707        /* If so, update the backing file path in the image file */
3708        if (c->role->update_filename) {
3709            ret = c->role->update_filename(c, base, backing_file_str,
3710                                           &local_err);
3711            if (ret < 0) {
3712                bdrv_abort_perm_update(base);
3713                error_report_err(local_err);
3714                goto exit;
3715            }
3716        }
3717
3718        /* Do the actual switch in the in-memory graph.
3719         * Completes bdrv_check_update_perm() transaction internally. */
3720        bdrv_ref(base);
3721        bdrv_replace_child(c, base);
3722        bdrv_unref(top);
3723    }
3724
3725    ret = 0;
3726exit:
3727    bdrv_unref(top);
3728    return ret;
3729}
3730
3731/**
3732 * Truncate file to 'offset' bytes (needed only for file protocols)
3733 */
3734int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc,
3735                  Error **errp)
3736{
3737    BlockDriverState *bs = child->bs;
3738    BlockDriver *drv = bs->drv;
3739    int ret;
3740
3741    assert(child->perm & BLK_PERM_RESIZE);
3742
3743    /* if bs->drv == NULL, bs is closed, so there's nothing to do here */
3744    if (!drv) {
3745        error_setg(errp, "No medium inserted");
3746        return -ENOMEDIUM;
3747    }
3748    if (offset < 0) {
3749        error_setg(errp, "Image size cannot be negative");
3750        return -EINVAL;
3751    }
3752
3753    if (!drv->bdrv_truncate) {
3754        if (bs->file && drv->is_filter) {
3755            return bdrv_truncate(bs->file, offset, prealloc, errp);
3756        }
3757        error_setg(errp, "Image format driver does not support resize");
3758        return -ENOTSUP;
3759    }
3760    if (bs->read_only) {
3761        error_setg(errp, "Image is read-only");
3762        return -EACCES;
3763    }
3764
3765    assert(!(bs->open_flags & BDRV_O_INACTIVE));
3766
3767    ret = drv->bdrv_truncate(bs, offset, prealloc, errp);
3768    if (ret < 0) {
3769        return ret;
3770    }
3771    ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
3772    if (ret < 0) {
3773        error_setg_errno(errp, -ret, "Could not refresh total sector count");
3774    } else {
3775        offset = bs->total_sectors * BDRV_SECTOR_SIZE;
3776    }
3777    bdrv_dirty_bitmap_truncate(bs, offset);
3778    bdrv_parent_cb_resize(bs);
3779    atomic_inc(&bs->write_gen);
3780    return ret;
3781}
3782
3783/**
3784 * Length of a allocated file in bytes. Sparse files are counted by actual
3785 * allocated space. Return < 0 if error or unknown.
3786 */
3787int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
3788{
3789    BlockDriver *drv = bs->drv;
3790    if (!drv) {
3791        return -ENOMEDIUM;
3792    }
3793    if (drv->bdrv_get_allocated_file_size) {
3794        return drv->bdrv_get_allocated_file_size(bs);
3795    }
3796    if (bs->file) {
3797        return bdrv_get_allocated_file_size(bs->file->bs);
3798    }
3799    return -ENOTSUP;
3800}
3801
3802/*
3803 * bdrv_measure:
3804 * @drv: Format driver
3805 * @opts: Creation options for new image
3806 * @in_bs: Existing image containing data for new image (may be NULL)
3807 * @errp: Error object
3808 * Returns: A #BlockMeasureInfo (free using qapi_free_BlockMeasureInfo())
3809 *          or NULL on error
3810 *
3811 * Calculate file size required to create a new image.
3812 *
3813 * If @in_bs is given then space for allocated clusters and zero clusters
3814 * from that image are included in the calculation.  If @opts contains a
3815 * backing file that is shared by @in_bs then backing clusters may be omitted
3816 * from the calculation.
3817 *
3818 * If @in_bs is NULL then the calculation includes no allocated clusters
3819 * unless a preallocation option is given in @opts.
3820 *
3821 * Note that @in_bs may use a different BlockDriver from @drv.
3822 *
3823 * If an error occurs the @errp pointer is set.
3824 */
3825BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
3826                               BlockDriverState *in_bs, Error **errp)
3827{
3828    if (!drv->bdrv_measure) {
3829        error_setg(errp, "Block driver '%s' does not support size measurement",
3830                   drv->format_name);
3831        return NULL;
3832    }
3833
3834    return drv->bdrv_measure(opts, in_bs, errp);
3835}
3836
3837/**
3838 * Return number of sectors on success, -errno on error.
3839 */
3840int64_t bdrv_nb_sectors(BlockDriverState *bs)
3841{
3842    BlockDriver *drv = bs->drv;
3843
3844    if (!drv)
3845        return -ENOMEDIUM;
3846
3847    if (drv->has_variable_length) {
3848        int ret = refresh_total_sectors(bs, bs->total_sectors);
3849        if (ret < 0) {
3850            return ret;
3851        }
3852    }
3853    return bs->total_sectors;
3854}
3855
3856/**
3857 * Return length in bytes on success, -errno on error.
3858 * The length is always a multiple of BDRV_SECTOR_SIZE.
3859 */
3860int64_t bdrv_getlength(BlockDriverState *bs)
3861{
3862    int64_t ret = bdrv_nb_sectors(bs);
3863
3864    ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
3865    return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
3866}
3867
3868/* return 0 as number of sectors if no device present or error */
3869void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
3870{
3871    int64_t nb_sectors = bdrv_nb_sectors(bs);
3872
3873    *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
3874}
3875
3876bool bdrv_is_sg(BlockDriverState *bs)
3877{
3878    return bs->sg;
3879}
3880
3881bool bdrv_is_encrypted(BlockDriverState *bs)
3882{
3883    if (bs->backing && bs->backing->bs->encrypted) {
3884        return true;
3885    }
3886    return bs->encrypted;
3887}
3888
3889const char *bdrv_get_format_name(BlockDriverState *bs)
3890{
3891    return bs->drv ? bs->drv->format_name : NULL;
3892}
3893
/* qsort() comparator for arrays of C strings: @a and @b point to the
 * array elements (i.e. to char pointers), which are compared by strcmp(). */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *lhs = *(char *const *)a;
    const char *rhs = *(char *const *)b;

    return strcmp(lhs, rhs);
}
3898
3899void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
3900                         void *opaque)
3901{
3902    BlockDriver *drv;
3903    int count = 0;
3904    int i;
3905    const char **formats = NULL;
3906
3907    QLIST_FOREACH(drv, &bdrv_drivers, list) {
3908        if (drv->format_name) {
3909            bool found = false;
3910            int i = count;
3911            while (formats && i && !found) {
3912                found = !strcmp(formats[--i], drv->format_name);
3913            }
3914
3915            if (!found) {
3916                formats = g_renew(const char *, formats, count + 1);
3917                formats[count++] = drv->format_name;
3918            }
3919        }
3920    }
3921
3922    for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); i++) {
3923        const char *format_name = block_driver_modules[i].format_name;
3924
3925        if (format_name) {
3926            bool found = false;
3927            int j = count;
3928
3929            while (formats && j && !found) {
3930                found = !strcmp(formats[--j], format_name);
3931            }
3932
3933            if (!found) {
3934                formats = g_renew(const char *, formats, count + 1);
3935                formats[count++] = format_name;
3936            }
3937        }
3938    }
3939
3940    qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
3941
3942    for (i = 0; i < count; i++) {
3943        it(opaque, formats[i]);
3944    }
3945
3946    g_free(formats);
3947}
3948
3949/* This function is to find a node in the bs graph */
3950BlockDriverState *bdrv_find_node(const char *node_name)
3951{
3952    BlockDriverState *bs;
3953
3954    assert(node_name);
3955
3956    QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3957        if (!strcmp(node_name, bs->node_name)) {
3958            return bs;
3959        }
3960    }
3961    return NULL;
3962}
3963
3964/* Put this QMP function here so it can access the static graph_bdrv_states. */
3965BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
3966{
3967    BlockDeviceInfoList *list, *entry;
3968    BlockDriverState *bs;
3969
3970    list = NULL;
3971    QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3972        BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, errp);
3973        if (!info) {
3974            qapi_free_BlockDeviceInfoList(list);
3975            return NULL;
3976        }
3977        entry = g_malloc0(sizeof(*entry));
3978        entry->value = info;
3979        entry->next = list;
3980        list = entry;
3981    }
3982
3983    return list;
3984}
3985
3986BlockDriverState *bdrv_lookup_bs(const char *device,
3987                                 const char *node_name,
3988                                 Error **errp)
3989{
3990    BlockBackend *blk;
3991    BlockDriverState *bs;
3992
3993    if (device) {
3994        blk = blk_by_name(device);
3995
3996        if (blk) {
3997            bs = blk_bs(blk);
3998            if (!bs) {
3999                error_setg(errp, "Device '%s' has no medium", device);
4000            }
4001
4002            return bs;
4003        }
4004    }
4005
4006    if (node_name) {
4007        bs = bdrv_find_node(node_name);
4008
4009        if (bs) {
4010            return bs;
4011        }
4012    }
4013
4014    error_setg(errp, "Cannot find device=%s nor node_name=%s",
4015                     device ? device : "",
4016                     node_name ? node_name : "");
4017    return NULL;
4018}
4019
4020/* If 'base' is in the same chain as 'top', return true. Otherwise,
4021 * return false.  If either argument is NULL, return false. */
4022bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
4023{
4024    while (top && top != base) {
4025        top = backing_bs(top);
4026    }
4027
4028    return top != NULL;
4029}
4030
4031BlockDriverState *bdrv_next_node(BlockDriverState *bs)
4032{
4033    if (!bs) {
4034        return QTAILQ_FIRST(&graph_bdrv_states);
4035    }
4036    return QTAILQ_NEXT(bs, node_list);
4037}
4038
4039const char *bdrv_get_node_name(const BlockDriverState *bs)
4040{
4041    return bs->node_name;
4042}
4043
4044const char *bdrv_get_parent_name(const BlockDriverState *bs)
4045{
4046    BdrvChild *c;
4047    const char *name;
4048
4049    /* If multiple parents have a name, just pick the first one. */
4050    QLIST_FOREACH(c, &bs->parents, next_parent) {
4051        if (c->role->get_name) {
4052            name = c->role->get_name(c);
4053            if (name && *name) {
4054                return name;
4055            }
4056        }
4057    }
4058
4059    return NULL;
4060}
4061
4062/* TODO check what callers really want: bs->node_name or blk_name() */
4063const char *bdrv_get_device_name(const BlockDriverState *bs)
4064{
4065    return bdrv_get_parent_name(bs) ?: "";
4066}
4067
4068/* This can be used to identify nodes that might not have a device
4069 * name associated. Since node and device names live in the same
4070 * namespace, the result is unambiguous. The exception is if both are
4071 * absent, then this returns an empty (non-null) string. */
4072const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
4073{
4074    return bdrv_get_parent_name(bs) ?: bs->node_name;
4075}
4076
4077int bdrv_get_flags(BlockDriverState *bs)
4078{
4079    return bs->open_flags;
4080}
4081
4082int bdrv_has_zero_init_1(BlockDriverState *bs)
4083{
4084    return 1;
4085}
4086
4087int bdrv_has_zero_init(BlockDriverState *bs)
4088{
4089    if (!bs->drv) {
4090        return 0;
4091    }
4092
4093    /* If BS is a copy on write image, it is initialized to
4094       the contents of the base image, which may not be zeroes.  */
4095    if (bs->backing) {
4096        return 0;
4097    }
4098    if (bs->drv->bdrv_has_zero_init) {
4099        return bs->drv->bdrv_has_zero_init(bs);
4100    }
4101    if (bs->file && bs->drv->is_filter) {
4102        return bdrv_has_zero_init(bs->file->bs);
4103    }
4104
4105    /* safe default */
4106    return 0;
4107}
4108
4109bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
4110{
4111    BlockDriverInfo bdi;
4112
4113    if (bs->backing) {
4114        return false;
4115    }
4116
4117    if (bdrv_get_info(bs, &bdi) == 0) {
4118        return bdi.unallocated_blocks_are_zero;
4119    }
4120
4121    return false;
4122}
4123
4124bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
4125{
4126    if (!(bs->open_flags & BDRV_O_UNMAP)) {
4127        return false;
4128    }
4129
4130    return bs->supported_zero_flags & BDRV_REQ_MAY_UNMAP;
4131}
4132
4133const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
4134{
4135    if (bs->backing && bs->backing->bs->encrypted)
4136        return bs->backing_file;
4137    else if (bs->encrypted)
4138        return bs->filename;
4139    else
4140        return NULL;
4141}
4142
4143void bdrv_get_backing_filename(BlockDriverState *bs,
4144                               char *filename, int filename_size)
4145{
4146    pstrcpy(filename, filename_size, bs->backing_file);
4147}
4148
4149int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4150{
4151    BlockDriver *drv = bs->drv;
4152    /* if bs->drv == NULL, bs is closed, so there's nothing to do here */
4153    if (!drv) {
4154        return -ENOMEDIUM;
4155    }
4156    if (!drv->bdrv_get_info) {
4157        if (bs->file && drv->is_filter) {
4158            return bdrv_get_info(bs->file->bs, bdi);
4159        }
4160        return -ENOTSUP;
4161    }
4162    memset(bdi, 0, sizeof(*bdi));
4163    return drv->bdrv_get_info(bs, bdi);
4164}
4165
4166ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
4167{
4168    BlockDriver *drv = bs->drv;
4169    if (drv && drv->bdrv_get_specific_info) {
4170        return drv->bdrv_get_specific_info(bs);
4171    }
4172    return NULL;
4173}
4174
4175void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
4176{
4177    if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
4178        return;
4179    }
4180
4181    bs->drv->bdrv_debug_event(bs, event);
4182}
4183
4184int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
4185                          const char *tag)
4186{
4187    while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
4188        bs = bs->file ? bs->file->bs : NULL;
4189    }
4190
4191    if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
4192        return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
4193    }
4194
4195    return -ENOTSUP;
4196}
4197
4198int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
4199{
4200    while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
4201        bs = bs->file ? bs->file->bs : NULL;
4202    }
4203
4204    if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
4205        return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
4206    }
4207
4208    return -ENOTSUP;
4209}
4210
4211int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
4212{
4213    while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
4214        bs = bs->file ? bs->file->bs : NULL;
4215    }
4216
4217    if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
4218        return bs->drv->bdrv_debug_resume(bs, tag);
4219    }
4220
4221    return -ENOTSUP;
4222}
4223
4224bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
4225{
4226    while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
4227        bs = bs->file ? bs->file->bs : NULL;
4228    }
4229
4230    if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
4231        return bs->drv->bdrv_debug_is_suspended(bs, tag);
4232    }
4233
4234    return false;
4235}
4236
4237/* backing_file can either be relative, or absolute, or a protocol.  If it is
4238 * relative, it must be relative to the chain.  So, passing in bs->filename
4239 * from a BDS as backing_file should not be done, as that may be relative to
4240 * the CWD rather than the chain. */
4241BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
4242        const char *backing_file)
4243{
4244    char *filename_full = NULL;
4245    char *backing_file_full = NULL;
4246    char *filename_tmp = NULL;
4247    int is_protocol = 0;
4248    BlockDriverState *curr_bs = NULL;
4249    BlockDriverState *retval = NULL;
4250    Error *local_error = NULL;
4251
4252    if (!bs || !bs->drv || !backing_file) {
4253        return NULL;
4254    }
4255
4256    filename_full     = g_malloc(PATH_MAX);
4257    backing_file_full = g_malloc(PATH_MAX);
4258    filename_tmp      = g_malloc(PATH_MAX);
4259
4260    is_protocol = path_has_protocol(backing_file);
4261
4262    for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
4263
4264        /* If either of the filename paths is actually a protocol, then
4265         * compare unmodified paths; otherwise make paths relative */
4266        if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
4267            if (strcmp(backing_file, curr_bs->backing_file) == 0) {
4268                retval = curr_bs->backing->bs;
4269                break;
4270            }
4271            /* Also check against the full backing filename for the image */
4272            bdrv_get_full_backing_filename(curr_bs, backing_file_full, PATH_MAX,
4273                                           &local_error);
4274            if (local_error == NULL) {
4275                if (strcmp(backing_file, backing_file_full) == 0) {
4276                    retval = curr_bs->backing->bs;
4277                    break;
4278                }
4279            } else {
4280                error_free(local_error);
4281                local_error = NULL;
4282            }
4283        } else {
4284            /* If not an absolute filename path, make it relative to the current
4285             * image's filename path */
4286            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4287                         backing_file);
4288
4289            /* We are going to compare absolute pathnames */
4290            if (!realpath(filename_tmp, filename_full)) {
4291                continue;
4292            }
4293
4294            /* We need to make sure the backing filename we are comparing against
4295             * is relative to the current image filename (or absolute) */
4296            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4297                         curr_bs->backing_file);
4298
4299            if (!realpath(filename_tmp, backing_file_full)) {
4300                continue;
4301            }
4302
4303            if (strcmp(backing_file_full, filename_full) == 0) {
4304                retval = curr_bs->backing->bs;
4305                break;
4306            }
4307        }
4308    }
4309
4310    g_free(filename_full);
4311    g_free(backing_file_full);
4312    g_free(filename_tmp);
4313    return retval;
4314}
4315
4316void bdrv_init(void)
4317{
4318    module_call_init(MODULE_INIT_BLOCK);
4319}
4320
4321void bdrv_init_with_whitelist(void)
4322{
4323    use_bdrv_whitelist = 1;
4324    bdrv_init();
4325}
4326
4327static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs,
4328                                                  Error **errp)
4329{
4330    BdrvChild *child, *parent;
4331    uint64_t perm, shared_perm;
4332    Error *local_err = NULL;
4333    int ret;
4334
4335    if (!bs->drv)  {
4336        return;
4337    }
4338
4339    if (!(bs->open_flags & BDRV_O_INACTIVE)) {
4340        return;
4341    }
4342
4343    QLIST_FOREACH(child, &bs->children, next) {
4344        bdrv_co_invalidate_cache(child->bs, &local_err);
4345        if (local_err) {
4346            error_propagate(errp, local_err);
4347            return;
4348        }
4349    }
4350
4351    /*
4352     * Update permissions, they may differ for inactive nodes.
4353     *
4354     * Note that the required permissions of inactive images are always a
4355     * subset of the permissions required after activating the image. This
4356     * allows us to just get the permissions upfront without restricting
4357     * drv->bdrv_invalidate_cache().
4358     *
4359     * It also means that in error cases, we don't have to try and revert to
4360     * the old permissions (which is an operation that could fail, too). We can
4361     * just keep the extended permissions for the next time that an activation
4362     * of the image is tried.
4363     */
4364    bs->open_flags &= ~BDRV_O_INACTIVE;
4365    bdrv_get_cumulative_perm(bs, &perm, &shared_perm);
4366    ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, &local_err);
4367    if (ret < 0) {
4368        bs->open_flags |= BDRV_O_INACTIVE;
4369        error_propagate(errp, local_err);
4370        return;
4371    }
4372    bdrv_set_perm(bs, perm, shared_perm);
4373
4374    if (bs->drv->bdrv_co_invalidate_cache) {
4375        bs->drv->bdrv_co_invalidate_cache(bs, &local_err);
4376        if (local_err) {
4377            bs->open_flags |= BDRV_O_INACTIVE;
4378            error_propagate(errp, local_err);
4379            return;
4380        }
4381    }
4382
4383    ret = refresh_total_sectors(bs, bs->total_sectors);
4384    if (ret < 0) {
4385        bs->open_flags |= BDRV_O_INACTIVE;
4386        error_setg_errno(errp, -ret, "Could not refresh total sector count");
4387        return;
4388    }
4389
4390    QLIST_FOREACH(parent, &bs->parents, next_parent) {
4391        if (parent->role->activate) {
4392            parent->role->activate(parent, &local_err);
4393            if (local_err) {
4394                error_propagate(errp, local_err);
4395                return;
4396            }
4397        }
4398    }
4399}
4400
4401typedef struct InvalidateCacheCo {
4402    BlockDriverState *bs;
4403    Error **errp;
4404    bool done;
4405} InvalidateCacheCo;
4406
4407static void coroutine_fn bdrv_invalidate_cache_co_entry(void *opaque)
4408{
4409    InvalidateCacheCo *ico = opaque;
4410    bdrv_co_invalidate_cache(ico->bs, ico->errp);
4411    ico->done = true;
4412}
4413
4414void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
4415{
4416    Coroutine *co;
4417    InvalidateCacheCo ico = {
4418        .bs = bs,
4419        .done = false,
4420        .errp = errp
4421    };
4422
4423    if (qemu_in_coroutine()) {
4424        /* Fast-path if already in coroutine context */
4425        bdrv_invalidate_cache_co_entry(&ico);
4426    } else {
4427        co = qemu_coroutine_create(bdrv_invalidate_cache_co_entry, &ico);
4428        qemu_coroutine_enter(co);
4429        BDRV_POLL_WHILE(bs, !ico.done);
4430    }
4431}
4432
4433void bdrv_invalidate_cache_all(Error **errp)
4434{
4435    BlockDriverState *bs;
4436    Error *local_err = NULL;
4437    BdrvNextIterator it;
4438
4439    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
4440        AioContext *aio_context = bdrv_get_aio_context(bs);
4441
4442        aio_context_acquire(aio_context);
4443        bdrv_invalidate_cache(bs, &local_err);
4444        aio_context_release(aio_context);
4445        if (local_err) {
4446            error_propagate(errp, local_err);
4447            bdrv_next_cleanup(&it);
4448            return;
4449        }
4450    }
4451}
4452
4453static int bdrv_inactivate_recurse(BlockDriverState *bs,
4454                                   bool setting_flag)
4455{
4456    BdrvChild *child, *parent;
4457    int ret;
4458
4459    if (!bs->drv) {
4460        return -ENOMEDIUM;
4461    }
4462
4463    if (!setting_flag && bs->drv->bdrv_inactivate) {
4464        ret = bs->drv->bdrv_inactivate(bs);
4465        if (ret < 0) {
4466            return ret;
4467        }
4468    }
4469
4470    if (setting_flag && !(bs->open_flags & BDRV_O_INACTIVE)) {
4471        uint64_t perm, shared_perm;
4472
4473        QLIST_FOREACH(parent, &bs->parents, next_parent) {
4474            if (parent->role->inactivate) {
4475                ret = parent->role->inactivate(parent);
4476                if (ret < 0) {
4477                    return ret;
4478                }
4479            }
4480        }
4481
4482        bs->open_flags |= BDRV_O_INACTIVE;
4483
4484        /* Update permissions, they may differ for inactive nodes */
4485        bdrv_get_cumulative_perm(bs, &perm, &shared_perm);
4486        bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, &error_abort);
4487        bdrv_set_perm(bs, perm, shared_perm);
4488    }
4489
4490    QLIST_FOREACH(child, &bs->children, next) {
4491        ret = bdrv_inactivate_recurse(child->bs, setting_flag);
4492        if (ret < 0) {
4493            return ret;
4494        }
4495    }
4496
4497    /* At this point persistent bitmaps should be already stored by the format
4498     * driver */
4499    bdrv_release_persistent_dirty_bitmaps(bs);
4500
4501    return 0;
4502}
4503
4504int bdrv_inactivate_all(void)
4505{
4506    BlockDriverState *bs = NULL;
4507    BdrvNextIterator it;
4508    int ret = 0;
4509    int pass;
4510    GSList *aio_ctxs = NULL, *ctx;
4511
4512    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
4513        AioContext *aio_context = bdrv_get_aio_context(bs);
4514
4515        if (!g_slist_find(aio_ctxs, aio_context)) {
4516            aio_ctxs = g_slist_prepend(aio_ctxs, aio_context);
4517            aio_context_acquire(aio_context);
4518        }
4519    }
4520
4521    /* We do two passes of inactivation. The first pass calls to drivers'
4522     * .bdrv_inactivate callbacks recursively so all cache is flushed to disk;
4523     * the second pass sets the BDRV_O_INACTIVE flag so that no further write
4524     * is allowed. */
4525    for (pass = 0; pass < 2; pass++) {
4526        for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
4527            ret = bdrv_inactivate_recurse(bs, pass);
4528            if (ret < 0) {
4529                bdrv_next_cleanup(&it);
4530                goto out;
4531            }
4532        }
4533    }
4534
4535out:
4536    for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
4537        AioContext *aio_context = ctx->data;
4538        aio_context_release(aio_context);
4539    }
4540    g_slist_free(aio_ctxs);
4541
4542    return ret;
4543}
4544
4545/**************************************************************/
4546/* removable device support */
4547
4548/**
4549 * Return TRUE if the media is present
4550 */
4551bool bdrv_is_inserted(BlockDriverState *bs)
4552{
4553    BlockDriver *drv = bs->drv;
4554    BdrvChild *child;
4555
4556    if (!drv) {
4557        return false;
4558    }
4559    if (drv->bdrv_is_inserted) {
4560        return drv->bdrv_is_inserted(bs);
4561    }
4562    QLIST_FOREACH(child, &bs->children, next) {
4563        if (!bdrv_is_inserted(child->bs)) {
4564            return false;
4565        }
4566    }
4567    return true;
4568}
4569
4570/**
4571 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
4572 */
4573void bdrv_eject(BlockDriverState *bs, bool eject_flag)
4574{
4575    BlockDriver *drv = bs->drv;
4576
4577    if (drv && drv->bdrv_eject) {
4578        drv->bdrv_eject(bs, eject_flag);
4579    }
4580}
4581
4582/**
4583 * Lock or unlock the media (if it is locked, the user won't be able
4584 * to eject it manually).
4585 */
4586void bdrv_lock_medium(BlockDriverState *bs, bool locked)
4587{
4588    BlockDriver *drv = bs->drv;
4589
4590    trace_bdrv_lock_medium(bs, locked);
4591
4592    if (drv && drv->bdrv_lock_medium) {
4593        drv->bdrv_lock_medium(bs, locked);
4594    }
4595}
4596
4597/* Get a reference to bs */
4598void bdrv_ref(BlockDriverState *bs)
4599{
4600    bs->refcnt++;
4601}
4602
4603/* Release a previously grabbed reference to bs.
4604 * If after releasing, reference count is zero, the BlockDriverState is
4605 * deleted. */
4606void bdrv_unref(BlockDriverState *bs)
4607{
4608    if (!bs) {
4609        return;
4610    }
4611    assert(bs->refcnt > 0);
4612    if (--bs->refcnt == 0) {
4613        bdrv_delete(bs);
4614    }
4615}
4616
4617struct BdrvOpBlocker {
4618    Error *reason;
4619    QLIST_ENTRY(BdrvOpBlocker) list;
4620};
4621
4622bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
4623{
4624    BdrvOpBlocker *blocker;
4625    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
4626    if (!QLIST_EMPTY(&bs->op_blockers[op])) {
4627        blocker = QLIST_FIRST(&bs->op_blockers[op]);
4628        error_propagate(errp, error_copy(blocker->reason));
4629        error_prepend(errp, "Node '%s' is busy: ",
4630                      bdrv_get_device_or_node_name(bs));
4631        return true;
4632    }
4633    return false;
4634}
4635
4636void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
4637{
4638    BdrvOpBlocker *blocker;
4639    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
4640
4641    blocker = g_new0(BdrvOpBlocker, 1);
4642    blocker->reason = reason;
4643    QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
4644}
4645
4646void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
4647{
4648    BdrvOpBlocker *blocker, *next;
4649    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
4650    QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
4651        if (blocker->reason == reason) {
4652            QLIST_REMOVE(blocker, list);
4653            g_free(blocker);
4654        }
4655    }
4656}
4657
4658void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
4659{
4660    int i;
4661    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
4662        bdrv_op_block(bs, i, reason);
4663    }
4664}
4665
4666void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
4667{
4668    int i;
4669    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
4670        bdrv_op_unblock(bs, i, reason);
4671    }
4672}
4673
4674bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
4675{
4676    int i;
4677
4678    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
4679        if (!QLIST_EMPTY(&bs->op_blockers[i])) {
4680            return false;
4681        }
4682    }
4683    return true;
4684}
4685
/*
 * Build the creation options for a new image and hand them to bdrv_create().
 *
 * @filename:      name of the image to create; also used to select the
 *                 protocol driver
 * @fmt:           name of the format driver to use
 * @base_filename: if non-NULL, stored as the BLOCK_OPT_BACKING_FILE option
 * @base_fmt:      if non-NULL, stored as the BLOCK_OPT_BACKING_FMT option
 * @options:       if non-NULL, an option string parsed into the creation
 *                 options (the "-o" options)
 * @img_size:      initial value of BLOCK_OPT_SIZE; a size that reads back as
 *                 -1 means "not specified" and is then taken from the backing
 *                 image
 * @flags:         BDRV_O_* flags; BDRV_O_NO_BACKING prevents the backing file
 *                 from being opened, the other flags (minus BDRV_O_RDWR,
 *                 BDRV_O_SNAPSHOT and BDRV_O_NO_BACKING) are used when opening
 *                 the backing file
 * @quiet:         if false, a "Formatting ..." line with the final options is
 *                 printed to stdout
 * @errp:          error object, set on failure
 *
 * The function has no return value; failure is reported through @errp only.
 */
void bdrv_img_create(const char *filename, const char *fmt,
                     const char *base_filename, const char *base_fmt,
                     char *options, uint64_t img_size, int flags, bool quiet,
                     Error **errp)
{
    QemuOptsList *create_opts = NULL;
    QemuOpts *opts = NULL;
    const char *backing_fmt, *backing_file;
    int64_t size;
    BlockDriver *drv, *proto_drv;
    Error *local_err = NULL;
    int ret = 0;

    /* Find driver and parse its options */
    drv = bdrv_find_format(fmt);
    if (!drv) {
        error_setg(errp, "Unknown file format '%s'", fmt);
        return;
    }

    /* On failure bdrv_find_protocol() is given errp directly, so nothing
     * more is reported here */
    proto_drv = bdrv_find_protocol(filename, true, errp);
    if (!proto_drv) {
        return;
    }

    if (!drv->create_opts) {
        error_setg(errp, "Format driver '%s' does not support image creation",
                   drv->format_name);
        return;
    }

    if (!proto_drv->create_opts) {
        error_setg(errp, "Protocol driver '%s' does not support image creation",
                   proto_drv->format_name);
        return;
    }

    /* Nothing has been allocated up to this point, hence the plain returns
     * above; from here on every exit goes through the 'out' label */
    create_opts = qemu_opts_append(create_opts, drv->create_opts);
    create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);

    /* Create parameter list with default values */
    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);

    /* Parse -o options */
    if (options) {
        qemu_opts_do_parse(opts, options, NULL, &local_err);
        if (local_err) {
            /* The detailed parse error is reported right away; the caller
             * only receives the generic message set below */
            error_report_err(local_err);
            local_err = NULL;
            error_setg(errp, "Invalid options for file format '%s'", fmt);
            goto out;
        }
    }

    if (base_filename) {
        qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
        if (local_err) {
            /* NOTE(review): local_err is still set when errp is filled here,
             * so both reach error_propagate() at 'out' - confirm that it
             * disposes of the second error */
            error_setg(errp, "Backing file not supported for file format '%s'",
                       fmt);
            goto out;
        }
    }

    if (base_fmt) {
        qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
        if (local_err) {
            /* Same error handling as for the backing file option above */
            error_setg(errp, "Backing file format not supported for file "
                             "format '%s'", fmt);
            goto out;
        }
    }

    /* Read the backing file back from opts: it may come from base_filename
     * or from the -o option string */
    backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
    if (backing_file) {
        if (!strcmp(filename, backing_file)) {
            error_setg(errp, "Error: Trying to create an image with the "
                             "same filename as the backing file");
            goto out;
        }
    }

    backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);

    /* The size for the image must always be specified, unless we have a backing
     * file and we have not been forbidden from opening it. */
    size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, img_size);
    if (backing_file && !(flags & BDRV_O_NO_BACKING)) {
        BlockDriverState *bs;
        char *full_backing = g_new0(char, PATH_MAX);
        int back_flags;
        QDict *backing_options = NULL;

        /* Resolve backing_file against filename into full_backing */
        bdrv_get_full_backing_filename_from_filename(filename, backing_file,
                                                     full_backing, PATH_MAX,
                                                     &local_err);
        if (local_err) {
            g_free(full_backing);
            goto out;
        }

        /* backing files always opened read-only */
        back_flags = flags;
        back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

        backing_options = qdict_new();
        if (backing_fmt) {
            qdict_put_str(backing_options, "driver", backing_fmt);
        }
        qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true);

        /* backing_options is not released in this function; presumably
         * bdrv_open() takes over the reference - TODO confirm */
        bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
                       &local_err);
        g_free(full_backing);
        if (!bs && size != -1) {
            /* Couldn't open BS, but we have a size, so it's nonfatal */
            warn_reportf_err(local_err,
                            "Could not verify backing image. "
                            "This may become an error in future versions.\n");
            local_err = NULL;
        } else if (!bs) {
            /* Couldn't open bs, do not have size */
            error_append_hint(&local_err,
                              "Could not open backing image to determine size.\n");
            goto out;
        } else {
            if (size == -1) {
                /* Opened BS, have no size */
                size = bdrv_getlength(bs);
                if (size < 0) {
                    /* A negative length is used as a negated errno value */
                    error_setg_errno(errp, -size, "Could not get size of '%s'",
                                     backing_file);
                    bdrv_unref(bs);
                    goto out;
                }
                qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
            }
            /* The backing image was only opened for inspection */
            bdrv_unref(bs);
        }
    } /* (backing_file && !(flags & BDRV_O_NO_BACKING)) */

    if (size == -1) {
        error_setg(errp, "Image creation needs a size parameter");
        goto out;
    }

    if (!quiet) {
        printf("Formatting '%s', fmt=%s ", filename, fmt);
        qemu_opts_print(opts, " ");
        puts("");
    }

    ret = bdrv_create(drv, filename, opts, &local_err);

    if (ret == -EFBIG) {
        /* This is generally a better message than whatever the driver would
         * deliver (especially because of the cluster_size_hint), since that
         * is most probably not much different from "image too large". */
        const char *cluster_size_hint = "";
        if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
            cluster_size_hint = " (try using a larger cluster size)";
        }
        error_setg(errp, "The image size is too large for file format '%s'"
                   "%s", fmt, cluster_size_hint);
        /* The driver's own error is replaced by the message above */
        error_free(local_err);
        local_err = NULL;
    }

out:
    /* Common cleanup; any error still held in local_err is handed to errp */
    qemu_opts_del(opts);
    qemu_opts_free(create_opts);
    error_propagate(errp, local_err);
}
4859
4860AioContext *bdrv_get_aio_context(BlockDriverState *bs)
4861{
4862    return bs ? bs->aio_context : qemu_get_aio_context();
4863}
4864
4865AioWait *bdrv_get_aio_wait(BlockDriverState *bs)
4866{
4867    return bs ? &bs->wait : NULL;
4868}
4869
4870void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co)
4871{
4872    aio_co_enter(bdrv_get_aio_context(bs), co);
4873}
4874
4875static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban)
4876{
4877    QLIST_REMOVE(ban, list);
4878    g_free(ban);
4879}
4880
4881void bdrv_detach_aio_context(BlockDriverState *bs)
4882{
4883    BdrvAioNotifier *baf, *baf_tmp;
4884    BdrvChild *child;
4885
4886    if (!bs->drv) {
4887        return;
4888    }
4889
4890    assert(!bs->walking_aio_notifiers);
4891    bs->walking_aio_notifiers = true;
4892    QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) {
4893        if (baf->deleted) {
4894            bdrv_do_remove_aio_context_notifier(baf);
4895        } else {
4896            baf->detach_aio_context(baf->opaque);
4897        }
4898    }
4899    /* Never mind iterating again to check for ->deleted.  bdrv_close() will
4900     * remove remaining aio notifiers if we aren't called again.
4901     */
4902    bs->walking_aio_notifiers = false;
4903
4904    if (bs->drv->bdrv_detach_aio_context) {
4905        bs->drv->bdrv_detach_aio_context(bs);
4906    }
4907    QLIST_FOREACH(child, &bs->children, next) {
4908        bdrv_detach_aio_context(child->bs);
4909    }
4910
4911    bs->aio_context = NULL;
4912}
4913
4914void bdrv_attach_aio_context(BlockDriverState *bs,
4915                             AioContext *new_context)
4916{
4917    BdrvAioNotifier *ban, *ban_tmp;
4918    BdrvChild *child;
4919
4920    if (!bs->drv) {
4921        return;
4922    }
4923
4924    bs->aio_context = new_context;
4925
4926    QLIST_FOREACH(child, &bs->children, next) {
4927        bdrv_attach_aio_context(child->bs, new_context);
4928    }
4929    if (bs->drv->bdrv_attach_aio_context) {
4930        bs->drv->bdrv_attach_aio_context(bs, new_context);
4931    }
4932
4933    assert(!bs->walking_aio_notifiers);
4934    bs->walking_aio_notifiers = true;
4935    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_tmp) {
4936        if (ban->deleted) {
4937            bdrv_do_remove_aio_context_notifier(ban);
4938        } else {
4939            ban->attached_aio_context(new_context, ban->opaque);
4940        }
4941    }
4942    bs->walking_aio_notifiers = false;
4943}
4944
/*
 * Move @bs from its current AioContext to @new_context.
 *
 * The node is quiesced first (external events disabled on the old context,
 * parents drained, the node itself drained, pending work polled until
 * aio_poll() reports no progress), then detached from the old context and
 * attached to @new_context.  The quiescing steps are undone at the end.
 */
void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
{
    /* The old context; it is needed again after bs has been re-attached */
    AioContext *ctx = bdrv_get_aio_context(bs);

    aio_disable_external(ctx);
    bdrv_parent_drained_begin(bs, NULL);
    bdrv_drain(bs); /* ensure there are no in-flight requests */

    while (aio_poll(ctx, false)) {
        /* wait for all bottom halves to execute */
    }

    bdrv_detach_aio_context(bs);

    /* This function executes in the old AioContext so acquire the new one in
     * case it runs in a different thread.
     */
    aio_context_acquire(new_context);
    bdrv_attach_aio_context(bs, new_context);
    bdrv_parent_drained_end(bs, NULL);
    /* Re-enable external events on the old context, matching the
     * aio_disable_external() call above */
    aio_enable_external(ctx);
    aio_context_release(new_context);
}
4968
4969void bdrv_add_aio_context_notifier(BlockDriverState *bs,
4970        void (*attached_aio_context)(AioContext *new_context, void *opaque),
4971        void (*detach_aio_context)(void *opaque), void *opaque)
4972{
4973    BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
4974    *ban = (BdrvAioNotifier){
4975        .attached_aio_context = attached_aio_context,
4976        .detach_aio_context   = detach_aio_context,
4977        .opaque               = opaque
4978    };
4979
4980    QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
4981}
4982
4983void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
4984                                      void (*attached_aio_context)(AioContext *,
4985                                                                   void *),
4986                                      void (*detach_aio_context)(void *),
4987                                      void *opaque)
4988{
4989    BdrvAioNotifier *ban, *ban_next;
4990
4991    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
4992        if (ban->attached_aio_context == attached_aio_context &&
4993            ban->detach_aio_context   == detach_aio_context   &&
4994            ban->opaque               == opaque               &&
4995            ban->deleted              == false)
4996        {
4997            if (bs->walking_aio_notifiers) {
4998                ban->deleted = true;
4999            } else {
5000                bdrv_do_remove_aio_context_notifier(ban);
5001            }
5002            return;
5003        }
5004    }
5005
5006    abort();
5007}
5008
5009int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
5010                       BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
5011{
5012    if (!bs->drv) {
5013        return -ENOMEDIUM;
5014    }
5015    if (!bs->drv->bdrv_amend_options) {
5016        return -ENOTSUP;
5017    }
5018    return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque);
5019}
5020
5021/* This function will be called by the bdrv_recurse_is_first_non_filter method
5022 * of block filter and by bdrv_is_first_non_filter.
5023 * It is used to test if the given bs is the candidate or recurse more in the
5024 * node graph.
5025 */
5026bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
5027                                      BlockDriverState *candidate)
5028{
5029    /* return false if basic checks fails */
5030    if (!bs || !bs->drv) {
5031        return false;
5032    }
5033
5034    /* the code reached a non block filter driver -> check if the bs is
5035     * the same as the candidate. It's the recursion termination condition.
5036     */
5037    if (!bs->drv->is_filter) {
5038        return bs == candidate;
5039    }
5040    /* Down this path the driver is a block filter driver */
5041
5042    /* If the block filter recursion method is defined use it to recurse down
5043     * the node graph.
5044     */
5045    if (bs->drv->bdrv_recurse_is_first_non_filter) {
5046        return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
5047    }
5048
5049    /* the driver is a block filter but don't allow to recurse -> return false
5050     */
5051    return false;
5052}
5053
5054/* This function checks if the candidate is the first non filter bs down it's
5055 * bs chain. Since we don't have pointers to parents it explore all bs chains
5056 * from the top. Some filters can choose not to pass down the recursion.
5057 */
5058bool bdrv_is_first_non_filter(BlockDriverState *candidate)
5059{
5060    BlockDriverState *bs;
5061    BdrvNextIterator it;
5062
5063    /* walk down the bs forest recursively */
5064    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
5065        bool perm;
5066
5067        /* try to recurse in this top level bs */
5068        perm = bdrv_recurse_is_first_non_filter(bs, candidate);
5069
5070        /* candidate is the first non filter */
5071        if (perm) {
5072            bdrv_next_cleanup(&it);
5073            return true;
5074        }
5075    }
5076
5077    return false;
5078}
5079
5080BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
5081                                        const char *node_name, Error **errp)
5082{
5083    BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
5084    AioContext *aio_context;
5085
5086    if (!to_replace_bs) {
5087        error_setg(errp, "Node name '%s' not found", node_name);
5088        return NULL;
5089    }
5090
5091    aio_context = bdrv_get_aio_context(to_replace_bs);
5092    aio_context_acquire(aio_context);
5093
5094    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
5095        to_replace_bs = NULL;
5096        goto out;
5097    }
5098
5099    /* We don't want arbitrary node of the BDS chain to be replaced only the top
5100     * most non filter in order to prevent data corruption.
5101     * Another benefit is that this tests exclude backing files which are
5102     * blocked by the backing blockers.
5103     */
5104    if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
5105        error_setg(errp, "Only top most non filter can be replaced");
5106        to_replace_bs = NULL;
5107        goto out;
5108    }
5109
5110out:
5111    aio_context_release(aio_context);
5112    return to_replace_bs;
5113}
5114
5115static bool append_open_options(QDict *d, BlockDriverState *bs)
5116{
5117    const QDictEntry *entry;
5118    QemuOptDesc *desc;
5119    BdrvChild *child;
5120    bool found_any = false;
5121    const char *p;
5122
5123    for (entry = qdict_first(bs->options); entry;
5124         entry = qdict_next(bs->options, entry))
5125    {
5126        /* Exclude options for children */
5127        QLIST_FOREACH(child, &bs->children, next) {
5128            if (strstart(qdict_entry_key(entry), child->name, &p)
5129                && (!*p || *p == '.'))
5130            {
5131                break;
5132            }
5133        }
5134        if (child) {
5135            continue;
5136        }
5137
5138        /* And exclude all non-driver-specific options */
5139        for (desc = bdrv_runtime_opts.desc; desc->name; desc++) {
5140            if (!strcmp(qdict_entry_key(entry), desc->name)) {
5141                break;
5142            }
5143        }
5144        if (desc->name) {
5145            continue;
5146        }
5147
5148        qobject_incref(qdict_entry_value(entry));
5149        qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
5150        found_any = true;
5151    }
5152
5153    return found_any;
5154}
5155
/* Updates the following BDS fields:
 *  - exact_filename: A filename which may be used for opening a block device
 *                    which (mostly) equals the given BDS (even without any
 *                    other options; so reading and writing must return the same
 *                    results, but caching etc. may be different)
 *  - full_open_options: Options which, when given when opening a block device
 *                       (without a filename), result in a BDS (mostly)
 *                       equalling the given one
 *  - filename: If exact_filename is set, it is copied here. Otherwise,
 *              full_open_options is converted to a JSON object, prefixed with
 *              "json:" (for use through the JSON pseudo protocol) and put here.
 *
 * Nothing is done for a BDS without a driver. If the BDS has a file child,
 * that child is refreshed first (recursively).
 */
void bdrv_refresh_filename(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    QDict *opts;

    if (!drv) {
        return;
    }

    /* This BDS's file name will most probably depend on its file's name, so
     * refresh that first */
    if (bs->file) {
        bdrv_refresh_filename(bs->file->bs);
    }

    if (drv->bdrv_refresh_filename) {
        /* Obsolete information is of no use here, so drop the old file name
         * information before refreshing it */
        bs->exact_filename[0] = '\0';
        if (bs->full_open_options) {
            QDECREF(bs->full_open_options);
            bs->full_open_options = NULL;
        }

        /* The driver-specific options are only lent to the driver callback;
         * the reference held here is dropped right after the call */
        opts = qdict_new();
        append_open_options(opts, bs);
        drv->bdrv_refresh_filename(bs, opts);
        QDECREF(opts);
    } else if (bs->file) {
        /* Try to reconstruct valid information from the underlying file */
        bool has_open_options;

        bs->exact_filename[0] = '\0';
        if (bs->full_open_options) {
            QDECREF(bs->full_open_options);
            bs->full_open_options = NULL;
        }

        opts = qdict_new();
        has_open_options = append_open_options(opts, bs);

        /* If no specific options have been given for this BDS, the filename of
         * the underlying file should suffice for this one as well */
        if (bs->file->bs->exact_filename[0] && !has_open_options) {
            /* NOTE(review): unbounded copy; presumably both exact_filename
             * buffers have the same size - confirm in the BDS definition */
            strcpy(bs->exact_filename, bs->file->bs->exact_filename);
        }
        /* Reconstructing the full options QDict is simple for most format block
         * drivers, as long as the full options are known for the underlying
         * file BDS. The full options QDict of that file BDS should somehow
         * contain a representation of the filename, therefore the following
         * suffices without querying the (exact_)filename of this BDS. */
        if (bs->file->bs->full_open_options) {
            qdict_put_str(opts, "driver", drv->format_name);
            /* Extra reference for the file's options, which are now also
             * stored under "file" in our own dict */
            QINCREF(bs->file->bs->full_open_options);
            qdict_put(opts, "file", bs->file->bs->full_open_options);

            /* opts is kept as the new full_open_options, so no QDECREF */
            bs->full_open_options = opts;
        } else {
            QDECREF(opts);
        }
    } else if (!bs->full_open_options && qdict_size(bs->options)) {
        /* There is no underlying file BDS (at least referenced by BDS.file),
         * so the full options QDict should be equal to the options given
         * specifically for this block device when it was opened (plus the
         * driver specification).
         * Because those options don't change, there is no need to update
         * full_open_options when it's already set. */

        opts = qdict_new();
        append_open_options(opts, bs);
        qdict_put_str(opts, "driver", drv->format_name);

        if (bs->exact_filename[0]) {
            /* This may not work for all block protocol drivers (some may
             * require this filename to be parsed), but we have to find some
             * default solution here, so just include it. If some block driver
             * does not support pure options without any filename at all or
             * needs some special format of the options QDict, it needs to
             * implement the driver-specific bdrv_refresh_filename() function.
             */
            qdict_put_str(opts, "filename", bs->exact_filename);
        }

        bs->full_open_options = opts;
    }

    /* Finally derive bs->filename: the exact filename wins, otherwise the
     * full options are serialized as a "json:" pseudo-filename. If neither
     * is available, bs->filename is left untouched. */
    if (bs->exact_filename[0]) {
        pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
    } else if (bs->full_open_options) {
        QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
        /* snprintf() bounds the result to the size of bs->filename */
        snprintf(bs->filename, sizeof(bs->filename), "json:%s",
                 qstring_get_str(json));
        QDECREF(json);
    }
}
5263
5264/*
5265 * Hot add/remove a BDS's child. So the user can take a child offline when
5266 * it is broken and take a new child online
5267 */
5268void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs,
5269                    Error **errp)
5270{
5271
5272    if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) {
5273        error_setg(errp, "The node %s does not support adding a child",
5274                   bdrv_get_device_or_node_name(parent_bs));
5275        return;
5276    }
5277
5278    if (!QLIST_EMPTY(&child_bs->parents)) {
5279        error_setg(errp, "The node %s already has a parent",
5280                   child_bs->node_name);
5281        return;
5282    }
5283
5284    parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp);
5285}
5286
5287void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp)
5288{
5289    BdrvChild *tmp;
5290
5291    if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) {
5292        error_setg(errp, "The node %s does not support removing a child",
5293                   bdrv_get_device_or_node_name(parent_bs));
5294        return;
5295    }
5296
5297    QLIST_FOREACH(tmp, &parent_bs->children, next) {
5298        if (tmp == child) {
5299            break;
5300        }
5301    }
5302
5303    if (!tmp) {
5304        error_setg(errp, "The node %s does not have a child named %s",
5305                   bdrv_get_device_or_node_name(parent_bs),
5306                   bdrv_get_device_or_node_name(child->bs));
5307        return;
5308    }
5309
5310    parent_bs->drv->bdrv_del_child(parent_bs, child, errp);
5311}
5312
5313bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
5314                                     uint32_t granularity, Error **errp)
5315{
5316    BlockDriver *drv = bs->drv;
5317
5318    if (!drv) {
5319        error_setg_errno(errp, ENOMEDIUM,
5320                         "Can't store persistent bitmaps to %s",
5321                         bdrv_get_device_or_node_name(bs));
5322        return false;
5323    }
5324
5325    if (!drv->bdrv_can_store_new_dirty_bitmap) {
5326        error_setg_errno(errp, ENOTSUP,
5327                         "Can't store persistent bitmaps to %s",
5328                         bdrv_get_device_or_node_name(bs));
5329        return false;
5330    }
5331
5332    return drv->bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp);
5333}
5334