qemu/blockdev.c
<<
>>
Prefs
   1/*
   2 * QEMU host block devices
   3 *
   4 * Copyright (c) 2003-2008 Fabrice Bellard
   5 *
   6 * This work is licensed under the terms of the GNU GPL, version 2 or
   7 * later.  See the COPYING file in the top-level directory.
   8 *
   9 * This file incorporates work covered by the following copyright and
  10 * permission notice:
  11 *
  12 * Copyright (c) 2003-2008 Fabrice Bellard
  13 *
  14 * Permission is hereby granted, free of charge, to any person obtaining a copy
  15 * of this software and associated documentation files (the "Software"), to deal
  16 * in the Software without restriction, including without limitation the rights
  17 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  18 * copies of the Software, and to permit persons to whom the Software is
  19 * furnished to do so, subject to the following conditions:
  20 *
  21 * The above copyright notice and this permission notice shall be included in
  22 * all copies or substantial portions of the Software.
  23 *
  24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  25 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  26 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  27 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  28 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  29 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  30 * THE SOFTWARE.
  31 */
  32
  33#include "qemu/osdep.h"
  34#include "sysemu/block-backend.h"
  35#include "sysemu/blockdev.h"
  36#include "hw/block/block.h"
  37#include "block/blockjob.h"
  38#include "block/dirty-bitmap.h"
  39#include "block/qdict.h"
  40#include "block/throttle-groups.h"
  41#include "monitor/monitor.h"
  42#include "qemu/error-report.h"
  43#include "qemu/option.h"
  44#include "qemu/qemu-print.h"
  45#include "qemu/config-file.h"
  46#include "qapi/qapi-commands-block.h"
  47#include "qapi/qapi-commands-transaction.h"
  48#include "qapi/qapi-visit-block-core.h"
  49#include "qapi/qmp/qdict.h"
  50#include "qapi/qmp/qnum.h"
  51#include "qapi/qmp/qstring.h"
  52#include "qapi/error.h"
  53#include "qapi/qmp/qerror.h"
  54#include "qapi/qmp/qlist.h"
  55#include "qapi/qobject-output-visitor.h"
  56#include "sysemu/sysemu.h"
  57#include "sysemu/iothread.h"
  58#include "block/block_int.h"
  59#include "block/trace.h"
  60#include "sysemu/runstate.h"
  61#include "sysemu/replay.h"
  62#include "qemu/cutils.h"
  63#include "qemu/help_option.h"
  64#include "qemu/main-loop.h"
  65#include "qemu/throttle-options.h"
  66
/*
 * List of the monitor-owned BlockDriverStates; nodes are appended by
 * bdrv_set_monitor_owned() and walked via the monitor_list link.
 * Protected by BQL
 */
QTAILQ_HEAD(, BlockDriverState) monitor_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(monitor_bdrv_states);

/* Append @bs to the tail of monitor_bdrv_states. */
void bdrv_set_monitor_owned(BlockDriverState *bs)
{
    GLOBAL_STATE_CODE();
    QTAILQ_INSERT_TAIL(&monitor_bdrv_states, bs, monitor_list);
}
  76
/*
 * Textual name of each block interface type.  These are the strings
 * matched against (and stored into) the "if" drive option.
 */
static const char *const if_name[IF_COUNT] = {
    [IF_NONE] = "none",
    [IF_IDE] = "ide",
    [IF_SCSI] = "scsi",
    [IF_FLOPPY] = "floppy",
    [IF_PFLASH] = "pflash",
    [IF_MTD] = "mtd",
    [IF_SD] = "sd",
    [IF_VIRTIO] = "virtio",
    [IF_XEN] = "xen",
};
  88
/*
 * Units per bus for each interface type.  Types not listed here are
 * zero-initialized; for those, drive_index_to_bus_id() yields bus 0 and
 * drive_index_to_unit_id() yields the index itself.
 * Entries can be replaced at run time by override_max_devs().
 */
static int if_max_devs[IF_COUNT] = {
    /*
     * Do not change these numbers!  They govern how drive option
     * index maps to unit and bus.  That mapping is ABI.
     *
     * All controllers used to implement if=T drives need to support
     * if_max_devs[T] units, for any T with if_max_devs[T] != 0.
     * Otherwise, some index values map to "impossible" bus, unit
     * values.
     *
     * For instance, if you change [IF_SCSI] to 255, -drive
     * if=scsi,index=12 no longer means bus=1,unit=5, but
     * bus=0,unit=12.  With an lsi53c895a controller (7 units max),
     * the drive can't be set up.  Regression.
     */
    [IF_IDE] = 2,
    [IF_SCSI] = 7,
};
 107
 108/**
 109 * Boards may call this to offer board-by-board overrides
 110 * of the default, global values.
 111 */
 112void override_max_devs(BlockInterfaceType type, int max_devs)
 113{
 114    BlockBackend *blk;
 115    DriveInfo *dinfo;
 116
 117    GLOBAL_STATE_CODE();
 118
 119    if (max_devs <= 0) {
 120        return;
 121    }
 122
 123    for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
 124        dinfo = blk_legacy_dinfo(blk);
 125        if (dinfo->type == type) {
 126            fprintf(stderr, "Cannot override units-per-bus property of"
 127                    " the %s interface, because a drive of that type has"
 128                    " already been added.\n", if_name[type]);
 129            g_assert_not_reached();
 130        }
 131    }
 132
 133    if_max_devs[type] = max_devs;
 134}
 135
 136/*
 137 * We automatically delete the drive when a device using it gets
 138 * unplugged.  Questionable feature, but we can't just drop it.
 139 * Device models call blockdev_mark_auto_del() to schedule the
 140 * automatic deletion, and generic qdev code calls blockdev_auto_del()
 141 * when deletion is actually safe.
 142 */
 143void blockdev_mark_auto_del(BlockBackend *blk)
 144{
 145    DriveInfo *dinfo = blk_legacy_dinfo(blk);
 146    BlockJob *job;
 147
 148    GLOBAL_STATE_CODE();
 149
 150    if (!dinfo) {
 151        return;
 152    }
 153
 154    JOB_LOCK_GUARD();
 155
 156    do {
 157        job = block_job_next_locked(NULL);
 158        while (job && (job->job.cancelled ||
 159                       job->job.deferred_to_main_loop ||
 160                       !block_job_has_bdrv(job, blk_bs(blk))))
 161        {
 162            job = block_job_next_locked(job);
 163        }
 164        if (job) {
 165            /*
 166             * This drops the job lock temporarily and polls, so we need to
 167             * restart processing the list from the start after this.
 168             */
 169            job_cancel_locked(&job->job, false);
 170        }
 171    } while (job);
 172
 173    dinfo->auto_del = 1;
 174}
 175
 176void blockdev_auto_del(BlockBackend *blk)
 177{
 178    DriveInfo *dinfo = blk_legacy_dinfo(blk);
 179    GLOBAL_STATE_CODE();
 180
 181    if (dinfo && dinfo->auto_del) {
 182        monitor_remove_blk(blk);
 183        blk_unref(blk);
 184    }
 185}
 186
 187static int drive_index_to_bus_id(BlockInterfaceType type, int index)
 188{
 189    int max_devs = if_max_devs[type];
 190    return max_devs ? index / max_devs : 0;
 191}
 192
 193static int drive_index_to_unit_id(BlockInterfaceType type, int index)
 194{
 195    int max_devs = if_max_devs[type];
 196    return max_devs ? index % max_devs : index;
 197}
 198
 199QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file,
 200                    const char *optstr)
 201{
 202    QemuOpts *opts;
 203
 204    GLOBAL_STATE_CODE();
 205
 206    opts = qemu_opts_parse_noisily(qemu_find_opts("drive"), optstr, false);
 207    if (!opts) {
 208        return NULL;
 209    }
 210    if (type != IF_DEFAULT) {
 211        qemu_opt_set(opts, "if", if_name[type], &error_abort);
 212    }
 213    if (index >= 0) {
 214        qemu_opt_set_number(opts, "index", index, &error_abort);
 215    }
 216    if (file)
 217        qemu_opt_set(opts, "file", file, &error_abort);
 218    return opts;
 219}
 220
 221DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit)
 222{
 223    BlockBackend *blk;
 224    DriveInfo *dinfo;
 225
 226    GLOBAL_STATE_CODE();
 227
 228    for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
 229        dinfo = blk_legacy_dinfo(blk);
 230        if (dinfo && dinfo->type == type
 231            && dinfo->bus == bus && dinfo->unit == unit) {
 232            return dinfo;
 233        }
 234    }
 235
 236    return NULL;
 237}
 238
 239/*
 240 * Check board claimed all -drive that are meant to be claimed.
 241 * Fatal error if any remain unclaimed.
 242 */
 243void drive_check_orphaned(void)
 244{
 245    BlockBackend *blk;
 246    DriveInfo *dinfo;
 247    Location loc;
 248    bool orphans = false;
 249
 250    GLOBAL_STATE_CODE();
 251
 252    for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
 253        dinfo = blk_legacy_dinfo(blk);
 254        /*
 255         * Ignore default drives, because we create certain default
 256         * drives unconditionally, then leave them unclaimed.  Not the
 257         * users fault.
 258         * Ignore IF_VIRTIO, because it gets desugared into -device,
 259         * so we can leave failing to -device.
 260         * Ignore IF_NONE, because leaving unclaimed IF_NONE remains
 261         * available for device_add is a feature.
 262         */
 263        if (dinfo->is_default || dinfo->type == IF_VIRTIO
 264            || dinfo->type == IF_NONE) {
 265            continue;
 266        }
 267        if (!blk_get_attached_dev(blk)) {
 268            loc_push_none(&loc);
 269            qemu_opts_loc_restore(dinfo->opts);
 270            error_report("machine type does not support"
 271                         " if=%s,bus=%d,unit=%d",
 272                         if_name[dinfo->type], dinfo->bus, dinfo->unit);
 273            loc_pop(&loc);
 274            orphans = true;
 275        }
 276    }
 277
 278    if (orphans) {
 279        exit(1);
 280    }
 281}
 282
 283DriveInfo *drive_get_by_index(BlockInterfaceType type, int index)
 284{
 285    GLOBAL_STATE_CODE();
 286    return drive_get(type,
 287                     drive_index_to_bus_id(type, index),
 288                     drive_index_to_unit_id(type, index));
 289}
 290
 291int drive_get_max_bus(BlockInterfaceType type)
 292{
 293    int max_bus;
 294    BlockBackend *blk;
 295    DriveInfo *dinfo;
 296
 297    GLOBAL_STATE_CODE();
 298
 299    max_bus = -1;
 300    for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
 301        dinfo = blk_legacy_dinfo(blk);
 302        if (dinfo && dinfo->type == type && dinfo->bus > max_bus) {
 303            max_bus = dinfo->bus;
 304        }
 305    }
 306    return max_bus;
 307}
 308
/*
 * Callback for bdrv_iterate_format(): print one format @name, preceded
 * by a space.  @opaque is unused.
 */
static void bdrv_format_print(void *opaque, const char *name)
{
    qemu_printf(" %s", name);
}
 313
/*
 * Pairs a bottom half with a BlockDriverState.
 * NOTE(review): not referenced in the part of the file reviewed here;
 * presumably used to act on @bs from a bottom half - confirm against
 * its users.
 */
typedef struct {
    QEMUBH *bh;
    BlockDriverState *bs;
} BDRVPutRefBH;
 318
 319static int parse_block_error_action(const char *buf, bool is_read, Error **errp)
 320{
 321    if (!strcmp(buf, "ignore")) {
 322        return BLOCKDEV_ON_ERROR_IGNORE;
 323    } else if (!is_read && !strcmp(buf, "enospc")) {
 324        return BLOCKDEV_ON_ERROR_ENOSPC;
 325    } else if (!strcmp(buf, "stop")) {
 326        return BLOCKDEV_ON_ERROR_STOP;
 327    } else if (!strcmp(buf, "report")) {
 328        return BLOCKDEV_ON_ERROR_REPORT;
 329    } else {
 330        error_setg(errp, "'%s' invalid %s error action",
 331                   buf, is_read ? "read" : "write");
 332        return -1;
 333    }
 334}
 335
 336static bool parse_stats_intervals(BlockAcctStats *stats, QList *intervals,
 337                                  Error **errp)
 338{
 339    const QListEntry *entry;
 340    for (entry = qlist_first(intervals); entry; entry = qlist_next(entry)) {
 341        switch (qobject_type(entry->value)) {
 342
 343        case QTYPE_QSTRING: {
 344            unsigned long long length;
 345            const char *str = qstring_get_str(qobject_to(QString,
 346                                                         entry->value));
 347            if (parse_uint_full(str, &length, 10) == 0 &&
 348                length > 0 && length <= UINT_MAX) {
 349                block_acct_add_interval(stats, (unsigned) length);
 350            } else {
 351                error_setg(errp, "Invalid interval length: %s", str);
 352                return false;
 353            }
 354            break;
 355        }
 356
 357        case QTYPE_QNUM: {
 358            int64_t length = qnum_get_int(qobject_to(QNum, entry->value));
 359
 360            if (length > 0 && length <= UINT_MAX) {
 361                block_acct_add_interval(stats, (unsigned) length);
 362            } else {
 363                error_setg(errp, "Invalid interval length: %" PRId64, length);
 364                return false;
 365            }
 366            break;
 367        }
 368
 369        default:
 370            error_setg(errp, "The specification of stats-intervals is invalid");
 371            return false;
 372        }
 373    }
 374    return true;
 375}
 376
/* Media type of a legacy drive, as selected by the "media" option. */
typedef enum { MEDIA_DISK, MEDIA_CDROM } DriveMediaType;
 378
 379/* All parameters but @opts are optional and may be set to NULL. */
 380static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags,
 381    const char **throttling_group, ThrottleConfig *throttle_cfg,
 382    BlockdevDetectZeroesOptions *detect_zeroes, Error **errp)
 383{
 384    Error *local_error = NULL;
 385    const char *aio;
 386
 387    if (bdrv_flags) {
 388        if (qemu_opt_get_bool(opts, "copy-on-read", false)) {
 389            *bdrv_flags |= BDRV_O_COPY_ON_READ;
 390        }
 391
 392        if ((aio = qemu_opt_get(opts, "aio")) != NULL) {
 393            if (bdrv_parse_aio(aio, bdrv_flags) < 0) {
 394                error_setg(errp, "invalid aio option");
 395                return;
 396            }
 397        }
 398    }
 399
 400    /* disk I/O throttling */
 401    if (throttling_group) {
 402        *throttling_group = qemu_opt_get(opts, "throttling.group");
 403    }
 404
 405    if (throttle_cfg) {
 406        throttle_config_init(throttle_cfg);
 407        throttle_cfg->buckets[THROTTLE_BPS_TOTAL].avg =
 408            qemu_opt_get_number(opts, "throttling.bps-total", 0);
 409        throttle_cfg->buckets[THROTTLE_BPS_READ].avg  =
 410            qemu_opt_get_number(opts, "throttling.bps-read", 0);
 411        throttle_cfg->buckets[THROTTLE_BPS_WRITE].avg =
 412            qemu_opt_get_number(opts, "throttling.bps-write", 0);
 413        throttle_cfg->buckets[THROTTLE_OPS_TOTAL].avg =
 414            qemu_opt_get_number(opts, "throttling.iops-total", 0);
 415        throttle_cfg->buckets[THROTTLE_OPS_READ].avg =
 416            qemu_opt_get_number(opts, "throttling.iops-read", 0);
 417        throttle_cfg->buckets[THROTTLE_OPS_WRITE].avg =
 418            qemu_opt_get_number(opts, "throttling.iops-write", 0);
 419
 420        throttle_cfg->buckets[THROTTLE_BPS_TOTAL].max =
 421            qemu_opt_get_number(opts, "throttling.bps-total-max", 0);
 422        throttle_cfg->buckets[THROTTLE_BPS_READ].max  =
 423            qemu_opt_get_number(opts, "throttling.bps-read-max", 0);
 424        throttle_cfg->buckets[THROTTLE_BPS_WRITE].max =
 425            qemu_opt_get_number(opts, "throttling.bps-write-max", 0);
 426        throttle_cfg->buckets[THROTTLE_OPS_TOTAL].max =
 427            qemu_opt_get_number(opts, "throttling.iops-total-max", 0);
 428        throttle_cfg->buckets[THROTTLE_OPS_READ].max =
 429            qemu_opt_get_number(opts, "throttling.iops-read-max", 0);
 430        throttle_cfg->buckets[THROTTLE_OPS_WRITE].max =
 431            qemu_opt_get_number(opts, "throttling.iops-write-max", 0);
 432
 433        throttle_cfg->buckets[THROTTLE_BPS_TOTAL].burst_length =
 434            qemu_opt_get_number(opts, "throttling.bps-total-max-length", 1);
 435        throttle_cfg->buckets[THROTTLE_BPS_READ].burst_length  =
 436            qemu_opt_get_number(opts, "throttling.bps-read-max-length", 1);
 437        throttle_cfg->buckets[THROTTLE_BPS_WRITE].burst_length =
 438            qemu_opt_get_number(opts, "throttling.bps-write-max-length", 1);
 439        throttle_cfg->buckets[THROTTLE_OPS_TOTAL].burst_length =
 440            qemu_opt_get_number(opts, "throttling.iops-total-max-length", 1);
 441        throttle_cfg->buckets[THROTTLE_OPS_READ].burst_length =
 442            qemu_opt_get_number(opts, "throttling.iops-read-max-length", 1);
 443        throttle_cfg->buckets[THROTTLE_OPS_WRITE].burst_length =
 444            qemu_opt_get_number(opts, "throttling.iops-write-max-length", 1);
 445
 446        throttle_cfg->op_size =
 447            qemu_opt_get_number(opts, "throttling.iops-size", 0);
 448
 449        if (!throttle_is_valid(throttle_cfg, errp)) {
 450            return;
 451        }
 452    }
 453
 454    if (detect_zeroes) {
 455        *detect_zeroes =
 456            qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup,
 457                            qemu_opt_get(opts, "detect-zeroes"),
 458                            BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
 459                            &local_error);
 460        if (local_error) {
 461            error_propagate(errp, local_error);
 462            return;
 463        }
 464    }
 465}
 466
 467static OnOffAuto account_get_opt(QemuOpts *opts, const char *name)
 468{
 469    if (!qemu_opt_find(opts, name)) {
 470        return ON_OFF_AUTO_AUTO;
 471    }
 472    if (qemu_opt_get_bool(opts, name, true)) {
 473        return ON_OFF_AUTO_ON;
 474    }
 475    return ON_OFF_AUTO_OFF;
 476}
 477
/*
 * Create a BlockBackend from drive options and register it with the
 * monitor.
 *
 * @file: image file name; NULL or "" is treated as "no file given"
 * @bs_opts: option dictionary.  Common drive options are absorbed from
 *           it; what remains is passed on when opening the image.
 * @errp: set on failure
 *
 * When neither a file name nor any remaining option is given, an empty
 * BlockBackend is created and only its root state is filled in.
 *
 * Returns the new BlockBackend, or NULL on failure.  Note that the
 * "format=help" path prints the supported formats and returns NULL
 * without setting @errp.
 *
 * Takes the ownership of bs_opts
 */
static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
                                   Error **errp)
{
    const char *buf;
    int bdrv_flags = 0;
    int on_read_error, on_write_error;
    OnOffAuto account_invalid, account_failed;
    bool writethrough, read_only;
    BlockBackend *blk;
    BlockDriverState *bs;
    ThrottleConfig cfg;
    int snapshot = 0;
    Error *error = NULL;
    QemuOpts *opts;
    QDict *interval_dict = NULL;
    QList *interval_list = NULL;
    const char *id;
    BlockdevDetectZeroesOptions detect_zeroes =
        BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF;
    const char *throttling_group = NULL;

    /* Check common options by copying from bs_opts to opts, all other options
     * stay in bs_opts for processing by bdrv_open(). */
    id = qdict_get_try_str(bs_opts, "id");
    opts = qemu_opts_create(&qemu_common_drive_opts, id, 1, errp);
    if (!opts) {
        goto err_no_opts;
    }

    if (!qemu_opts_absorb_qdict(opts, bs_opts, errp)) {
        goto early_err;
    }

    /* @id is re-read from opts below before it is used again */
    if (id) {
        qdict_del(bs_opts, "id");
    }

    /* extract parameters */
    snapshot = qemu_opt_get_bool(opts, "snapshot", 0);

    account_invalid = account_get_opt(opts, "stats-account-invalid");
    account_failed = account_get_opt(opts, "stats-account-failed");

    writethrough = !qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, true);

    id = qemu_opts_id(opts);

    /*
     * Split the "stats-intervals.*" options into a list; anything left
     * in interval_dict afterwards is not a list element and thus invalid.
     */
    qdict_extract_subqdict(bs_opts, &interval_dict, "stats-intervals.");
    qdict_array_split(interval_dict, &interval_list);

    if (qdict_size(interval_dict) != 0) {
        error_setg(errp, "Invalid option stats-intervals.%s",
                   qdict_first(interval_dict)->key);
        goto early_err;
    }

    extract_common_blockdev_options(opts, &bdrv_flags, &throttling_group, &cfg,
                                    &detect_zeroes, &error);
    if (error) {
        error_propagate(errp, error);
        goto early_err;
    }

    if ((buf = qemu_opt_get(opts, "format")) != NULL) {
        if (is_help_option(buf)) {
            qemu_printf("Supported formats:");
            bdrv_iterate_format(bdrv_format_print, NULL, false);
            qemu_printf("\nSupported formats (read-only):");
            bdrv_iterate_format(bdrv_format_print, NULL, true);
            qemu_printf("\n");
            /* Fails without setting @errp */
            goto early_err;
        }

        if (qdict_haskey(bs_opts, "driver")) {
            error_setg(errp, "Cannot specify both 'driver' and 'format'");
            goto early_err;
        }
        qdict_put_str(bs_opts, "driver", buf);
    }

    on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
    if ((buf = qemu_opt_get(opts, "werror")) != NULL) {
        on_write_error = parse_block_error_action(buf, 0, &error);
        if (error) {
            error_propagate(errp, error);
            goto early_err;
        }
    }

    on_read_error = BLOCKDEV_ON_ERROR_REPORT;
    if ((buf = qemu_opt_get(opts, "rerror")) != NULL) {
        on_read_error = parse_block_error_action(buf, 1, &error);
        if (error) {
            error_propagate(errp, error);
            goto early_err;
        }
    }

    if (snapshot) {
        bdrv_flags |= BDRV_O_SNAPSHOT;
    }

    read_only = qemu_opt_get_bool(opts, BDRV_OPT_READ_ONLY, false);

    /* init */
    if ((!file || !*file) && !qdict_size(bs_opts)) {
        /* Nothing to open: create an empty backend */
        BlockBackendRootState *blk_rs;

        blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
        blk_rs = blk_get_root_state(blk);
        blk_rs->open_flags    = bdrv_flags | (read_only ? 0 : BDRV_O_RDWR);
        blk_rs->detect_zeroes = detect_zeroes;

        qobject_unref(bs_opts);
    } else {
        if (file && !*file) {
            file = NULL;
        }

        /* bdrv_open() defaults to the values in bdrv_flags (for compatibility
         * with other callers) rather than what we want as the real defaults.
         * Apply the defaults here instead. */
        qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_DIRECT, "off");
        qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, "off");
        qdict_set_default_str(bs_opts, BDRV_OPT_READ_ONLY,
                              read_only ? "on" : "off");
        qdict_set_default_str(bs_opts, BDRV_OPT_AUTO_READ_ONLY, "on");
        assert((bdrv_flags & BDRV_O_CACHE_MASK) == 0);

        if (runstate_check(RUN_STATE_INMIGRATE)) {
            bdrv_flags |= BDRV_O_INACTIVE;
        }

        /*
         * bs_opts is handed over here; the err_no_bs_opts exit path
         * below therefore does not unref it.
         */
        blk = blk_new_open(file, NULL, bs_opts, bdrv_flags, errp);
        if (!blk) {
            goto err_no_bs_opts;
        }
        bs = blk_bs(blk);

        bs->detect_zeroes = detect_zeroes;

        block_acct_setup(blk_get_stats(blk), account_invalid, account_failed);

        if (!parse_stats_intervals(blk_get_stats(blk), interval_list, errp)) {
            blk_unref(blk);
            blk = NULL;
            goto err_no_bs_opts;
        }
    }

    /* disk I/O throttling */
    if (throttle_enabled(&cfg)) {
        /* Without an explicit group, the drive ID names the group */
        if (!throttling_group) {
            throttling_group = id;
        }
        blk_io_limits_enable(blk, throttling_group);
        blk_set_io_limits(blk, &cfg);
    }

    blk_set_enable_write_cache(blk, !writethrough);
    blk_set_on_error(blk, on_read_error, on_write_error);

    if (!monitor_add_blk(blk, id, errp)) {
        blk_unref(blk);
        blk = NULL;
        goto err_no_bs_opts;
    }

    /*
     * Shared exit for success and for failures after bs_opts has been
     * given away; blk is NULL in the failure case.
     */
err_no_bs_opts:
    qemu_opts_del(opts);
    qobject_unref(interval_dict);
    qobject_unref(interval_list);
    return blk;

    /* Failure while bs_opts is still owned by this function */
early_err:
    qemu_opts_del(opts);
    qobject_unref(interval_dict);
    qobject_unref(interval_list);
err_no_opts:
    qobject_unref(bs_opts);
    return NULL;
}
 661
 662/* Takes the ownership of bs_opts */
 663BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
 664{
 665    int bdrv_flags = 0;
 666
 667    GLOBAL_STATE_CODE();
 668    /* bdrv_open() defaults to the values in bdrv_flags (for compatibility
 669     * with other callers) rather than what we want as the real defaults.
 670     * Apply the defaults here instead. */
 671    qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_DIRECT, "off");
 672    qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, "off");
 673    qdict_set_default_str(bs_opts, BDRV_OPT_READ_ONLY, "off");
 674
 675    if (runstate_check(RUN_STATE_INMIGRATE)) {
 676        bdrv_flags |= BDRV_O_INACTIVE;
 677    }
 678
 679    return bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp);
 680}
 681
 682void blockdev_close_all_bdrv_states(void)
 683{
 684    BlockDriverState *bs, *next_bs;
 685
 686    GLOBAL_STATE_CODE();
 687    QTAILQ_FOREACH_SAFE(bs, &monitor_bdrv_states, monitor_list, next_bs) {
 688        AioContext *ctx = bdrv_get_aio_context(bs);
 689
 690        aio_context_acquire(ctx);
 691        bdrv_unref(bs);
 692        aio_context_release(ctx);
 693    }
 694}
 695
 696/* Iterates over the list of monitor-owned BlockDriverStates */
 697BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs)
 698{
 699    GLOBAL_STATE_CODE();
 700    return bs ? QTAILQ_NEXT(bs, monitor_list)
 701              : QTAILQ_FIRST(&monitor_bdrv_states);
 702}
 703
 704static bool qemu_opt_rename(QemuOpts *opts, const char *from, const char *to,
 705                            Error **errp)
 706{
 707    const char *value;
 708
 709    value = qemu_opt_get(opts, from);
 710    if (value) {
 711        if (qemu_opt_find(opts, to)) {
 712            error_setg(errp, "'%s' and its alias '%s' can't be used at the "
 713                       "same time", to, from);
 714            return false;
 715        }
 716    }
 717
 718    /* rename all items in opts */
 719    while ((value = qemu_opt_get(opts, from))) {
 720        qemu_opt_set(opts, to, value, &error_abort);
 721        qemu_opt_unset(opts, from);
 722    }
 723    return true;
 724}
 725
/*
 * Descriptors of the options that drive_new() absorbs from the drive
 * option dictionary and interprets itself: the drive address (bus, unit,
 * index), media and interface type, file name, plus a few options with
 * -drive specific semantics.
 */
QemuOptsList qemu_legacy_drive_opts = {
    .name = "drive",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_legacy_drive_opts.head),
    .desc = {
        {
            .name = "bus",
            .type = QEMU_OPT_NUMBER,
            .help = "bus number",
        },{
            .name = "unit",
            .type = QEMU_OPT_NUMBER,
            .help = "unit number (i.e. lun for scsi)",
        },{
            .name = "index",
            .type = QEMU_OPT_NUMBER,
            .help = "index number",
        },{
            .name = "media",
            .type = QEMU_OPT_STRING,
            .help = "media type (disk, cdrom)",
        },{
            .name = "if",
            .type = QEMU_OPT_STRING,
            .help = "interface (ide, scsi, sd, mtd, floppy, pflash, virtio)",
        },{
            .name = "file",
            .type = QEMU_OPT_STRING,
            .help = "file name",
        },

        /* Options that are passed on, but have special semantics with -drive */
        {
            .name = BDRV_OPT_READ_ONLY,
            .type = QEMU_OPT_BOOL,
            .help = "open drive file as read-only",
        },{
            .name = "rerror",
            .type = QEMU_OPT_STRING,
            .help = "read error action",
        },{
            .name = "werror",
            .type = QEMU_OPT_STRING,
            .help = "write error action",
        },{
            .name = "copy-on-read",
            .type = QEMU_OPT_BOOL,
            .help = "copy read data from backing file into image file",
        },

        { /* end of list */ }
    },
};
 778
 779DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type,
 780                     Error **errp)
 781{
 782    const char *value;
 783    BlockBackend *blk;
 784    DriveInfo *dinfo = NULL;
 785    QDict *bs_opts;
 786    QemuOpts *legacy_opts;
 787    DriveMediaType media = MEDIA_DISK;
 788    BlockInterfaceType type;
 789    int max_devs, bus_id, unit_id, index;
 790    const char *werror, *rerror;
 791    bool read_only = false;
 792    bool copy_on_read;
 793    const char *filename;
 794    int i;
 795
 796    GLOBAL_STATE_CODE();
 797
 798    /* Change legacy command line options into QMP ones */
 799    static const struct {
 800        const char *from;
 801        const char *to;
 802    } opt_renames[] = {
 803        { "iops",           "throttling.iops-total" },
 804        { "iops_rd",        "throttling.iops-read" },
 805        { "iops_wr",        "throttling.iops-write" },
 806
 807        { "bps",            "throttling.bps-total" },
 808        { "bps_rd",         "throttling.bps-read" },
 809        { "bps_wr",         "throttling.bps-write" },
 810
 811        { "iops_max",       "throttling.iops-total-max" },
 812        { "iops_rd_max",    "throttling.iops-read-max" },
 813        { "iops_wr_max",    "throttling.iops-write-max" },
 814
 815        { "bps_max",        "throttling.bps-total-max" },
 816        { "bps_rd_max",     "throttling.bps-read-max" },
 817        { "bps_wr_max",     "throttling.bps-write-max" },
 818
 819        { "iops_size",      "throttling.iops-size" },
 820
 821        { "group",          "throttling.group" },
 822
 823        { "readonly",       BDRV_OPT_READ_ONLY },
 824    };
 825
 826    for (i = 0; i < ARRAY_SIZE(opt_renames); i++) {
 827        if (!qemu_opt_rename(all_opts, opt_renames[i].from,
 828                             opt_renames[i].to, errp)) {
 829            return NULL;
 830        }
 831    }
 832
 833    value = qemu_opt_get(all_opts, "cache");
 834    if (value) {
 835        int flags = 0;
 836        bool writethrough;
 837
 838        if (bdrv_parse_cache_mode(value, &flags, &writethrough) != 0) {
 839            error_setg(errp, "invalid cache option");
 840            return NULL;
 841        }
 842
 843        /* Specific options take precedence */
 844        if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_WB)) {
 845            qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_WB,
 846                              !writethrough, &error_abort);
 847        }
 848        if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_DIRECT)) {
 849            qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_DIRECT,
 850                              !!(flags & BDRV_O_NOCACHE), &error_abort);
 851        }
 852        if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_NO_FLUSH)) {
 853            qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_NO_FLUSH,
 854                              !!(flags & BDRV_O_NO_FLUSH), &error_abort);
 855        }
 856        qemu_opt_unset(all_opts, "cache");
 857    }
 858
 859    /* Get a QDict for processing the options */
 860    bs_opts = qdict_new();
 861    qemu_opts_to_qdict(all_opts, bs_opts);
 862
 863    legacy_opts = qemu_opts_create(&qemu_legacy_drive_opts, NULL, 0,
 864                                   &error_abort);
 865    if (!qemu_opts_absorb_qdict(legacy_opts, bs_opts, errp)) {
 866        goto fail;
 867    }
 868
 869    /* Media type */
 870    value = qemu_opt_get(legacy_opts, "media");
 871    if (value) {
 872        if (!strcmp(value, "disk")) {
 873            media = MEDIA_DISK;
 874        } else if (!strcmp(value, "cdrom")) {
 875            media = MEDIA_CDROM;
 876            read_only = true;
 877        } else {
 878            error_setg(errp, "'%s' invalid media", value);
 879            goto fail;
 880        }
 881    }
 882
 883    /* copy-on-read is disabled with a warning for read-only devices */
 884    read_only |= qemu_opt_get_bool(legacy_opts, BDRV_OPT_READ_ONLY, false);
 885    copy_on_read = qemu_opt_get_bool(legacy_opts, "copy-on-read", false);
 886
 887    if (read_only && copy_on_read) {
 888        warn_report("disabling copy-on-read on read-only drive");
 889        copy_on_read = false;
 890    }
 891
 892    qdict_put_str(bs_opts, BDRV_OPT_READ_ONLY, read_only ? "on" : "off");
 893    qdict_put_str(bs_opts, "copy-on-read", copy_on_read ? "on" : "off");
 894
 895    /* Controller type */
 896    value = qemu_opt_get(legacy_opts, "if");
 897    if (value) {
 898        for (type = 0;
 899             type < IF_COUNT && strcmp(value, if_name[type]);
 900             type++) {
 901        }
 902        if (type == IF_COUNT) {
 903            error_setg(errp, "unsupported bus type '%s'", value);
 904            goto fail;
 905        }
 906    } else {
 907        type = block_default_type;
 908    }
 909
 910    /* Device address specified by bus/unit or index.
 911     * If none was specified, try to find the first free one. */
 912    bus_id  = qemu_opt_get_number(legacy_opts, "bus", 0);
 913    unit_id = qemu_opt_get_number(legacy_opts, "unit", -1);
 914    index   = qemu_opt_get_number(legacy_opts, "index", -1);
 915
 916    max_devs = if_max_devs[type];
 917
 918    if (index != -1) {
 919        if (bus_id != 0 || unit_id != -1) {
 920            error_setg(errp, "index cannot be used with bus and unit");
 921            goto fail;
 922        }
 923        bus_id = drive_index_to_bus_id(type, index);
 924        unit_id = drive_index_to_unit_id(type, index);
 925    }
 926
 927    if (unit_id == -1) {
 928       unit_id = 0;
 929       while (drive_get(type, bus_id, unit_id) != NULL) {
 930           unit_id++;
 931           if (max_devs && unit_id >= max_devs) {
 932               unit_id -= max_devs;
 933               bus_id++;
 934           }
 935       }
 936    }
 937
 938    if (max_devs && unit_id >= max_devs) {
 939        error_setg(errp, "unit %d too big (max is %d)", unit_id, max_devs - 1);
 940        goto fail;
 941    }
 942
 943    if (drive_get(type, bus_id, unit_id) != NULL) {
 944        error_setg(errp, "drive with bus=%d, unit=%d (index=%d) exists",
 945                   bus_id, unit_id, index);
 946        goto fail;
 947    }
 948
 949    /* no id supplied -> create one */
 950    if (qemu_opts_id(all_opts) == NULL) {
 951        char *new_id;
 952        const char *mediastr = "";
 953        if (type == IF_IDE || type == IF_SCSI) {
 954            mediastr = (media == MEDIA_CDROM) ? "-cd" : "-hd";
 955        }
 956        if (max_devs) {
 957            new_id = g_strdup_printf("%s%i%s%i", if_name[type], bus_id,
 958                                     mediastr, unit_id);
 959        } else {
 960            new_id = g_strdup_printf("%s%s%i", if_name[type],
 961                                     mediastr, unit_id);
 962        }
 963        qdict_put_str(bs_opts, "id", new_id);
 964        g_free(new_id);
 965    }
 966
 967    /* Add virtio block device */
 968    if (type == IF_VIRTIO) {
 969        QemuOpts *devopts;
 970        devopts = qemu_opts_create(qemu_find_opts("device"), NULL, 0,
 971                                   &error_abort);
 972        qemu_opt_set(devopts, "driver", "virtio-blk", &error_abort);
 973        qemu_opt_set(devopts, "drive", qdict_get_str(bs_opts, "id"),
 974                     &error_abort);
 975    }
 976
 977    filename = qemu_opt_get(legacy_opts, "file");
 978
 979    /* Check werror/rerror compatibility with if=... */
 980    werror = qemu_opt_get(legacy_opts, "werror");
 981    if (werror != NULL) {
 982        if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO &&
 983            type != IF_NONE) {
 984            error_setg(errp, "werror is not supported by this bus type");
 985            goto fail;
 986        }
 987        qdict_put_str(bs_opts, "werror", werror);
 988    }
 989
 990    rerror = qemu_opt_get(legacy_opts, "rerror");
 991    if (rerror != NULL) {
 992        if (type != IF_IDE && type != IF_VIRTIO && type != IF_SCSI &&
 993            type != IF_NONE) {
 994            error_setg(errp, "rerror is not supported by this bus type");
 995            goto fail;
 996        }
 997        qdict_put_str(bs_opts, "rerror", rerror);
 998    }
 999
1000    /* Actual block device init: Functionality shared with blockdev-add */
1001    blk = blockdev_init(filename, bs_opts, errp);
1002    bs_opts = NULL;
1003    if (!blk) {
1004        goto fail;
1005    }
1006
1007    /* Create legacy DriveInfo */
1008    dinfo = g_malloc0(sizeof(*dinfo));
1009    dinfo->opts = all_opts;
1010
1011    dinfo->type = type;
1012    dinfo->bus = bus_id;
1013    dinfo->unit = unit_id;
1014
1015    blk_set_legacy_dinfo(blk, dinfo);
1016
1017    switch(type) {
1018    case IF_IDE:
1019    case IF_SCSI:
1020    case IF_XEN:
1021    case IF_NONE:
1022        dinfo->media_cd = media == MEDIA_CDROM;
1023        break;
1024    default:
1025        break;
1026    }
1027
1028fail:
1029    qemu_opts_del(legacy_opts);
1030    qobject_unref(bs_opts);
1031    return dinfo;
1032}
1033
1034static BlockDriverState *qmp_get_root_bs(const char *name, Error **errp)
1035{
1036    BlockDriverState *bs;
1037    AioContext *aio_context;
1038
1039    bs = bdrv_lookup_bs(name, name, errp);
1040    if (bs == NULL) {
1041        return NULL;
1042    }
1043
1044    if (!bdrv_is_root_node(bs)) {
1045        error_setg(errp, "Need a root block node");
1046        return NULL;
1047    }
1048
1049    aio_context = bdrv_get_aio_context(bs);
1050    aio_context_acquire(aio_context);
1051
1052    if (!bdrv_is_inserted(bs)) {
1053        error_setg(errp, "Device has no medium");
1054        bs = NULL;
1055    }
1056
1057    aio_context_release(aio_context);
1058
1059    return bs;
1060}
1061
1062static void blockdev_do_action(TransactionAction *action, Error **errp)
1063{
1064    TransactionActionList list;
1065
1066    list.value = action;
1067    list.next = NULL;
1068    qmp_transaction(&list, NULL, errp);
1069}
1070
1071void qmp_blockdev_snapshot_sync(const char *device, const char *node_name,
1072                                const char *snapshot_file,
1073                                const char *snapshot_node_name,
1074                                const char *format,
1075                                bool has_mode, NewImageMode mode, Error **errp)
1076{
1077    BlockdevSnapshotSync snapshot = {
1078        .device = (char *) device,
1079        .node_name = (char *) node_name,
1080        .snapshot_file = (char *) snapshot_file,
1081        .snapshot_node_name = (char *) snapshot_node_name,
1082        .format = (char *) format,
1083        .has_mode = has_mode,
1084        .mode = mode,
1085    };
1086    TransactionAction action = {
1087        .type = TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC,
1088        .u.blockdev_snapshot_sync.data = &snapshot,
1089    };
1090    blockdev_do_action(&action, errp);
1091}
1092
1093void qmp_blockdev_snapshot(const char *node, const char *overlay,
1094                           Error **errp)
1095{
1096    BlockdevSnapshot snapshot_data = {
1097        .node = (char *) node,
1098        .overlay = (char *) overlay
1099    };
1100    TransactionAction action = {
1101        .type = TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT,
1102        .u.blockdev_snapshot.data = &snapshot_data,
1103    };
1104    blockdev_do_action(&action, errp);
1105}
1106
1107void qmp_blockdev_snapshot_internal_sync(const char *device,
1108                                         const char *name,
1109                                         Error **errp)
1110{
1111    BlockdevSnapshotInternal snapshot = {
1112        .device = (char *) device,
1113        .name = (char *) name
1114    };
1115    TransactionAction action = {
1116        .type = TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC,
1117        .u.blockdev_snapshot_internal_sync.data = &snapshot,
1118    };
1119    blockdev_do_action(&action, errp);
1120}
1121
1122SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device,
1123                                                         const char *id,
1124                                                         const char *name,
1125                                                         Error **errp)
1126{
1127    BlockDriverState *bs;
1128    AioContext *aio_context;
1129    QEMUSnapshotInfo sn;
1130    Error *local_err = NULL;
1131    SnapshotInfo *info = NULL;
1132    int ret;
1133
1134    bs = qmp_get_root_bs(device, errp);
1135    if (!bs) {
1136        return NULL;
1137    }
1138    aio_context = bdrv_get_aio_context(bs);
1139    aio_context_acquire(aio_context);
1140
1141    if (!id && !name) {
1142        error_setg(errp, "Name or id must be provided");
1143        goto out_aio_context;
1144    }
1145
1146    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, errp)) {
1147        goto out_aio_context;
1148    }
1149
1150    ret = bdrv_snapshot_find_by_id_and_name(bs, id, name, &sn, &local_err);
1151    if (local_err) {
1152        error_propagate(errp, local_err);
1153        goto out_aio_context;
1154    }
1155    if (!ret) {
1156        error_setg(errp,
1157                   "Snapshot with id '%s' and name '%s' does not exist on "
1158                   "device '%s'",
1159                   STR_OR_NULL(id), STR_OR_NULL(name), device);
1160        goto out_aio_context;
1161    }
1162
1163    bdrv_snapshot_delete(bs, id, name, &local_err);
1164    if (local_err) {
1165        error_propagate(errp, local_err);
1166        goto out_aio_context;
1167    }
1168
1169    aio_context_release(aio_context);
1170
1171    info = g_new0(SnapshotInfo, 1);
1172    info->id = g_strdup(sn.id_str);
1173    info->name = g_strdup(sn.name);
1174    info->date_nsec = sn.date_nsec;
1175    info->date_sec = sn.date_sec;
1176    info->vm_state_size = sn.vm_state_size;
1177    info->vm_clock_nsec = sn.vm_clock_nsec % 1000000000;
1178    info->vm_clock_sec = sn.vm_clock_nsec / 1000000000;
1179    if (sn.icount != -1ULL) {
1180        info->icount = sn.icount;
1181        info->has_icount = true;
1182    }
1183
1184    return info;
1185
1186out_aio_context:
1187    aio_context_release(aio_context);
1188    return NULL;
1189}
1190
1191/* New and old BlockDriverState structs for atomic group operations */
1192
/*
 * Forward declaration: the BlkActionOps callbacks take a BlkActionState
 * pointer, and BlkActionState in turn points at its BlkActionOps table.
 */
typedef struct BlkActionState BlkActionState;
1194
/**
 * BlkActionOps:
 * Table of operations that define an Action.
 *
 * @instance_size: Size of state struct, in bytes.
 * @prepare: Prepare the work, must NOT be NULL. This is the only callback
 *           that receives an Error ** and can therefore report failure.
 * @commit: Commit the changes, can be NULL.
 * @abort: Abort the changes on fail, can be NULL.
 * @clean: Clean up resources after all transaction actions have called
 *         commit() or abort(). Can be NULL.
 *
 * Only prepare() may fail. In a single transaction, only one of commit() or
 * abort() will be called. clean() will always be called if it is present.
 *
 * Every callback receives the common BlkActionState; implementations
 * recover their own state struct from it with DO_UPCAST().
 *
 * Always run under BQL.
 */
typedef struct BlkActionOps {
    size_t instance_size;
    void (*prepare)(BlkActionState *common, Error **errp);
    void (*commit)(BlkActionState *common);
    void (*abort)(BlkActionState *common);
    void (*clean)(BlkActionState *common);
} BlkActionOps;
1218
/**
 * BlkActionState:
 * Describes one Action's state within a Transaction.
 *
 * @action: QAPI-defined enum identifying which Action to perform.
 * @ops: Table of ActionOps this Action can perform.
 * @block_job_txn: Transaction which this action belongs to.
 * @txn_props: Properties of the enclosing Transaction; its completion_mode
 *             is what action_check_completion_mode() inspects.
 * @entry: List membership for all Actions in this Transaction.
 *
 * This structure must be arranged as first member in a subclassed type,
 * assuming that the compiler will also arrange it to the same offsets as the
 * base class.
 */
struct BlkActionState {
    TransactionAction *action;
    const BlkActionOps *ops;
    JobTxn *block_job_txn;
    TransactionProperties *txn_props;
    QTAILQ_ENTRY(BlkActionState) entry;
};
1239
/*
 * Internal snapshot private data.
 *
 * @common must stay the first member (see BlkActionState).
 */
typedef struct InternalSnapshotState {
    BlkActionState common;
    /* Node being snapshotted; set by prepare before it starts the drain */
    BlockDriverState *bs;
    /* Snapshot description filled in by prepare and reused by abort */
    QEMUSnapshotInfo sn;
    /* True once bdrv_snapshot_create() succeeded; abort deletes only then */
    bool created;
} InternalSnapshotState;
1247
1248
1249static int action_check_completion_mode(BlkActionState *s, Error **errp)
1250{
1251    if (s->txn_props->completion_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) {
1252        error_setg(errp,
1253                   "Action '%s' does not support Transaction property "
1254                   "completion-mode = %s",
1255                   TransactionActionKind_str(s->action->type),
1256                   ActionCompletionMode_str(s->txn_props->completion_mode));
1257        return -1;
1258    }
1259    return 0;
1260}
1261
/*
 * .prepare() for blockdev-snapshot-internal-sync.
 *
 * Validates the request, drains the target node and creates an internal
 * snapshot named internal->name on it.  On success state->created is set so
 * that internal_snapshot_abort() knows there is a snapshot to delete.
 *
 * On failure @errp is set.  Once state->bs has been assigned the node stays
 * drained even on failure; internal_snapshot_clean() ends the drain.
 */
static void internal_snapshot_prepare(BlkActionState *common,
                                      Error **errp)
{
    Error *local_err = NULL;
    const char *device;
    const char *name;
    BlockDriverState *bs;
    QEMUSnapshotInfo old_sn, *sn;
    bool ret;
    int64_t rt;
    BlockdevSnapshotInternal *internal;
    InternalSnapshotState *state;
    AioContext *aio_context;
    int ret1;

    g_assert(common->action->type ==
             TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC);
    internal = common->action->u.blockdev_snapshot_internal_sync.data;
    state = DO_UPCAST(InternalSnapshotState, common, common);

    /* 1. parse input */
    device = internal->device;
    name = internal->name;

    /* 2. validate the request */
    if (action_check_completion_mode(common, errp) < 0) {
        return;
    }

    bs = qmp_get_root_bs(device, errp);
    if (!bs) {
        return;
    }

    aio_context = bdrv_get_aio_context(bs);
    aio_context_acquire(aio_context);

    /* Recorded before draining so that .clean() can find the node */
    state->bs = bs;

    /* Paired with .clean() */
    bdrv_drained_begin(bs);

    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, errp)) {
        goto out;
    }

    if (bdrv_is_read_only(bs)) {
        error_setg(errp, "Device '%s' is read only", device);
        goto out;
    }

    if (!bdrv_can_snapshot(bs)) {
        error_setg(errp, "Block format '%s' used by device '%s' "
                   "does not support internal snapshots",
                   bs->drv->format_name, device);
        goto out;
    }

    if (!strlen(name)) {
        error_setg(errp, "Name is empty");
        goto out;
    }

    /* check whether a snapshot with this name already exists */
    ret = bdrv_snapshot_find_by_id_and_name(bs, NULL, name, &old_sn,
                                            &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto out;
    } else if (ret) {
        error_setg(errp,
                   "Snapshot with name '%s' already exists on device '%s'",
                   name, device);
        goto out;
    }

    /* 3. take the snapshot */
    sn = &state->sn;
    pstrcpy(sn->name, sizeof(sn->name), name);
    /* g_get_real_time() is in microseconds; split into sec + nsec */
    rt = g_get_real_time();
    sn->date_sec = rt / G_USEC_PER_SEC;
    sn->date_nsec = (rt % G_USEC_PER_SEC) * 1000;
    sn->vm_clock_nsec = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    /* icount is only recorded in replay mode; -1ULL marks it as absent */
    if (replay_mode != REPLAY_MODE_NONE) {
        sn->icount = replay_get_current_icount();
    } else {
        sn->icount = -1ULL;
    }

    ret1 = bdrv_snapshot_create(bs, sn);
    if (ret1 < 0) {
        error_setg_errno(errp, -ret1,
                         "Failed to create snapshot '%s' on device '%s'",
                         name, device);
        goto out;
    }

    /* 4. success: remember that a snapshot was created, for .abort() */
    state->created = true;

out:
    aio_context_release(aio_context);
}
1365
1366static void internal_snapshot_abort(BlkActionState *common)
1367{
1368    InternalSnapshotState *state =
1369                             DO_UPCAST(InternalSnapshotState, common, common);
1370    BlockDriverState *bs = state->bs;
1371    QEMUSnapshotInfo *sn = &state->sn;
1372    AioContext *aio_context;
1373    Error *local_error = NULL;
1374
1375    if (!state->created) {
1376        return;
1377    }
1378
1379    aio_context = bdrv_get_aio_context(state->bs);
1380    aio_context_acquire(aio_context);
1381
1382    if (bdrv_snapshot_delete(bs, sn->id_str, sn->name, &local_error) < 0) {
1383        error_reportf_err(local_error,
1384                          "Failed to delete snapshot with id '%s' and "
1385                          "name '%s' on device '%s' in abort: ",
1386                          sn->id_str, sn->name,
1387                          bdrv_get_device_name(bs));
1388    }
1389
1390    aio_context_release(aio_context);
1391}
1392
1393static void internal_snapshot_clean(BlkActionState *common)
1394{
1395    InternalSnapshotState *state = DO_UPCAST(InternalSnapshotState,
1396                                             common, common);
1397    AioContext *aio_context;
1398
1399    if (!state->bs) {
1400        return;
1401    }
1402
1403    aio_context = bdrv_get_aio_context(state->bs);
1404    aio_context_acquire(aio_context);
1405
1406    bdrv_drained_end(state->bs);
1407
1408    aio_context_release(aio_context);
1409}
1410
/*
 * External snapshot private data.
 *
 * @common must stay the first member (see BlkActionState).
 */
typedef struct ExternalSnapshotState {
    BlkActionState common;
    /* Node the snapshot is taken of; drained by prepare until .clean() */
    BlockDriverState *old_bs;
    /* Overlay node opened by prepare; unreferenced in .clean() */
    BlockDriverState *new_bs;
    /* True once bdrv_append() succeeded; abort undoes the append only then */
    bool overlay_appended;
} ExternalSnapshotState;
1418
1419static void external_snapshot_prepare(BlkActionState *common,
1420                                      Error **errp)
1421{
1422    int ret;
1423    int flags = 0;
1424    QDict *options = NULL;
1425    Error *local_err = NULL;
1426    /* Device and node name of the image to generate the snapshot from */
1427    const char *device;
1428    const char *node_name;
1429    /* Reference to the new image (for 'blockdev-snapshot') */
1430    const char *snapshot_ref;
1431    /* File name of the new image (for 'blockdev-snapshot-sync') */
1432    const char *new_image_file;
1433    ExternalSnapshotState *state =
1434                             DO_UPCAST(ExternalSnapshotState, common, common);
1435    TransactionAction *action = common->action;
1436    AioContext *aio_context;
1437    uint64_t perm, shared;
1438
1439    /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar
1440     * purpose but a different set of parameters */
1441    switch (action->type) {
1442    case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT:
1443        {
1444            BlockdevSnapshot *s = action->u.blockdev_snapshot.data;
1445            device = s->node;
1446            node_name = s->node;
1447            new_image_file = NULL;
1448            snapshot_ref = s->overlay;
1449        }
1450        break;
1451    case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC:
1452        {
1453            BlockdevSnapshotSync *s = action->u.blockdev_snapshot_sync.data;
1454            device = s->device;
1455            node_name = s->node_name;
1456            new_image_file = s->snapshot_file;
1457            snapshot_ref = NULL;
1458        }
1459        break;
1460    default:
1461        g_assert_not_reached();
1462    }
1463
1464    /* start processing */
1465    if (action_check_completion_mode(common, errp) < 0) {
1466        return;
1467    }
1468
1469    state->old_bs = bdrv_lookup_bs(device, node_name, errp);
1470    if (!state->old_bs) {
1471        return;
1472    }
1473
1474    aio_context = bdrv_get_aio_context(state->old_bs);
1475    aio_context_acquire(aio_context);
1476
1477    /* Paired with .clean() */
1478    bdrv_drained_begin(state->old_bs);
1479
1480    if (!bdrv_is_inserted(state->old_bs)) {
1481        error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
1482        goto out;
1483    }
1484
1485    if (bdrv_op_is_blocked(state->old_bs,
1486                           BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, errp)) {
1487        goto out;
1488    }
1489
1490    if (!bdrv_is_read_only(state->old_bs)) {
1491        if (bdrv_flush(state->old_bs)) {
1492            error_setg(errp, QERR_IO_ERROR);
1493            goto out;
1494        }
1495    }
1496
1497    if (action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC) {
1498        BlockdevSnapshotSync *s = action->u.blockdev_snapshot_sync.data;
1499        const char *format = s->format ?: "qcow2";
1500        enum NewImageMode mode;
1501        const char *snapshot_node_name = s->snapshot_node_name;
1502
1503        if (node_name && !snapshot_node_name) {
1504            error_setg(errp, "New overlay node-name missing");
1505            goto out;
1506        }
1507
1508        if (snapshot_node_name &&
1509            bdrv_lookup_bs(snapshot_node_name, snapshot_node_name, NULL)) {
1510            error_setg(errp, "New overlay node-name already in use");
1511            goto out;
1512        }
1513
1514        flags = state->old_bs->open_flags;
1515        flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_COPY_ON_READ);
1516        flags |= BDRV_O_NO_BACKING;
1517
1518        /* create new image w/backing file */
1519        mode = s->has_mode ? s->mode : NEW_IMAGE_MODE_ABSOLUTE_PATHS;
1520        if (mode != NEW_IMAGE_MODE_EXISTING) {
1521            int64_t size = bdrv_getlength(state->old_bs);
1522            if (size < 0) {
1523                error_setg_errno(errp, -size, "bdrv_getlength failed");
1524                goto out;
1525            }
1526            bdrv_refresh_filename(state->old_bs);
1527
1528            aio_context_release(aio_context);
1529            bdrv_img_create(new_image_file, format,
1530                            state->old_bs->filename,
1531                            state->old_bs->drv->format_name,
1532                            NULL, size, flags, false, &local_err);
1533            aio_context_acquire(aio_context);
1534
1535            if (local_err) {
1536                error_propagate(errp, local_err);
1537                goto out;
1538            }
1539        }
1540
1541        options = qdict_new();
1542        if (snapshot_node_name) {
1543            qdict_put_str(options, "node-name", snapshot_node_name);
1544        }
1545        qdict_put_str(options, "driver", format);
1546    }
1547
1548    state->new_bs = bdrv_open(new_image_file, snapshot_ref, options, flags,
1549                              errp);
1550    /* We will manually add the backing_hd field to the bs later */
1551    if (!state->new_bs) {
1552        goto out;
1553    }
1554
1555    /*
1556     * Allow attaching a backing file to an overlay that's already in use only
1557     * if the parents don't assume that they are already seeing a valid image.
1558     * (Specifically, allow it as a mirror target, which is write-only access.)
1559     */
1560    bdrv_get_cumulative_perm(state->new_bs, &perm, &shared);
1561    if (perm & BLK_PERM_CONSISTENT_READ) {
1562        error_setg(errp, "The overlay is already in use");
1563        goto out;
1564    }
1565
1566    if (state->new_bs->drv->is_filter) {
1567        error_setg(errp, "Filters cannot be used as overlays");
1568        goto out;
1569    }
1570
1571    if (bdrv_cow_child(state->new_bs)) {
1572        error_setg(errp, "The overlay already has a backing image");
1573        goto out;
1574    }
1575
1576    if (!state->new_bs->drv->supports_backing) {
1577        error_setg(errp, "The overlay does not support backing images");
1578        goto out;
1579    }
1580
1581    ret = bdrv_append(state->new_bs, state->old_bs, errp);
1582    if (ret < 0) {
1583        goto out;
1584    }
1585    state->overlay_appended = true;
1586
1587out:
1588    aio_context_release(aio_context);
1589}
1590
1591static void external_snapshot_commit(BlkActionState *common)
1592{
1593    ExternalSnapshotState *state =
1594                             DO_UPCAST(ExternalSnapshotState, common, common);
1595    AioContext *aio_context;
1596
1597    aio_context = bdrv_get_aio_context(state->old_bs);
1598    aio_context_acquire(aio_context);
1599
1600    /* We don't need (or want) to use the transactional
1601     * bdrv_reopen_multiple() across all the entries at once, because we
1602     * don't want to abort all of them if one of them fails the reopen */
1603    if (!qatomic_read(&state->old_bs->copy_on_read)) {
1604        bdrv_reopen_set_read_only(state->old_bs, true, NULL);
1605    }
1606
1607    aio_context_release(aio_context);
1608}
1609
/*
 * .abort() for external snapshots.
 *
 * If prepare managed to append the overlay (state->overlay_appended), undo
 * that: detach old_bs as the overlay's backing node and put old_bs back in
 * the overlay's place.  Nothing is done if the overlay was never opened or
 * never appended.
 */
static void external_snapshot_abort(BlkActionState *common)
{
    ExternalSnapshotState *state =
                             DO_UPCAST(ExternalSnapshotState, common, common);
    if (state->new_bs) {
        if (state->overlay_appended) {
            AioContext *aio_context;
            AioContext *tmp_context;
            int ret;

            aio_context = bdrv_get_aio_context(state->old_bs);
            aio_context_acquire(aio_context);

            bdrv_ref(state->old_bs);   /* we can't let bdrv_set_backing_hd()
                                          close state->old_bs; we need it */
            bdrv_set_backing_hd(state->new_bs, NULL, &error_abort);

            /*
             * The call to bdrv_set_backing_hd() above returns state->old_bs to
             * the main AioContext. As we're still going to be using it, return
             * it to the AioContext it was before.
             */
            tmp_context = bdrv_get_aio_context(state->old_bs);
            if (aio_context != tmp_context) {
                /*
                 * Hold only the node's current context while moving it,
                 * then switch back to holding the original one.
                 */
                aio_context_release(aio_context);
                aio_context_acquire(tmp_context);

                ret = bdrv_try_change_aio_context(state->old_bs,
                                                  aio_context, NULL, NULL);
                assert(ret == 0);

                aio_context_release(tmp_context);
                aio_context_acquire(aio_context);
            }

            bdrv_replace_node(state->new_bs, state->old_bs, &error_abort);
            bdrv_unref(state->old_bs); /* bdrv_replace_node() ref'ed old_bs */

            aio_context_release(aio_context);
        }
    }
}
1652
1653static void external_snapshot_clean(BlkActionState *common)
1654{
1655    ExternalSnapshotState *state =
1656                             DO_UPCAST(ExternalSnapshotState, common, common);
1657    AioContext *aio_context;
1658
1659    if (!state->old_bs) {
1660        return;
1661    }
1662
1663    aio_context = bdrv_get_aio_context(state->old_bs);
1664    aio_context_acquire(aio_context);
1665
1666    bdrv_drained_end(state->old_bs);
1667    bdrv_unref(state->new_bs);
1668
1669    aio_context_release(aio_context);
1670}
1671
/*
 * drive-backup private data.
 *
 * @common must stay the first member (see BlkActionState).
 */
typedef struct DriveBackupState {
    BlkActionState common;
    /* Source node; drained by prepare until .clean() */
    BlockDriverState *bs;
    /* Job returned by do_backup_common(); started on commit, cancelled on abort */
    BlockJob *job;
} DriveBackupState;
1677
/*
 * Forward declaration: the backup prepare handlers below call this helper
 * before its definition (being static, it is defined elsewhere in this
 * translation unit).
 */
static BlockJob *do_backup_common(BackupCommon *backup,
                                  BlockDriverState *bs,
                                  BlockDriverState *target_bs,
                                  AioContext *aio_context,
                                  JobTxn *txn, Error **errp);
1683
/*
 * .prepare() for drive-backup.
 *
 * Looks up and drains the source node, creates the target image unless
 * mode is "existing", opens the target, moves it into the source's
 * AioContext and creates the backup job with do_backup_common().  The job
 * is stored in state->job; it is started by .commit() and cancelled by
 * .abort().
 *
 * On failure @errp is set.  Once state->bs has been assigned the source
 * stays drained even on failure; .clean() ends the drain.
 */
static void drive_backup_prepare(BlkActionState *common, Error **errp)
{
    DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
    DriveBackup *backup;
    BlockDriverState *bs;
    BlockDriverState *target_bs;
    BlockDriverState *source = NULL;
    AioContext *aio_context;
    AioContext *old_context;
    const char *format;
    QDict *options;
    Error *local_err = NULL;
    int flags;
    int64_t size;
    bool set_backing_hd = false;
    int ret;

    assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
    backup = common->action->u.drive_backup.data;

    /* Default image creation mode when the caller did not specify one */
    if (!backup->has_mode) {
        backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
    }

    bs = bdrv_lookup_bs(backup->device, backup->device, errp);
    if (!bs) {
        return;
    }

    if (!bs->drv) {
        error_setg(errp, "Device has no medium");
        return;
    }

    aio_context = bdrv_get_aio_context(bs);
    aio_context_acquire(aio_context);

    /* Recorded before draining so that .clean() can find the node */
    state->bs = bs;
    /* Paired with .clean() */
    bdrv_drained_begin(bs);

    /* A newly created target defaults to the source's format */
    format = backup->format;
    if (!format && backup->mode != NEW_IMAGE_MODE_EXISTING) {
        format = bs->drv->format_name;
    }

    /* Early check to avoid creating target */
    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
        goto out;
    }

    flags = bs->open_flags | BDRV_O_RDWR;

    /*
     * See if we have a backing HD we can use to create our new image
     * on top of.
     */
    if (backup->sync == MIRROR_SYNC_MODE_TOP) {
        /*
         * Backup will not replace the source by the target, so none
         * of the filters skipped here will be removed (in contrast to
         * mirror).  Therefore, we can skip all of them when looking
         * for the first COW relationship.
         */
        source = bdrv_cow_bs(bdrv_skip_filters(bs));
        if (!source) {
            /* No backing image below the source: fall back to a full sync */
            backup->sync = MIRROR_SYNC_MODE_FULL;
        }
    }
    if (backup->sync == MIRROR_SYNC_MODE_NONE) {
        /* The source itself becomes the target's backing node (set below) */
        source = bs;
        flags |= BDRV_O_NO_BACKING;
        set_backing_hd = true;
    }

    size = bdrv_getlength(bs);
    if (size < 0) {
        error_setg_errno(errp, -size, "bdrv_getlength failed");
        goto out;
    }

    if (backup->mode != NEW_IMAGE_MODE_EXISTING) {
        assert(format);
        if (source) {
            /* Implicit filters should not appear in the filename */
            BlockDriverState *explicit_backing =
                bdrv_skip_implicit_filters(source);

            bdrv_refresh_filename(explicit_backing);
            bdrv_img_create(backup->target, format,
                            explicit_backing->filename,
                            explicit_backing->drv->format_name, NULL,
                            size, flags, false, &local_err);
        } else {
            bdrv_img_create(backup->target, format, NULL, NULL, NULL,
                            size, flags, false, &local_err);
        }
    }

    /* local_err can only have been set by bdrv_img_create() above */
    if (local_err) {
        error_propagate(errp, local_err);
        goto out;
    }

    options = qdict_new();
    qdict_put_str(options, "discard", "unmap");
    qdict_put_str(options, "detect-zeroes", "unmap");
    if (format) {
        qdict_put_str(options, "driver", format);
    }

    target_bs = bdrv_open(backup->target, NULL, options, flags, errp);
    if (!target_bs) {
        goto out;
    }

    /* Honor bdrv_try_change_aio_context() context acquisition requirements. */
    old_context = bdrv_get_aio_context(target_bs);
    aio_context_release(aio_context);
    aio_context_acquire(old_context);

    ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp);
    if (ret < 0) {
        /*
         * Only old_context is held at this point, so unwind by hand
         * instead of jumping to the labels, which release aio_context.
         */
        bdrv_unref(target_bs);
        aio_context_release(old_context);
        return;
    }

    aio_context_release(old_context);
    aio_context_acquire(aio_context);

    if (set_backing_hd) {
        if (bdrv_set_backing_hd(target_bs, source, errp) < 0) {
            goto unref;
        }
    }

    state->job = do_backup_common(qapi_DriveBackup_base(backup),
                                  bs, target_bs, aio_context,
                                  common->block_job_txn, errp);

unref:
    /* Drop the reference obtained from bdrv_open(), on success too */
    bdrv_unref(target_bs);
out:
    aio_context_release(aio_context);
}
1830
1831static void drive_backup_commit(BlkActionState *common)
1832{
1833    DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
1834    AioContext *aio_context;
1835
1836    aio_context = bdrv_get_aio_context(state->bs);
1837    aio_context_acquire(aio_context);
1838
1839    assert(state->job);
1840    job_start(&state->job->job);
1841
1842    aio_context_release(aio_context);
1843}
1844
1845static void drive_backup_abort(BlkActionState *common)
1846{
1847    DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
1848
1849    if (state->job) {
1850        job_cancel_sync(&state->job->job, true);
1851    }
1852}
1853
1854static void drive_backup_clean(BlkActionState *common)
1855{
1856    DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
1857    AioContext *aio_context;
1858
1859    if (!state->bs) {
1860        return;
1861    }
1862
1863    aio_context = bdrv_get_aio_context(state->bs);
1864    aio_context_acquire(aio_context);
1865
1866    bdrv_drained_end(state->bs);
1867
1868    aio_context_release(aio_context);
1869}
1870
1871typedef struct BlockdevBackupState {
1872    BlkActionState common;
1873    BlockDriverState *bs;
1874    BlockJob *job;
1875} BlockdevBackupState;
1876
1877static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
1878{
1879    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
1880    BlockdevBackup *backup;
1881    BlockDriverState *bs;
1882    BlockDriverState *target_bs;
1883    AioContext *aio_context;
1884    AioContext *old_context;
1885    int ret;
1886
1887    assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP);
1888    backup = common->action->u.blockdev_backup.data;
1889
1890    bs = bdrv_lookup_bs(backup->device, backup->device, errp);
1891    if (!bs) {
1892        return;
1893    }
1894
1895    target_bs = bdrv_lookup_bs(backup->target, backup->target, errp);
1896    if (!target_bs) {
1897        return;
1898    }
1899
1900    /* Honor bdrv_try_change_aio_context() context acquisition requirements. */
1901    aio_context = bdrv_get_aio_context(bs);
1902    old_context = bdrv_get_aio_context(target_bs);
1903    aio_context_acquire(old_context);
1904
1905    ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp);
1906    if (ret < 0) {
1907        aio_context_release(old_context);
1908        return;
1909    }
1910
1911    aio_context_release(old_context);
1912    aio_context_acquire(aio_context);
1913    state->bs = bs;
1914
1915    /* Paired with .clean() */
1916    bdrv_drained_begin(state->bs);
1917
1918    state->job = do_backup_common(qapi_BlockdevBackup_base(backup),
1919                                  bs, target_bs, aio_context,
1920                                  common->block_job_txn, errp);
1921
1922    aio_context_release(aio_context);
1923}
1924
1925static void blockdev_backup_commit(BlkActionState *common)
1926{
1927    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
1928    AioContext *aio_context;
1929
1930    aio_context = bdrv_get_aio_context(state->bs);
1931    aio_context_acquire(aio_context);
1932
1933    assert(state->job);
1934    job_start(&state->job->job);
1935
1936    aio_context_release(aio_context);
1937}
1938
1939static void blockdev_backup_abort(BlkActionState *common)
1940{
1941    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
1942
1943    if (state->job) {
1944        job_cancel_sync(&state->job->job, true);
1945    }
1946}
1947
1948static void blockdev_backup_clean(BlkActionState *common)
1949{
1950    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
1951    AioContext *aio_context;
1952
1953    if (!state->bs) {
1954        return;
1955    }
1956
1957    aio_context = bdrv_get_aio_context(state->bs);
1958    aio_context_acquire(aio_context);
1959
1960    bdrv_drained_end(state->bs);
1961
1962    aio_context_release(aio_context);
1963}
1964
1965typedef struct BlockDirtyBitmapState {
1966    BlkActionState common;
1967    BdrvDirtyBitmap *bitmap;
1968    BlockDriverState *bs;
1969    HBitmap *backup;
1970    bool prepared;
1971    bool was_enabled;
1972} BlockDirtyBitmapState;
1973
1974static void block_dirty_bitmap_add_prepare(BlkActionState *common,
1975                                           Error **errp)
1976{
1977    Error *local_err = NULL;
1978    BlockDirtyBitmapAdd *action;
1979    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
1980                                             common, common);
1981
1982    if (action_check_completion_mode(common, errp) < 0) {
1983        return;
1984    }
1985
1986    action = common->action->u.block_dirty_bitmap_add.data;
1987    /* AIO context taken and released within qmp_block_dirty_bitmap_add */
1988    qmp_block_dirty_bitmap_add(action->node, action->name,
1989                               action->has_granularity, action->granularity,
1990                               action->has_persistent, action->persistent,
1991                               action->has_disabled, action->disabled,
1992                               &local_err);
1993
1994    if (!local_err) {
1995        state->prepared = true;
1996    } else {
1997        error_propagate(errp, local_err);
1998    }
1999}
2000
2001static void block_dirty_bitmap_add_abort(BlkActionState *common)
2002{
2003    BlockDirtyBitmapAdd *action;
2004    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
2005                                             common, common);
2006
2007    action = common->action->u.block_dirty_bitmap_add.data;
2008    /* Should not be able to fail: IF the bitmap was added via .prepare(),
2009     * then the node reference and bitmap name must have been valid.
2010     */
2011    if (state->prepared) {
2012        qmp_block_dirty_bitmap_remove(action->node, action->name, &error_abort);
2013    }
2014}
2015
2016static void block_dirty_bitmap_clear_prepare(BlkActionState *common,
2017                                             Error **errp)
2018{
2019    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
2020                                             common, common);
2021    BlockDirtyBitmap *action;
2022
2023    if (action_check_completion_mode(common, errp) < 0) {
2024        return;
2025    }
2026
2027    action = common->action->u.block_dirty_bitmap_clear.data;
2028    state->bitmap = block_dirty_bitmap_lookup(action->node,
2029                                              action->name,
2030                                              &state->bs,
2031                                              errp);
2032    if (!state->bitmap) {
2033        return;
2034    }
2035
2036    if (bdrv_dirty_bitmap_check(state->bitmap, BDRV_BITMAP_DEFAULT, errp)) {
2037        return;
2038    }
2039
2040    bdrv_clear_dirty_bitmap(state->bitmap, &state->backup);
2041}
2042
2043static void block_dirty_bitmap_restore(BlkActionState *common)
2044{
2045    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
2046                                             common, common);
2047
2048    if (state->backup) {
2049        bdrv_restore_dirty_bitmap(state->bitmap, state->backup);
2050    }
2051}
2052
2053static void block_dirty_bitmap_free_backup(BlkActionState *common)
2054{
2055    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
2056                                             common, common);
2057
2058    hbitmap_free(state->backup);
2059}
2060
2061static void block_dirty_bitmap_enable_prepare(BlkActionState *common,
2062                                              Error **errp)
2063{
2064    BlockDirtyBitmap *action;
2065    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
2066                                             common, common);
2067
2068    if (action_check_completion_mode(common, errp) < 0) {
2069        return;
2070    }
2071
2072    action = common->action->u.block_dirty_bitmap_enable.data;
2073    state->bitmap = block_dirty_bitmap_lookup(action->node,
2074                                              action->name,
2075                                              NULL,
2076                                              errp);
2077    if (!state->bitmap) {
2078        return;
2079    }
2080
2081    if (bdrv_dirty_bitmap_check(state->bitmap, BDRV_BITMAP_ALLOW_RO, errp)) {
2082        return;
2083    }
2084
2085    state->was_enabled = bdrv_dirty_bitmap_enabled(state->bitmap);
2086    bdrv_enable_dirty_bitmap(state->bitmap);
2087}
2088
2089static void block_dirty_bitmap_enable_abort(BlkActionState *common)
2090{
2091    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
2092                                             common, common);
2093
2094    if (!state->was_enabled) {
2095        bdrv_disable_dirty_bitmap(state->bitmap);
2096    }
2097}
2098
2099static void block_dirty_bitmap_disable_prepare(BlkActionState *common,
2100                                               Error **errp)
2101{
2102    BlockDirtyBitmap *action;
2103    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
2104                                             common, common);
2105
2106    if (action_check_completion_mode(common, errp) < 0) {
2107        return;
2108    }
2109
2110    action = common->action->u.block_dirty_bitmap_disable.data;
2111    state->bitmap = block_dirty_bitmap_lookup(action->node,
2112                                              action->name,
2113                                              NULL,
2114                                              errp);
2115    if (!state->bitmap) {
2116        return;
2117    }
2118
2119    if (bdrv_dirty_bitmap_check(state->bitmap, BDRV_BITMAP_ALLOW_RO, errp)) {
2120        return;
2121    }
2122
2123    state->was_enabled = bdrv_dirty_bitmap_enabled(state->bitmap);
2124    bdrv_disable_dirty_bitmap(state->bitmap);
2125}
2126
2127static void block_dirty_bitmap_disable_abort(BlkActionState *common)
2128{
2129    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
2130                                             common, common);
2131
2132    if (state->was_enabled) {
2133        bdrv_enable_dirty_bitmap(state->bitmap);
2134    }
2135}
2136
2137static void block_dirty_bitmap_merge_prepare(BlkActionState *common,
2138                                             Error **errp)
2139{
2140    BlockDirtyBitmapMerge *action;
2141    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
2142                                             common, common);
2143
2144    if (action_check_completion_mode(common, errp) < 0) {
2145        return;
2146    }
2147
2148    action = common->action->u.block_dirty_bitmap_merge.data;
2149
2150    state->bitmap = block_dirty_bitmap_merge(action->node, action->target,
2151                                             action->bitmaps, &state->backup,
2152                                             errp);
2153}
2154
2155static void block_dirty_bitmap_remove_prepare(BlkActionState *common,
2156                                              Error **errp)
2157{
2158    BlockDirtyBitmap *action;
2159    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
2160                                             common, common);
2161
2162    if (action_check_completion_mode(common, errp) < 0) {
2163        return;
2164    }
2165
2166    action = common->action->u.block_dirty_bitmap_remove.data;
2167
2168    state->bitmap = block_dirty_bitmap_remove(action->node, action->name,
2169                                              false, &state->bs, errp);
2170    if (state->bitmap) {
2171        bdrv_dirty_bitmap_skip_store(state->bitmap, true);
2172        bdrv_dirty_bitmap_set_busy(state->bitmap, true);
2173    }
2174}
2175
2176static void block_dirty_bitmap_remove_abort(BlkActionState *common)
2177{
2178    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
2179                                             common, common);
2180
2181    if (state->bitmap) {
2182        bdrv_dirty_bitmap_skip_store(state->bitmap, false);
2183        bdrv_dirty_bitmap_set_busy(state->bitmap, false);
2184    }
2185}
2186
2187static void block_dirty_bitmap_remove_commit(BlkActionState *common)
2188{
2189    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
2190                                             common, common);
2191
2192    bdrv_dirty_bitmap_set_busy(state->bitmap, false);
2193    bdrv_release_dirty_bitmap(state->bitmap);
2194}
2195
2196static void abort_prepare(BlkActionState *common, Error **errp)
2197{
2198    error_setg(errp, "Transaction aborted using Abort action");
2199}
2200
2201static void abort_commit(BlkActionState *common)
2202{
2203    g_assert_not_reached(); /* this action never succeeds */
2204}
2205
2206static const BlkActionOps actions[] = {
2207    [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT] = {
2208        .instance_size = sizeof(ExternalSnapshotState),
2209        .prepare  = external_snapshot_prepare,
2210        .commit   = external_snapshot_commit,
2211        .abort = external_snapshot_abort,
2212        .clean = external_snapshot_clean,
2213    },
2214    [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC] = {
2215        .instance_size = sizeof(ExternalSnapshotState),
2216        .prepare  = external_snapshot_prepare,
2217        .commit   = external_snapshot_commit,
2218        .abort = external_snapshot_abort,
2219        .clean = external_snapshot_clean,
2220    },
2221    [TRANSACTION_ACTION_KIND_DRIVE_BACKUP] = {
2222        .instance_size = sizeof(DriveBackupState),
2223        .prepare = drive_backup_prepare,
2224        .commit = drive_backup_commit,
2225        .abort = drive_backup_abort,
2226        .clean = drive_backup_clean,
2227    },
2228    [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = {
2229        .instance_size = sizeof(BlockdevBackupState),
2230        .prepare = blockdev_backup_prepare,
2231        .commit = blockdev_backup_commit,
2232        .abort = blockdev_backup_abort,
2233        .clean = blockdev_backup_clean,
2234    },
2235    [TRANSACTION_ACTION_KIND_ABORT] = {
2236        .instance_size = sizeof(BlkActionState),
2237        .prepare = abort_prepare,
2238        .commit = abort_commit,
2239    },
2240    [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC] = {
2241        .instance_size = sizeof(InternalSnapshotState),
2242        .prepare  = internal_snapshot_prepare,
2243        .abort = internal_snapshot_abort,
2244        .clean = internal_snapshot_clean,
2245    },
2246    [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ADD] = {
2247        .instance_size = sizeof(BlockDirtyBitmapState),
2248        .prepare = block_dirty_bitmap_add_prepare,
2249        .abort = block_dirty_bitmap_add_abort,
2250    },
2251    [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_CLEAR] = {
2252        .instance_size = sizeof(BlockDirtyBitmapState),
2253        .prepare = block_dirty_bitmap_clear_prepare,
2254        .commit = block_dirty_bitmap_free_backup,
2255        .abort = block_dirty_bitmap_restore,
2256    },
2257    [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ENABLE] = {
2258        .instance_size = sizeof(BlockDirtyBitmapState),
2259        .prepare = block_dirty_bitmap_enable_prepare,
2260        .abort = block_dirty_bitmap_enable_abort,
2261    },
2262    [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_DISABLE] = {
2263        .instance_size = sizeof(BlockDirtyBitmapState),
2264        .prepare = block_dirty_bitmap_disable_prepare,
2265        .abort = block_dirty_bitmap_disable_abort,
2266    },
2267    [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_MERGE] = {
2268        .instance_size = sizeof(BlockDirtyBitmapState),
2269        .prepare = block_dirty_bitmap_merge_prepare,
2270        .commit = block_dirty_bitmap_free_backup,
2271        .abort = block_dirty_bitmap_restore,
2272    },
2273    [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_REMOVE] = {
2274        .instance_size = sizeof(BlockDirtyBitmapState),
2275        .prepare = block_dirty_bitmap_remove_prepare,
2276        .commit = block_dirty_bitmap_remove_commit,
2277        .abort = block_dirty_bitmap_remove_abort,
2278    },
2279    /* Where are transactions for MIRROR, COMMIT and STREAM?
2280     * Although these blockjobs use transaction callbacks like the backup job,
2281     * these jobs do not necessarily adhere to transaction semantics.
2282     * These jobs may not fully undo all of their actions on abort, nor do they
2283     * necessarily work in transactions with more than one job in them.
2284     */
2285};
2286
2287/**
2288 * Allocate a TransactionProperties structure if necessary, and fill
2289 * that structure with desired defaults if they are unset.
2290 */
2291static TransactionProperties *get_transaction_properties(
2292    TransactionProperties *props)
2293{
2294    if (!props) {
2295        props = g_new0(TransactionProperties, 1);
2296    }
2297
2298    if (!props->has_completion_mode) {
2299        props->has_completion_mode = true;
2300        props->completion_mode = ACTION_COMPLETION_MODE_INDIVIDUAL;
2301    }
2302
2303    return props;
2304}
2305
2306/*
2307 * 'Atomic' group operations.  The operations are performed as a set, and if
2308 * any fail then we roll back all operations in the group.
2309 *
2310 * Always run under BQL.
2311 */
2312void qmp_transaction(TransactionActionList *dev_list,
2313                     struct TransactionProperties *props,
2314                     Error **errp)
2315{
2316    TransactionActionList *dev_entry = dev_list;
2317    bool has_props = !!props;
2318    JobTxn *block_job_txn = NULL;
2319    BlkActionState *state, *next;
2320    Error *local_err = NULL;
2321
2322    GLOBAL_STATE_CODE();
2323
2324    QTAILQ_HEAD(, BlkActionState) snap_bdrv_states;
2325    QTAILQ_INIT(&snap_bdrv_states);
2326
2327    /* Does this transaction get canceled as a group on failure?
2328     * If not, we don't really need to make a JobTxn.
2329     */
2330    props = get_transaction_properties(props);
2331    if (props->completion_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) {
2332        block_job_txn = job_txn_new();
2333    }
2334
2335    /* drain all i/o before any operations */
2336    bdrv_drain_all();
2337
2338    /* We don't do anything in this loop that commits us to the operations */
2339    while (NULL != dev_entry) {
2340        TransactionAction *dev_info = NULL;
2341        const BlkActionOps *ops;
2342
2343        dev_info = dev_entry->value;
2344        dev_entry = dev_entry->next;
2345
2346        assert(dev_info->type < ARRAY_SIZE(actions));
2347
2348        ops = &actions[dev_info->type];
2349        assert(ops->instance_size > 0);
2350
2351        state = g_malloc0(ops->instance_size);
2352        state->ops = ops;
2353        state->action = dev_info;
2354        state->block_job_txn = block_job_txn;
2355        state->txn_props = props;
2356        QTAILQ_INSERT_TAIL(&snap_bdrv_states, state, entry);
2357
2358        state->ops->prepare(state, &local_err);
2359        if (local_err) {
2360            error_propagate(errp, local_err);
2361            goto delete_and_fail;
2362        }
2363    }
2364
2365    QTAILQ_FOREACH(state, &snap_bdrv_states, entry) {
2366        if (state->ops->commit) {
2367            state->ops->commit(state);
2368        }
2369    }
2370
2371    /* success */
2372    goto exit;
2373
2374delete_and_fail:
2375    /* failure, and it is all-or-none; roll back all operations */
2376    QTAILQ_FOREACH_REVERSE(state, &snap_bdrv_states, entry) {
2377        if (state->ops->abort) {
2378            state->ops->abort(state);
2379        }
2380    }
2381exit:
2382    QTAILQ_FOREACH_SAFE(state, &snap_bdrv_states, entry, next) {
2383        if (state->ops->clean) {
2384            state->ops->clean(state);
2385        }
2386        g_free(state);
2387    }
2388    if (!has_props) {
2389        qapi_free_TransactionProperties(props);
2390    }
2391    job_txn_unref(block_job_txn);
2392}
2393
2394BlockDirtyBitmapSha256 *qmp_x_debug_block_dirty_bitmap_sha256(const char *node,
2395                                                              const char *name,
2396                                                              Error **errp)
2397{
2398    BdrvDirtyBitmap *bitmap;
2399    BlockDriverState *bs;
2400    BlockDirtyBitmapSha256 *ret = NULL;
2401    char *sha256;
2402
2403    bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp);
2404    if (!bitmap || !bs) {
2405        return NULL;
2406    }
2407
2408    sha256 = bdrv_dirty_bitmap_sha256(bitmap, errp);
2409    if (sha256 == NULL) {
2410        return NULL;
2411    }
2412
2413    ret = g_new(BlockDirtyBitmapSha256, 1);
2414    ret->sha256 = sha256;
2415
2416    return ret;
2417}
2418
2419void coroutine_fn qmp_block_resize(const char *device, const char *node_name,
2420                                   int64_t size, Error **errp)
2421{
2422    Error *local_err = NULL;
2423    BlockBackend *blk;
2424    BlockDriverState *bs;
2425    AioContext *old_ctx;
2426
2427    bs = bdrv_lookup_bs(device, node_name, &local_err);
2428    if (local_err) {
2429        error_propagate(errp, local_err);
2430        return;
2431    }
2432
2433    if (size < 0) {
2434        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "size", "a >0 size");
2435        return;
2436    }
2437
2438    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_RESIZE, NULL)) {
2439        error_setg(errp, QERR_DEVICE_IN_USE, device);
2440        return;
2441    }
2442
2443    blk = blk_co_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp);
2444    if (!blk) {
2445        return;
2446    }
2447
2448    bdrv_co_lock(bs);
2449    bdrv_drained_begin(bs);
2450    bdrv_co_unlock(bs);
2451
2452    old_ctx = bdrv_co_enter(bs);
2453    blk_co_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp);
2454    bdrv_co_leave(bs, old_ctx);
2455
2456    bdrv_co_lock(bs);
2457    bdrv_drained_end(bs);
2458    blk_co_unref(blk);
2459    bdrv_co_unlock(bs);
2460}
2461
2462void qmp_block_stream(const char *job_id, const char *device,
2463                      const char *base,
2464                      const char *base_node,
2465                      const char *backing_file,
2466                      const char *bottom,
2467                      bool has_speed, int64_t speed,
2468                      bool has_on_error, BlockdevOnError on_error,
2469                      const char *filter_node_name,
2470                      bool has_auto_finalize, bool auto_finalize,
2471                      bool has_auto_dismiss, bool auto_dismiss,
2472                      Error **errp)
2473{
2474    BlockDriverState *bs, *iter, *iter_end;
2475    BlockDriverState *base_bs = NULL;
2476    BlockDriverState *bottom_bs = NULL;
2477    AioContext *aio_context;
2478    Error *local_err = NULL;
2479    int job_flags = JOB_DEFAULT;
2480
2481    if (base && base_node) {
2482        error_setg(errp, "'base' and 'base-node' cannot be specified "
2483                   "at the same time");
2484        return;
2485    }
2486
2487    if (base && bottom) {
2488        error_setg(errp, "'base' and 'bottom' cannot be specified "
2489                   "at the same time");
2490        return;
2491    }
2492
2493    if (bottom && base_node) {
2494        error_setg(errp, "'bottom' and 'base-node' cannot be specified "
2495                   "at the same time");
2496        return;
2497    }
2498
2499    if (!has_on_error) {
2500        on_error = BLOCKDEV_ON_ERROR_REPORT;
2501    }
2502
2503    bs = bdrv_lookup_bs(device, device, errp);
2504    if (!bs) {
2505        return;
2506    }
2507
2508    aio_context = bdrv_get_aio_context(bs);
2509    aio_context_acquire(aio_context);
2510
2511    if (base) {
2512        base_bs = bdrv_find_backing_image(bs, base);
2513        if (base_bs == NULL) {
2514            error_setg(errp, "Can't find '%s' in the backing chain", base);
2515            goto out;
2516        }
2517        assert(bdrv_get_aio_context(base_bs) == aio_context);
2518    }
2519
2520    if (base_node) {
2521        base_bs = bdrv_lookup_bs(NULL, base_node, errp);
2522        if (!base_bs) {
2523            goto out;
2524        }
2525        if (bs == base_bs || !bdrv_chain_contains(bs, base_bs)) {
2526            error_setg(errp, "Node '%s' is not a backing image of '%s'",
2527                       base_node, device);
2528            goto out;
2529        }
2530        assert(bdrv_get_aio_context(base_bs) == aio_context);
2531        bdrv_refresh_filename(base_bs);
2532    }
2533
2534    if (bottom) {
2535        bottom_bs = bdrv_lookup_bs(NULL, bottom, errp);
2536        if (!bottom_bs) {
2537            goto out;
2538        }
2539        if (!bottom_bs->drv) {
2540            error_setg(errp, "Node '%s' is not open", bottom);
2541            goto out;
2542        }
2543        if (bottom_bs->drv->is_filter) {
2544            error_setg(errp, "Node '%s' is a filter, use a non-filter node "
2545                       "as 'bottom'", bottom);
2546            goto out;
2547        }
2548        if (!bdrv_chain_contains(bs, bottom_bs)) {
2549            error_setg(errp, "Node '%s' is not in a chain starting from '%s'",
2550                       bottom, device);
2551            goto out;
2552        }
2553        assert(bdrv_get_aio_context(bottom_bs) == aio_context);
2554    }
2555
2556    /*
2557     * Check for op blockers in the whole chain between bs and base (or bottom)
2558     */
2559    iter_end = bottom ? bdrv_filter_or_cow_bs(bottom_bs) : base_bs;
2560    for (iter = bs; iter && iter != iter_end;
2561         iter = bdrv_filter_or_cow_bs(iter))
2562    {
2563        if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_STREAM, errp)) {
2564            goto out;
2565        }
2566    }
2567
2568    /* if we are streaming the entire chain, the result will have no backing
2569     * file, and specifying one is therefore an error */
2570    if (!base_bs && backing_file) {
2571        error_setg(errp, "backing file specified, but streaming the "
2572                         "entire chain");
2573        goto out;
2574    }
2575
2576    if (has_auto_finalize && !auto_finalize) {
2577        job_flags |= JOB_MANUAL_FINALIZE;
2578    }
2579    if (has_auto_dismiss && !auto_dismiss) {
2580        job_flags |= JOB_MANUAL_DISMISS;
2581    }
2582
2583    stream_start(job_id, bs, base_bs, backing_file,
2584                 bottom_bs, job_flags, has_speed ? speed : 0, on_error,
2585                 filter_node_name, &local_err);
2586    if (local_err) {
2587        error_propagate(errp, local_err);
2588        goto out;
2589    }
2590
2591    trace_qmp_block_stream(bs);
2592
2593out:
2594    aio_context_release(aio_context);
2595}
2596
2597void qmp_block_commit(const char *job_id, const char *device,
2598                      const char *base_node,
2599                      const char *base,
2600                      const char *top_node,
2601                      const char *top,
2602                      const char *backing_file,
2603                      bool has_speed, int64_t speed,
2604                      bool has_on_error, BlockdevOnError on_error,
2605                      const char *filter_node_name,
2606                      bool has_auto_finalize, bool auto_finalize,
2607                      bool has_auto_dismiss, bool auto_dismiss,
2608                      Error **errp)
2609{
2610    BlockDriverState *bs;
2611    BlockDriverState *iter;
2612    BlockDriverState *base_bs, *top_bs;
2613    AioContext *aio_context;
2614    Error *local_err = NULL;
2615    int job_flags = JOB_DEFAULT;
2616    uint64_t top_perm, top_shared;
2617
2618    if (!has_speed) {
2619        speed = 0;
2620    }
2621    if (!has_on_error) {
2622        on_error = BLOCKDEV_ON_ERROR_REPORT;
2623    }
2624    if (has_auto_finalize && !auto_finalize) {
2625        job_flags |= JOB_MANUAL_FINALIZE;
2626    }
2627    if (has_auto_dismiss && !auto_dismiss) {
2628        job_flags |= JOB_MANUAL_DISMISS;
2629    }
2630
2631    /* Important Note:
2632     *  libvirt relies on the DeviceNotFound error class in order to probe for
2633     *  live commit feature versions; for this to work, we must make sure to
2634     *  perform the device lookup before any generic errors that may occur in a
2635     *  scenario in which all optional arguments are omitted. */
2636    bs = qmp_get_root_bs(device, &local_err);
2637    if (!bs) {
2638        bs = bdrv_lookup_bs(device, device, NULL);
2639        if (!bs) {
2640            error_free(local_err);
2641            error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
2642                      "Device '%s' not found", device);
2643        } else {
2644            error_propagate(errp, local_err);
2645        }
2646        return;
2647    }
2648
2649    aio_context = bdrv_get_aio_context(bs);
2650    aio_context_acquire(aio_context);
2651
2652    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, errp)) {
2653        goto out;
2654    }
2655
2656    /* default top_bs is the active layer */
2657    top_bs = bs;
2658
2659    if (top_node && top) {
2660        error_setg(errp, "'top-node' and 'top' are mutually exclusive");
2661        goto out;
2662    } else if (top_node) {
2663        top_bs = bdrv_lookup_bs(NULL, top_node, errp);
2664        if (top_bs == NULL) {
2665            goto out;
2666        }
2667        if (!bdrv_chain_contains(bs, top_bs)) {
2668            error_setg(errp, "'%s' is not in this backing file chain",
2669                       top_node);
2670            goto out;
2671        }
2672    } else if (top) {
2673        /* This strcmp() is just a shortcut, there is no need to
2674         * refresh @bs's filename.  If it mismatches,
2675         * bdrv_find_backing_image() will do the refresh and may still
2676         * return @bs. */
2677        if (strcmp(bs->filename, top) != 0) {
2678            top_bs = bdrv_find_backing_image(bs, top);
2679        }
2680    }
2681
2682    if (top_bs == NULL) {
2683        error_setg(errp, "Top image file %s not found", top ? top : "NULL");
2684        goto out;
2685    }
2686
2687    assert(bdrv_get_aio_context(top_bs) == aio_context);
2688
2689    if (base_node && base) {
2690        error_setg(errp, "'base-node' and 'base' are mutually exclusive");
2691        goto out;
2692    } else if (base_node) {
2693        base_bs = bdrv_lookup_bs(NULL, base_node, errp);
2694        if (base_bs == NULL) {
2695            goto out;
2696        }
2697        if (!bdrv_chain_contains(top_bs, base_bs)) {
2698            error_setg(errp, "'%s' is not in this backing file chain",
2699                       base_node);
2700            goto out;
2701        }
2702    } else if (base) {
2703        base_bs = bdrv_find_backing_image(top_bs, base);
2704        if (base_bs == NULL) {
2705            error_setg(errp, "Can't find '%s' in the backing chain", base);
2706            goto out;
2707        }
2708    } else {
2709        base_bs = bdrv_find_base(top_bs);
2710        if (base_bs == NULL) {
2711            error_setg(errp, "There is no backimg image");
2712            goto out;
2713        }
2714    }
2715
2716    assert(bdrv_get_aio_context(base_bs) == aio_context);
2717
2718    for (iter = top_bs; iter != bdrv_filter_or_cow_bs(base_bs);
2719         iter = bdrv_filter_or_cow_bs(iter))
2720    {
2721        if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
2722            goto out;
2723        }
2724    }
2725
2726    /* Do not allow attempts to commit an image into itself */
2727    if (top_bs == base_bs) {
2728        error_setg(errp, "cannot commit an image into itself");
2729        goto out;
2730    }
2731
2732    /*
2733     * Active commit is required if and only if someone has taken a
2734     * WRITE permission on the top node.  Historically, we have always
2735     * used active commit for top nodes, so continue that practice
2736     * lest we possibly break clients that rely on this behavior, e.g.
2737     * to later attach this node to a writing parent.
2738     * (Active commit is never really wrong.)
2739     */
2740    bdrv_get_cumulative_perm(top_bs, &top_perm, &top_shared);
2741    if (top_perm & BLK_PERM_WRITE ||
2742        bdrv_skip_filters(top_bs) == bdrv_skip_filters(bs))
2743    {
2744        if (backing_file) {
2745            if (bdrv_skip_filters(top_bs) == bdrv_skip_filters(bs)) {
2746                error_setg(errp, "'backing-file' specified,"
2747                                 " but 'top' is the active layer");
2748            } else {
2749                error_setg(errp, "'backing-file' specified, but 'top' has a "
2750                                 "writer on it");
2751            }
2752            goto out;
2753        }
2754        if (!job_id) {
2755            /*
2756             * Emulate here what block_job_create() does, because it
2757             * is possible that @bs != @top_bs (the block job should
2758             * be named after @bs, even if @top_bs is the actual
2759             * source)
2760             */
2761            job_id = bdrv_get_device_name(bs);
2762        }
2763        commit_active_start(job_id, top_bs, base_bs, job_flags, speed, on_error,
2764                            filter_node_name, NULL, NULL, false, &local_err);
2765    } else {
2766        BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs);
2767        if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
2768            goto out;
2769        }
2770        commit_start(job_id, bs, base_bs, top_bs, job_flags,
2771                     speed, on_error, backing_file,
2772                     filter_node_name, &local_err);
2773    }
2774    if (local_err != NULL) {
2775        error_propagate(errp, local_err);
2776        goto out;
2777    }
2778
2779out:
2780    aio_context_release(aio_context);
2781}
2782
2783/* Common QMP interface for drive-backup and blockdev-backup */
2784static BlockJob *do_backup_common(BackupCommon *backup,
2785                                  BlockDriverState *bs,
2786                                  BlockDriverState *target_bs,
2787                                  AioContext *aio_context,
2788                                  JobTxn *txn, Error **errp)
2789{
2790    BlockJob *job = NULL;
2791    BdrvDirtyBitmap *bmap = NULL;
2792    BackupPerf perf = { .max_workers = 64 };
2793    int job_flags = JOB_DEFAULT;
2794
2795    if (!backup->has_speed) {
2796        backup->speed = 0;
2797    }
2798    if (!backup->has_on_source_error) {
2799        backup->on_source_error = BLOCKDEV_ON_ERROR_REPORT;
2800    }
2801    if (!backup->has_on_target_error) {
2802        backup->on_target_error = BLOCKDEV_ON_ERROR_REPORT;
2803    }
2804    if (!backup->has_auto_finalize) {
2805        backup->auto_finalize = true;
2806    }
2807    if (!backup->has_auto_dismiss) {
2808        backup->auto_dismiss = true;
2809    }
2810    if (!backup->has_compress) {
2811        backup->compress = false;
2812    }
2813
2814    if (backup->x_perf) {
2815        if (backup->x_perf->has_use_copy_range) {
2816            perf.use_copy_range = backup->x_perf->use_copy_range;
2817        }
2818        if (backup->x_perf->has_max_workers) {
2819            perf.max_workers = backup->x_perf->max_workers;
2820        }
2821        if (backup->x_perf->has_max_chunk) {
2822            perf.max_chunk = backup->x_perf->max_chunk;
2823        }
2824    }
2825
2826    if ((backup->sync == MIRROR_SYNC_MODE_BITMAP) ||
2827        (backup->sync == MIRROR_SYNC_MODE_INCREMENTAL)) {
2828        /* done before desugaring 'incremental' to print the right message */
2829        if (!backup->bitmap) {
2830            error_setg(errp, "must provide a valid bitmap name for "
2831                       "'%s' sync mode", MirrorSyncMode_str(backup->sync));
2832            return NULL;
2833        }
2834    }
2835
2836    if (backup->sync == MIRROR_SYNC_MODE_INCREMENTAL) {
2837        if (backup->has_bitmap_mode &&
2838            backup->bitmap_mode != BITMAP_SYNC_MODE_ON_SUCCESS) {
2839            error_setg(errp, "Bitmap sync mode must be '%s' "
2840                       "when using sync mode '%s'",
2841                       BitmapSyncMode_str(BITMAP_SYNC_MODE_ON_SUCCESS),
2842                       MirrorSyncMode_str(backup->sync));
2843            return NULL;
2844        }
2845        backup->has_bitmap_mode = true;
2846        backup->sync = MIRROR_SYNC_MODE_BITMAP;
2847        backup->bitmap_mode = BITMAP_SYNC_MODE_ON_SUCCESS;
2848    }
2849
2850    if (backup->bitmap) {
2851        bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap);
2852        if (!bmap) {
2853            error_setg(errp, "Bitmap '%s' could not be found", backup->bitmap);
2854            return NULL;
2855        }
2856        if (!backup->has_bitmap_mode) {
2857            error_setg(errp, "Bitmap sync mode must be given "
2858                       "when providing a bitmap");
2859            return NULL;
2860        }
2861        if (bdrv_dirty_bitmap_check(bmap, BDRV_BITMAP_ALLOW_RO, errp)) {
2862            return NULL;
2863        }
2864
2865        /* This does not produce a useful bitmap artifact: */
2866        if (backup->sync == MIRROR_SYNC_MODE_NONE) {
2867            error_setg(errp, "sync mode '%s' does not produce meaningful bitmap"
2868                       " outputs", MirrorSyncMode_str(backup->sync));
2869            return NULL;
2870        }
2871
2872        /* If the bitmap isn't used for input or output, this is useless: */
2873        if (backup->bitmap_mode == BITMAP_SYNC_MODE_NEVER &&
2874            backup->sync != MIRROR_SYNC_MODE_BITMAP) {
2875            error_setg(errp, "Bitmap sync mode '%s' has no meaningful effect"
2876                       " when combined with sync mode '%s'",
2877                       BitmapSyncMode_str(backup->bitmap_mode),
2878                       MirrorSyncMode_str(backup->sync));
2879            return NULL;
2880        }
2881    }
2882
2883    if (!backup->bitmap && backup->has_bitmap_mode) {
2884        error_setg(errp, "Cannot specify bitmap sync mode without a bitmap");
2885        return NULL;
2886    }
2887
2888    if (!backup->auto_finalize) {
2889        job_flags |= JOB_MANUAL_FINALIZE;
2890    }
2891    if (!backup->auto_dismiss) {
2892        job_flags |= JOB_MANUAL_DISMISS;
2893    }
2894
2895    job = backup_job_create(backup->job_id, bs, target_bs, backup->speed,
2896                            backup->sync, bmap, backup->bitmap_mode,
2897                            backup->compress,
2898                            backup->filter_node_name,
2899                            &perf,
2900                            backup->on_source_error,
2901                            backup->on_target_error,
2902                            job_flags, NULL, NULL, txn, errp);
2903    return job;
2904}
2905
2906void qmp_drive_backup(DriveBackup *backup, Error **errp)
2907{
2908    TransactionAction action = {
2909        .type = TRANSACTION_ACTION_KIND_DRIVE_BACKUP,
2910        .u.drive_backup.data = backup,
2911    };
2912    blockdev_do_action(&action, errp);
2913}
2914
2915BlockDeviceInfoList *qmp_query_named_block_nodes(bool has_flat,
2916                                                 bool flat,
2917                                                 Error **errp)
2918{
2919    bool return_flat = has_flat && flat;
2920
2921    return bdrv_named_nodes_list(return_flat, errp);
2922}
2923
2924XDbgBlockGraph *qmp_x_debug_query_block_graph(Error **errp)
2925{
2926    return bdrv_get_xdbg_block_graph(errp);
2927}
2928
2929void qmp_blockdev_backup(BlockdevBackup *backup, Error **errp)
2930{
2931    TransactionAction action = {
2932        .type = TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP,
2933        .u.blockdev_backup.data = backup,
2934    };
2935    blockdev_do_action(&action, errp);
2936}
2937
2938/* Parameter check and block job starting for drive mirroring.
2939 * Caller should hold @device and @target's aio context (must be the same).
2940 **/
2941static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
2942                                   BlockDriverState *target,
2943                                   const char *replaces,
2944                                   enum MirrorSyncMode sync,
2945                                   BlockMirrorBackingMode backing_mode,
2946                                   bool zero_target,
2947                                   bool has_speed, int64_t speed,
2948                                   bool has_granularity, uint32_t granularity,
2949                                   bool has_buf_size, int64_t buf_size,
2950                                   bool has_on_source_error,
2951                                   BlockdevOnError on_source_error,
2952                                   bool has_on_target_error,
2953                                   BlockdevOnError on_target_error,
2954                                   bool has_unmap, bool unmap,
2955                                   const char *filter_node_name,
2956                                   bool has_copy_mode, MirrorCopyMode copy_mode,
2957                                   bool has_auto_finalize, bool auto_finalize,
2958                                   bool has_auto_dismiss, bool auto_dismiss,
2959                                   Error **errp)
2960{
2961    BlockDriverState *unfiltered_bs;
2962    int job_flags = JOB_DEFAULT;
2963
2964    if (!has_speed) {
2965        speed = 0;
2966    }
2967    if (!has_on_source_error) {
2968        on_source_error = BLOCKDEV_ON_ERROR_REPORT;
2969    }
2970    if (!has_on_target_error) {
2971        on_target_error = BLOCKDEV_ON_ERROR_REPORT;
2972    }
2973    if (!has_granularity) {
2974        granularity = 0;
2975    }
2976    if (!has_buf_size) {
2977        buf_size = 0;
2978    }
2979    if (!has_unmap) {
2980        unmap = true;
2981    }
2982    if (!has_copy_mode) {
2983        copy_mode = MIRROR_COPY_MODE_BACKGROUND;
2984    }
2985    if (has_auto_finalize && !auto_finalize) {
2986        job_flags |= JOB_MANUAL_FINALIZE;
2987    }
2988    if (has_auto_dismiss && !auto_dismiss) {
2989        job_flags |= JOB_MANUAL_DISMISS;
2990    }
2991
2992    if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
2993        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
2994                   "a value in range [512B, 64MB]");
2995        return;
2996    }
2997    if (granularity & (granularity - 1)) {
2998        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
2999                   "a power of 2");
3000        return;
3001    }
3002
3003    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_MIRROR_SOURCE, errp)) {
3004        return;
3005    }
3006    if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_MIRROR_TARGET, errp)) {
3007        return;
3008    }
3009
3010    if (!bdrv_backing_chain_next(bs) && sync == MIRROR_SYNC_MODE_TOP) {
3011        sync = MIRROR_SYNC_MODE_FULL;
3012    }
3013
3014    if (!replaces) {
3015        /* We want to mirror from @bs, but keep implicit filters on top */
3016        unfiltered_bs = bdrv_skip_implicit_filters(bs);
3017        if (unfiltered_bs != bs) {
3018            replaces = unfiltered_bs->node_name;
3019        }
3020    }
3021
3022    if (replaces) {
3023        BlockDriverState *to_replace_bs;
3024        AioContext *replace_aio_context;
3025        int64_t bs_size, replace_size;
3026
3027        bs_size = bdrv_getlength(bs);
3028        if (bs_size < 0) {
3029            error_setg_errno(errp, -bs_size, "Failed to query device's size");
3030            return;
3031        }
3032
3033        to_replace_bs = check_to_replace_node(bs, replaces, errp);
3034        if (!to_replace_bs) {
3035            return;
3036        }
3037
3038        replace_aio_context = bdrv_get_aio_context(to_replace_bs);
3039        aio_context_acquire(replace_aio_context);
3040        replace_size = bdrv_getlength(to_replace_bs);
3041        aio_context_release(replace_aio_context);
3042
3043        if (replace_size < 0) {
3044            error_setg_errno(errp, -replace_size,
3045                             "Failed to query the replacement node's size");
3046            return;
3047        }
3048        if (bs_size != replace_size) {
3049            error_setg(errp, "cannot replace image with a mirror image of "
3050                             "different size");
3051            return;
3052        }
3053    }
3054
3055    /* pass the node name to replace to mirror start since it's loose coupling
3056     * and will allow to check whether the node still exist at mirror completion
3057     */
3058    mirror_start(job_id, bs, target,
3059                 replaces, job_flags,
3060                 speed, granularity, buf_size, sync, backing_mode, zero_target,
3061                 on_source_error, on_target_error, unmap, filter_node_name,
3062                 copy_mode, errp);
3063}
3064
3065void qmp_drive_mirror(DriveMirror *arg, Error **errp)
3066{
3067    BlockDriverState *bs;
3068    BlockDriverState *target_backing_bs, *target_bs;
3069    AioContext *aio_context;
3070    AioContext *old_context;
3071    BlockMirrorBackingMode backing_mode;
3072    Error *local_err = NULL;
3073    QDict *options = NULL;
3074    int flags;
3075    int64_t size;
3076    const char *format = arg->format;
3077    bool zero_target;
3078    int ret;
3079
3080    bs = qmp_get_root_bs(arg->device, errp);
3081    if (!bs) {
3082        return;
3083    }
3084
3085    /* Early check to avoid creating target */
3086    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_MIRROR_SOURCE, errp)) {
3087        return;
3088    }
3089
3090    aio_context = bdrv_get_aio_context(bs);
3091    aio_context_acquire(aio_context);
3092
3093    if (!arg->has_mode) {
3094        arg->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
3095    }
3096
3097    if (!arg->format) {
3098        format = (arg->mode == NEW_IMAGE_MODE_EXISTING
3099                  ? NULL : bs->drv->format_name);
3100    }
3101
3102    flags = bs->open_flags | BDRV_O_RDWR;
3103    target_backing_bs = bdrv_cow_bs(bdrv_skip_filters(bs));
3104    if (!target_backing_bs && arg->sync == MIRROR_SYNC_MODE_TOP) {
3105        arg->sync = MIRROR_SYNC_MODE_FULL;
3106    }
3107    if (arg->sync == MIRROR_SYNC_MODE_NONE) {
3108        target_backing_bs = bs;
3109    }
3110
3111    size = bdrv_getlength(bs);
3112    if (size < 0) {
3113        error_setg_errno(errp, -size, "bdrv_getlength failed");
3114        goto out;
3115    }
3116
3117    if (arg->replaces) {
3118        if (!arg->node_name) {
3119            error_setg(errp, "a node-name must be provided when replacing a"
3120                             " named node of the graph");
3121            goto out;
3122        }
3123    }
3124
3125    if (arg->mode == NEW_IMAGE_MODE_ABSOLUTE_PATHS) {
3126        backing_mode = MIRROR_SOURCE_BACKING_CHAIN;
3127    } else {
3128        backing_mode = MIRROR_OPEN_BACKING_CHAIN;
3129    }
3130
3131    /* Don't open backing image in create() */
3132    flags |= BDRV_O_NO_BACKING;
3133
3134    if ((arg->sync == MIRROR_SYNC_MODE_FULL || !target_backing_bs)
3135        && arg->mode != NEW_IMAGE_MODE_EXISTING)
3136    {
3137        /* create new image w/o backing file */
3138        assert(format);
3139        bdrv_img_create(arg->target, format,
3140                        NULL, NULL, NULL, size, flags, false, &local_err);
3141    } else {
3142        /* Implicit filters should not appear in the filename */
3143        BlockDriverState *explicit_backing =
3144            bdrv_skip_implicit_filters(target_backing_bs);
3145
3146        switch (arg->mode) {
3147        case NEW_IMAGE_MODE_EXISTING:
3148            break;
3149        case NEW_IMAGE_MODE_ABSOLUTE_PATHS:
3150            /* create new image with backing file */
3151            bdrv_refresh_filename(explicit_backing);
3152            bdrv_img_create(arg->target, format,
3153                            explicit_backing->filename,
3154                            explicit_backing->drv->format_name,
3155                            NULL, size, flags, false, &local_err);
3156            break;
3157        default:
3158            abort();
3159        }
3160    }
3161
3162    if (local_err) {
3163        error_propagate(errp, local_err);
3164        goto out;
3165    }
3166
3167    options = qdict_new();
3168    if (arg->node_name) {
3169        qdict_put_str(options, "node-name", arg->node_name);
3170    }
3171    if (format) {
3172        qdict_put_str(options, "driver", format);
3173    }
3174
3175    /* Mirroring takes care of copy-on-write using the source's backing
3176     * file.
3177     */
3178    target_bs = bdrv_open(arg->target, NULL, options, flags, errp);
3179    if (!target_bs) {
3180        goto out;
3181    }
3182
3183    zero_target = (arg->sync == MIRROR_SYNC_MODE_FULL &&
3184                   (arg->mode == NEW_IMAGE_MODE_EXISTING ||
3185                    !bdrv_has_zero_init(target_bs)));
3186
3187
3188    /* Honor bdrv_try_change_aio_context() context acquisition requirements. */
3189    old_context = bdrv_get_aio_context(target_bs);
3190    aio_context_release(aio_context);
3191    aio_context_acquire(old_context);
3192
3193    ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp);
3194    if (ret < 0) {
3195        bdrv_unref(target_bs);
3196        aio_context_release(old_context);
3197        return;
3198    }
3199
3200    aio_context_release(old_context);
3201    aio_context_acquire(aio_context);
3202
3203    blockdev_mirror_common(arg->job_id, bs, target_bs,
3204                           arg->replaces, arg->sync,
3205                           backing_mode, zero_target,
3206                           arg->has_speed, arg->speed,
3207                           arg->has_granularity, arg->granularity,
3208                           arg->has_buf_size, arg->buf_size,
3209                           arg->has_on_source_error, arg->on_source_error,
3210                           arg->has_on_target_error, arg->on_target_error,
3211                           arg->has_unmap, arg->unmap,
3212                           NULL,
3213                           arg->has_copy_mode, arg->copy_mode,
3214                           arg->has_auto_finalize, arg->auto_finalize,
3215                           arg->has_auto_dismiss, arg->auto_dismiss,
3216                           errp);
3217    bdrv_unref(target_bs);
3218out:
3219    aio_context_release(aio_context);
3220}
3221
3222void qmp_blockdev_mirror(const char *job_id,
3223                         const char *device, const char *target,
3224                         const char *replaces,
3225                         MirrorSyncMode sync,
3226                         bool has_speed, int64_t speed,
3227                         bool has_granularity, uint32_t granularity,
3228                         bool has_buf_size, int64_t buf_size,
3229                         bool has_on_source_error,
3230                         BlockdevOnError on_source_error,
3231                         bool has_on_target_error,
3232                         BlockdevOnError on_target_error,
3233                         const char *filter_node_name,
3234                         bool has_copy_mode, MirrorCopyMode copy_mode,
3235                         bool has_auto_finalize, bool auto_finalize,
3236                         bool has_auto_dismiss, bool auto_dismiss,
3237                         Error **errp)
3238{
3239    BlockDriverState *bs;
3240    BlockDriverState *target_bs;
3241    AioContext *aio_context;
3242    AioContext *old_context;
3243    BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN;
3244    bool zero_target;
3245    int ret;
3246
3247    bs = qmp_get_root_bs(device, errp);
3248    if (!bs) {
3249        return;
3250    }
3251
3252    target_bs = bdrv_lookup_bs(target, target, errp);
3253    if (!target_bs) {
3254        return;
3255    }
3256
3257    zero_target = (sync == MIRROR_SYNC_MODE_FULL);
3258
3259    /* Honor bdrv_try_change_aio_context() context acquisition requirements. */
3260    old_context = bdrv_get_aio_context(target_bs);
3261    aio_context = bdrv_get_aio_context(bs);
3262    aio_context_acquire(old_context);
3263
3264    ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp);
3265
3266    aio_context_release(old_context);
3267    aio_context_acquire(aio_context);
3268
3269    if (ret < 0) {
3270        goto out;
3271    }
3272
3273    blockdev_mirror_common(job_id, bs, target_bs,
3274                           replaces, sync, backing_mode,
3275                           zero_target, has_speed, speed,
3276                           has_granularity, granularity,
3277                           has_buf_size, buf_size,
3278                           has_on_source_error, on_source_error,
3279                           has_on_target_error, on_target_error,
3280                           true, true, filter_node_name,
3281                           has_copy_mode, copy_mode,
3282                           has_auto_finalize, auto_finalize,
3283                           has_auto_dismiss, auto_dismiss,
3284                           errp);
3285out:
3286    aio_context_release(aio_context);
3287}
3288
3289/*
3290 * Get a block job using its ID. Called with job_mutex held.
3291 */
3292static BlockJob *find_block_job_locked(const char *id, Error **errp)
3293{
3294    BlockJob *job;
3295
3296    assert(id != NULL);
3297
3298    job = block_job_get_locked(id);
3299
3300    if (!job) {
3301        error_set(errp, ERROR_CLASS_DEVICE_NOT_ACTIVE,
3302                  "Block job '%s' not found", id);
3303        return NULL;
3304    }
3305
3306    return job;
3307}
3308
3309void qmp_block_job_set_speed(const char *device, int64_t speed, Error **errp)
3310{
3311    BlockJob *job;
3312
3313    JOB_LOCK_GUARD();
3314    job = find_block_job_locked(device, errp);
3315
3316    if (!job) {
3317        return;
3318    }
3319
3320    block_job_set_speed_locked(job, speed, errp);
3321}
3322
3323void qmp_block_job_cancel(const char *device,
3324                          bool has_force, bool force, Error **errp)
3325{
3326    BlockJob *job;
3327
3328    JOB_LOCK_GUARD();
3329    job = find_block_job_locked(device, errp);
3330
3331    if (!job) {
3332        return;
3333    }
3334
3335    if (!has_force) {
3336        force = false;
3337    }
3338
3339    if (job_user_paused_locked(&job->job) && !force) {
3340        error_setg(errp, "The block job for device '%s' is currently paused",
3341                   device);
3342        return;
3343    }
3344
3345    trace_qmp_block_job_cancel(job);
3346    job_user_cancel_locked(&job->job, force, errp);
3347}
3348
3349void qmp_block_job_pause(const char *device, Error **errp)
3350{
3351    BlockJob *job;
3352
3353    JOB_LOCK_GUARD();
3354    job = find_block_job_locked(device, errp);
3355
3356    if (!job) {
3357        return;
3358    }
3359
3360    trace_qmp_block_job_pause(job);
3361    job_user_pause_locked(&job->job, errp);
3362}
3363
3364void qmp_block_job_resume(const char *device, Error **errp)
3365{
3366    BlockJob *job;
3367
3368    JOB_LOCK_GUARD();
3369    job = find_block_job_locked(device, errp);
3370
3371    if (!job) {
3372        return;
3373    }
3374
3375    trace_qmp_block_job_resume(job);
3376    job_user_resume_locked(&job->job, errp);
3377}
3378
3379void qmp_block_job_complete(const char *device, Error **errp)
3380{
3381    BlockJob *job;
3382
3383    JOB_LOCK_GUARD();
3384    job = find_block_job_locked(device, errp);
3385
3386    if (!job) {
3387        return;
3388    }
3389
3390    trace_qmp_block_job_complete(job);
3391    job_complete_locked(&job->job, errp);
3392}
3393
3394void qmp_block_job_finalize(const char *id, Error **errp)
3395{
3396    BlockJob *job;
3397
3398    JOB_LOCK_GUARD();
3399    job = find_block_job_locked(id, errp);
3400
3401    if (!job) {
3402        return;
3403    }
3404
3405    trace_qmp_block_job_finalize(job);
3406    job_ref_locked(&job->job);
3407    job_finalize_locked(&job->job, errp);
3408
3409    job_unref_locked(&job->job);
3410}
3411
3412void qmp_block_job_dismiss(const char *id, Error **errp)
3413{
3414    BlockJob *bjob;
3415    Job *job;
3416
3417    JOB_LOCK_GUARD();
3418    bjob = find_block_job_locked(id, errp);
3419
3420    if (!bjob) {
3421        return;
3422    }
3423
3424    trace_qmp_block_job_dismiss(bjob);
3425    job = &bjob->job;
3426    job_dismiss_locked(&job, errp);
3427}
3428
3429void qmp_change_backing_file(const char *device,
3430                             const char *image_node_name,
3431                             const char *backing_file,
3432                             Error **errp)
3433{
3434    BlockDriverState *bs = NULL;
3435    AioContext *aio_context;
3436    BlockDriverState *image_bs = NULL;
3437    Error *local_err = NULL;
3438    bool ro;
3439    int ret;
3440
3441    bs = qmp_get_root_bs(device, errp);
3442    if (!bs) {
3443        return;
3444    }
3445
3446    aio_context = bdrv_get_aio_context(bs);
3447    aio_context_acquire(aio_context);
3448
3449    image_bs = bdrv_lookup_bs(NULL, image_node_name, &local_err);
3450    if (local_err) {
3451        error_propagate(errp, local_err);
3452        goto out;
3453    }
3454
3455    if (!image_bs) {
3456        error_setg(errp, "image file not found");
3457        goto out;
3458    }
3459
3460    if (bdrv_find_base(image_bs) == image_bs) {
3461        error_setg(errp, "not allowing backing file change on an image "
3462                         "without a backing file");
3463        goto out;
3464    }
3465
3466    /* even though we are not necessarily operating on bs, we need it to
3467     * determine if block ops are currently prohibited on the chain */
3468    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_CHANGE, errp)) {
3469        goto out;
3470    }
3471
3472    /* final sanity check */
3473    if (!bdrv_chain_contains(bs, image_bs)) {
3474        error_setg(errp, "'%s' and image file are not in the same chain",
3475                   device);
3476        goto out;
3477    }
3478
3479    /* if not r/w, reopen to make r/w */
3480    ro = bdrv_is_read_only(image_bs);
3481
3482    if (ro) {
3483        if (bdrv_reopen_set_read_only(image_bs, false, errp) != 0) {
3484            goto out;
3485        }
3486    }
3487
3488    ret = bdrv_change_backing_file(image_bs, backing_file,
3489                                   image_bs->drv ? image_bs->drv->format_name : "",
3490                                   false);
3491
3492    if (ret < 0) {
3493        error_setg_errno(errp, -ret, "Could not change backing file to '%s'",
3494                         backing_file);
3495        /* don't exit here, so we can try to restore open flags if
3496         * appropriate */
3497    }
3498
3499    if (ro) {
3500        bdrv_reopen_set_read_only(image_bs, true, errp);
3501    }
3502
3503out:
3504    aio_context_release(aio_context);
3505}
3506
3507void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
3508{
3509    BlockDriverState *bs;
3510    QObject *obj;
3511    Visitor *v = qobject_output_visitor_new(&obj);
3512    QDict *qdict;
3513
3514    visit_type_BlockdevOptions(v, NULL, &options, &error_abort);
3515    visit_complete(v, &obj);
3516    qdict = qobject_to(QDict, obj);
3517
3518    qdict_flatten(qdict);
3519
3520    if (!qdict_get_try_str(qdict, "node-name")) {
3521        error_setg(errp, "'node-name' must be specified for the root node");
3522        goto fail;
3523    }
3524
3525    bs = bds_tree_init(qdict, errp);
3526    if (!bs) {
3527        goto fail;
3528    }
3529
3530    bdrv_set_monitor_owned(bs);
3531
3532fail:
3533    visit_free(v);
3534}
3535
3536void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp)
3537{
3538    BlockReopenQueue *queue = NULL;
3539
3540    /* Add each one of the BDS that we want to reopen to the queue */
3541    for (; reopen_list != NULL; reopen_list = reopen_list->next) {
3542        BlockdevOptions *options = reopen_list->value;
3543        BlockDriverState *bs;
3544        AioContext *ctx;
3545        QObject *obj;
3546        Visitor *v;
3547        QDict *qdict;
3548
3549        /* Check for the selected node name */
3550        if (!options->node_name) {
3551            error_setg(errp, "node-name not specified");
3552            goto fail;
3553        }
3554
3555        bs = bdrv_find_node(options->node_name);
3556        if (!bs) {
3557            error_setg(errp, "Failed to find node with node-name='%s'",
3558                       options->node_name);
3559            goto fail;
3560        }
3561
3562        /* Put all options in a QDict and flatten it */
3563        v = qobject_output_visitor_new(&obj);
3564        visit_type_BlockdevOptions(v, NULL, &options, &error_abort);
3565        visit_complete(v, &obj);
3566        visit_free(v);
3567
3568        qdict = qobject_to(QDict, obj);
3569
3570        qdict_flatten(qdict);
3571
3572        ctx = bdrv_get_aio_context(bs);
3573        aio_context_acquire(ctx);
3574
3575        queue = bdrv_reopen_queue(queue, bs, qdict, false);
3576
3577        aio_context_release(ctx);
3578    }
3579
3580    /* Perform the reopen operation */
3581    bdrv_reopen_multiple(queue, errp);
3582    queue = NULL;
3583
3584fail:
3585    bdrv_reopen_queue_free(queue);
3586}
3587
3588void qmp_blockdev_del(const char *node_name, Error **errp)
3589{
3590    AioContext *aio_context;
3591    BlockDriverState *bs;
3592
3593    GLOBAL_STATE_CODE();
3594
3595    bs = bdrv_find_node(node_name);
3596    if (!bs) {
3597        error_setg(errp, "Failed to find node with node-name='%s'", node_name);
3598        return;
3599    }
3600    if (bdrv_has_blk(bs)) {
3601        error_setg(errp, "Node %s is in use", node_name);
3602        return;
3603    }
3604    aio_context = bdrv_get_aio_context(bs);
3605    aio_context_acquire(aio_context);
3606
3607    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, errp)) {
3608        goto out;
3609    }
3610
3611    if (!QTAILQ_IN_USE(bs, monitor_list)) {
3612        error_setg(errp, "Node %s is not owned by the monitor",
3613                   bs->node_name);
3614        goto out;
3615    }
3616
3617    if (bs->refcnt > 1) {
3618        error_setg(errp, "Block device %s is in use",
3619                   bdrv_get_device_or_node_name(bs));
3620        goto out;
3621    }
3622
3623    QTAILQ_REMOVE(&monitor_bdrv_states, bs, monitor_list);
3624    bdrv_unref(bs);
3625
3626out:
3627    aio_context_release(aio_context);
3628}
3629
3630static BdrvChild *bdrv_find_child(BlockDriverState *parent_bs,
3631                                  const char *child_name)
3632{
3633    BdrvChild *child;
3634
3635    QLIST_FOREACH(child, &parent_bs->children, next) {
3636        if (strcmp(child->name, child_name) == 0) {
3637            return child;
3638        }
3639    }
3640
3641    return NULL;
3642}
3643
3644void qmp_x_blockdev_change(const char *parent, const char *child,
3645                           const char *node, Error **errp)
3646{
3647    BlockDriverState *parent_bs, *new_bs = NULL;
3648    BdrvChild *p_child;
3649
3650    parent_bs = bdrv_lookup_bs(parent, parent, errp);
3651    if (!parent_bs) {
3652        return;
3653    }
3654
3655    if (!child == !node) {
3656        if (child) {
3657            error_setg(errp, "The parameters child and node are in conflict");
3658        } else {
3659            error_setg(errp, "Either child or node must be specified");
3660        }
3661        return;
3662    }
3663
3664    if (child) {
3665        p_child = bdrv_find_child(parent_bs, child);
3666        if (!p_child) {
3667            error_setg(errp, "Node '%s' does not have child '%s'",
3668                       parent, child);
3669            return;
3670        }
3671        bdrv_del_child(parent_bs, p_child, errp);
3672    }
3673
3674    if (node) {
3675        new_bs = bdrv_find_node(node);
3676        if (!new_bs) {
3677            error_setg(errp, "Node '%s' not found", node);
3678            return;
3679        }
3680        bdrv_add_child(parent_bs, new_bs, errp);
3681    }
3682}
3683
3684BlockJobInfoList *qmp_query_block_jobs(Error **errp)
3685{
3686    BlockJobInfoList *head = NULL, **tail = &head;
3687    BlockJob *job;
3688
3689    JOB_LOCK_GUARD();
3690
3691    for (job = block_job_next_locked(NULL); job;
3692         job = block_job_next_locked(job)) {
3693        BlockJobInfo *value;
3694
3695        if (block_job_is_internal(job)) {
3696            continue;
3697        }
3698        value = block_job_query_locked(job, errp);
3699        if (!value) {
3700            qapi_free_BlockJobInfoList(head);
3701            return NULL;
3702        }
3703        QAPI_LIST_APPEND(tail, value);
3704    }
3705
3706    return head;
3707}
3708
3709void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread,
3710                                 bool has_force, bool force, Error **errp)
3711{
3712    AioContext *old_context;
3713    AioContext *new_context;
3714    BlockDriverState *bs;
3715
3716    bs = bdrv_find_node(node_name);
3717    if (!bs) {
3718        error_setg(errp, "Failed to find node with node-name='%s'", node_name);
3719        return;
3720    }
3721
3722    /* Protects against accidents. */
3723    if (!(has_force && force) && bdrv_has_blk(bs)) {
3724        error_setg(errp, "Node %s is associated with a BlockBackend and could "
3725                         "be in use (use force=true to override this check)",
3726                         node_name);
3727        return;
3728    }
3729
3730    if (iothread->type == QTYPE_QSTRING) {
3731        IOThread *obj = iothread_by_id(iothread->u.s);
3732        if (!obj) {
3733            error_setg(errp, "Cannot find iothread %s", iothread->u.s);
3734            return;
3735        }
3736
3737        new_context = iothread_get_aio_context(obj);
3738    } else {
3739        new_context = qemu_get_aio_context();
3740    }
3741
3742    old_context = bdrv_get_aio_context(bs);
3743    aio_context_acquire(old_context);
3744
3745    bdrv_try_change_aio_context(bs, new_context, NULL, errp);
3746
3747    aio_context_release(old_context);
3748}
3749
3750QemuOptsList qemu_common_drive_opts = {
3751    .name = "drive",
3752    .head = QTAILQ_HEAD_INITIALIZER(qemu_common_drive_opts.head),
3753    .desc = {
3754        {
3755            .name = "snapshot",
3756            .type = QEMU_OPT_BOOL,
3757            .help = "enable/disable snapshot mode",
3758        },{
3759            .name = "aio",
3760            .type = QEMU_OPT_STRING,
3761            .help = "host AIO implementation (threads, native, io_uring)",
3762        },{
3763            .name = BDRV_OPT_CACHE_WB,
3764            .type = QEMU_OPT_BOOL,
3765            .help = "Enable writeback mode",
3766        },{
3767            .name = "format",
3768            .type = QEMU_OPT_STRING,
3769            .help = "disk format (raw, qcow2, ...)",
3770        },{
3771            .name = "rerror",
3772            .type = QEMU_OPT_STRING,
3773            .help = "read error action",
3774        },{
3775            .name = "werror",
3776            .type = QEMU_OPT_STRING,
3777            .help = "write error action",
3778        },{
3779            .name = BDRV_OPT_READ_ONLY,
3780            .type = QEMU_OPT_BOOL,
3781            .help = "open drive file as read-only",
3782        },
3783
3784        THROTTLE_OPTS,
3785
3786        {
3787            .name = "throttling.group",
3788            .type = QEMU_OPT_STRING,
3789            .help = "name of the block throttling group",
3790        },{
3791            .name = "copy-on-read",
3792            .type = QEMU_OPT_BOOL,
3793            .help = "copy read data from backing file into image file",
3794        },{
3795            .name = "detect-zeroes",
3796            .type = QEMU_OPT_STRING,
3797            .help = "try to optimize zero writes (off, on, unmap)",
3798        },{
3799            .name = "stats-account-invalid",
3800            .type = QEMU_OPT_BOOL,
3801            .help = "whether to account for invalid I/O operations "
3802                    "in the statistics",
3803        },{
3804            .name = "stats-account-failed",
3805            .type = QEMU_OPT_BOOL,
3806            .help = "whether to account for failed I/O operations "
3807                    "in the statistics",
3808        },
3809        { /* end of list */ }
3810    },
3811};
3812
3813QemuOptsList qemu_drive_opts = {
3814    .name = "drive",
3815    .head = QTAILQ_HEAD_INITIALIZER(qemu_drive_opts.head),
3816    .desc = {
3817        /*
3818         * no elements => accept any params
3819         * validation will happen later
3820         */
3821        { /* end of list */ }
3822    },
3823};
3824