qemu/qemu-img.c
<<
>>
Prefs
   1/*
   2 * QEMU disk image utility
   3 *
   4 * Copyright (c) 2003-2008 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24
  25#include "qemu/osdep.h"
  26#include <getopt.h>
  27
  28#include "qemu/help-texts.h"
  29#include "qemu/qemu-progress.h"
  30#include "qemu-version.h"
  31#include "qapi/error.h"
  32#include "qapi/qapi-commands-block-core.h"
  33#include "qapi/qapi-visit-block-core.h"
  34#include "qapi/qobject-output-visitor.h"
  35#include "qobject/qjson.h"
  36#include "qobject/qdict.h"
  37#include "qemu/cutils.h"
  38#include "qemu/config-file.h"
  39#include "qemu/option.h"
  40#include "qemu/error-report.h"
  41#include "qemu/log.h"
  42#include "qemu/main-loop.h"
  43#include "qemu/module.h"
  44#include "qemu/sockets.h"
  45#include "qemu/units.h"
  46#include "qemu/memalign.h"
  47#include "qom/object_interfaces.h"
  48#include "system/block-backend.h"
  49#include "block/block_int.h"
  50#include "block/blockjob.h"
  51#include "block/dirty-bitmap.h"
  52#include "block/qapi.h"
  53#include "crypto/init.h"
  54#include "trace/control.h"
  55#include "qemu/throttle.h"
  56#include "block/throttle-groups.h"
  57
/*
 * Version banner: "qemu-img version <full version>" on the first line,
 * followed by the copyright notice on the second; both lines end in '\n'.
 */
#define QEMU_IMG_VERSION "qemu-img version " QEMU_FULL_VERSION \
                          "\n" QEMU_COPYRIGHT "\n"
  60
/*
 * Descriptor of one qemu-img subcommand.
 * The handler receives its own descriptor as @ccmd, which is what
 * cmd_help() uses to print the command name and description.
 */
typedef struct img_cmd_t {
    /* Subcommand name; printed in the usage line by cmd_help() */
    const char *name;
    /* Entry point of the subcommand */
    int (*handler)(const struct img_cmd_t *ccmd, int argc, char **argv);
    /* Summary printed by cmd_help(), which appends the final '.' */
    const char *description;
} img_cmd_t;
  66
  67enum {
  68    OPTION_OUTPUT = 256,
  69    OPTION_BACKING_CHAIN = 257,
  70    OPTION_OBJECT = 258,
  71    OPTION_IMAGE_OPTS = 259,
  72    OPTION_PATTERN = 260,
  73    OPTION_FLUSH_INTERVAL = 261,
  74    OPTION_NO_DRAIN = 262,
  75    OPTION_TARGET_IMAGE_OPTS = 263,
  76    OPTION_PREALLOCATION = 265,
  77    OPTION_SHRINK = 266,
  78    OPTION_SALVAGE = 267,
  79    OPTION_TARGET_IS_ZERO = 268,
  80    OPTION_ADD = 269,
  81    OPTION_REMOVE = 270,
  82    OPTION_CLEAR = 271,
  83    OPTION_ENABLE = 272,
  84    OPTION_DISABLE = 273,
  85    OPTION_MERGE = 274,
  86    OPTION_BITMAPS = 275,
  87    OPTION_FORCE = 276,
  88    OPTION_SKIP_BROKEN = 277,
  89};
  90
  91typedef enum OutputFormat {
  92    OFORMAT_JSON,
  93    OFORMAT_HUMAN,
  94} OutputFormat;
  95
/*
 * Default cache mode: cache=writeback, as data integrity is not important
 * for qemu-img.  This must remain a string literal because it is pasted
 * into the help texts by literal concatenation.
 */
#define BDRV_DEFAULT_CACHE "writeback"
  98
  99static G_NORETURN
 100void tryhelp(const char *argv0)
 101{
 102    error_printf("Try '%s --help' for more information\n", argv0);
 103    exit(EXIT_FAILURE);
 104}
 105
 106static G_NORETURN G_GNUC_PRINTF(2, 3)
 107void error_exit(const char *argv0, const char *fmt, ...)
 108{
 109    va_list ap;
 110
 111    va_start(ap, fmt);
 112    error_vreport(fmt, ap);
 113    va_end(ap);
 114
 115    tryhelp(argv0);
 116}
 117
 118/*
 119 * Print --help output for a command and exit.
 120 * @syntax and @description are multi-line with trailing EOL
 121 * (to allow easy extending of the text)
 122 * @syntax has each subsequent line indented by 8 chars.
 123 * @description is indented by 2 chars for argument on each own line,
 124 * and with 5 chars for argument description (like -h arg below).
 125 */
 126static G_NORETURN
 127void cmd_help(const img_cmd_t *ccmd,
 128              const char *syntax, const char *arguments)
 129{
 130    printf(
 131"Usage:\n"
 132"  %s %s %s\n"
 133"%s.\n"
 134"\n"
 135"Arguments:\n"
 136"  -h, --help\n"
 137"     print this help and exit\n"
 138"%s\n",
 139           "qemu-img", ccmd->name, syntax, ccmd->description, arguments);
 140    exit(EXIT_SUCCESS);
 141}
 142
 143static OutputFormat parse_output_format(const char *argv0, const char *arg)
 144{
 145    if (!strcmp(arg, "json")) {
 146        return OFORMAT_JSON;
 147    } else if (!strcmp(arg, "human")) {
 148        return OFORMAT_HUMAN;
 149    } else {
 150        error_exit(argv0, "--output expects 'human' or 'json', not '%s'", arg);
 151    }
 152}
 153
 154/*
 155 * Is @list safe for accumulate_options()?
 156 * It is when multiple of them can be joined together separated by ','.
 157 * To make that work, @list must not start with ',' (or else a
 158 * separating ',' preceding it gets escaped), and it must not end with
 159 * an odd number of ',' (or else a separating ',' following it gets
 160 * escaped), or be empty (or else a separating ',' preceding it can
 161 * escape a separating ',' following it).
 162 * 
 163 */
 164static bool is_valid_option_list(const char *list)
 165{
 166    size_t len = strlen(list);
 167    size_t i;
 168
 169    if (!list[0] || list[0] == ',') {
 170        return false;
 171    }
 172
 173    for (i = len; i > 0 && list[i - 1] == ','; i--) {
 174    }
 175    if ((len - i) % 2) {
 176        return false;
 177    }
 178
 179    return true;
 180}
 181
 182static int accumulate_options(char **options, char *list)
 183{
 184    char *new_options;
 185
 186    if (!is_valid_option_list(list)) {
 187        error_report("Invalid option list: %s", list);
 188        return -1;
 189    }
 190
 191    if (!*options) {
 192        *options = g_strdup(list);
 193    } else {
 194        new_options = g_strdup_printf("%s,%s", *options, list);
 195        g_free(*options);
 196        *options = new_options;
 197    }
 198    return 0;
 199}
 200
/*
 * Option list named "source" with "file" as the implied option name and
 * an empty description table.
 * NOTE(review): img_open() parses --image-opts strings against
 * qemu_find_opts("source"); presumably that resolves to this list, but
 * the registration is not visible here.
 */
static QemuOptsList qemu_source_opts = {
    .name = "source",
    .implied_opt_name = "file",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_source_opts.head),
    .desc = {
        { }
    },
};
 209
 210static int G_GNUC_PRINTF(2, 3) qprintf(bool quiet, const char *fmt, ...)
 211{
 212    int ret = 0;
 213    if (!quiet) {
 214        va_list args;
 215        va_start(args, fmt);
 216        ret = vprintf(fmt, args);
 217        va_end(args);
 218    }
 219    return ret;
 220}
 221
 222
 223static int print_block_option_help(const char *filename, const char *fmt)
 224{
 225    BlockDriver *drv, *proto_drv;
 226    QemuOptsList *create_opts = NULL;
 227    Error *local_err = NULL;
 228
 229    /* Find driver and parse its options */
 230    drv = bdrv_find_format(fmt);
 231    if (!drv) {
 232        error_report("Unknown file format '%s'", fmt);
 233        return 1;
 234    }
 235
 236    if (!drv->create_opts) {
 237        error_report("Format driver '%s' does not support image creation", fmt);
 238        return 1;
 239    }
 240
 241    create_opts = qemu_opts_append(create_opts, drv->create_opts);
 242    if (filename) {
 243        proto_drv = bdrv_find_protocol(filename, true, &local_err);
 244        if (!proto_drv) {
 245            error_report_err(local_err);
 246            qemu_opts_free(create_opts);
 247            return 1;
 248        }
 249        if (!proto_drv->create_opts) {
 250            error_report("Protocol driver '%s' does not support image creation",
 251                         proto_drv->format_name);
 252            qemu_opts_free(create_opts);
 253            return 1;
 254        }
 255        create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
 256    }
 257
 258    if (filename) {
 259        printf("Supported options:\n");
 260    } else {
 261        printf("Supported %s options:\n", fmt);
 262    }
 263    qemu_opts_print_help(create_opts, false);
 264    qemu_opts_free(create_opts);
 265
 266    if (!filename) {
 267        printf("\n"
 268               "The protocol level may support further options.\n"
 269               "Specify the target filename to include those options.\n");
 270    }
 271
 272    return 0;
 273}
 274
 275
 276static BlockBackend *img_open_opts(const char *optstr,
 277                                   QemuOpts *opts, int flags, bool writethrough,
 278                                   bool quiet, bool force_share)
 279{
 280    QDict *options;
 281    Error *local_err = NULL;
 282    BlockBackend *blk;
 283    options = qemu_opts_to_qdict(opts, NULL);
 284    if (force_share) {
 285        if (qdict_haskey(options, BDRV_OPT_FORCE_SHARE)
 286            && strcmp(qdict_get_str(options, BDRV_OPT_FORCE_SHARE), "on")) {
 287            error_report("--force-share/-U conflicts with image options");
 288            qobject_unref(options);
 289            return NULL;
 290        }
 291        qdict_put_str(options, BDRV_OPT_FORCE_SHARE, "on");
 292    }
 293    blk = blk_new_open(NULL, NULL, options, flags, &local_err);
 294    if (!blk) {
 295        error_reportf_err(local_err, "Could not open '%s': ", optstr);
 296        return NULL;
 297    }
 298    blk_set_enable_write_cache(blk, !writethrough);
 299
 300    return blk;
 301}
 302
 303static BlockBackend *img_open_file(const char *filename,
 304                                   QDict *options,
 305                                   const char *fmt, int flags,
 306                                   bool writethrough, bool quiet,
 307                                   bool force_share)
 308{
 309    BlockBackend *blk;
 310    Error *local_err = NULL;
 311
 312    if (!options) {
 313        options = qdict_new();
 314    }
 315    if (fmt) {
 316        qdict_put_str(options, "driver", fmt);
 317    }
 318
 319    if (force_share) {
 320        qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
 321    }
 322    blk = blk_new_open(filename, NULL, options, flags, &local_err);
 323    if (!blk) {
 324        error_reportf_err(local_err, "Could not open '%s': ", filename);
 325        return NULL;
 326    }
 327    blk_set_enable_write_cache(blk, !writethrough);
 328
 329    return blk;
 330}
 331
 332
 333static int img_add_key_secrets(void *opaque,
 334                               const char *name, const char *value,
 335                               Error **errp)
 336{
 337    QDict *options = opaque;
 338
 339    if (g_str_has_suffix(name, "key-secret")) {
 340        qdict_put_str(options, name, value);
 341    }
 342
 343    return 0;
 344}
 345
 346
 347static BlockBackend *img_open(bool image_opts,
 348                              const char *filename,
 349                              const char *fmt, int flags, bool writethrough,
 350                              bool quiet, bool force_share)
 351{
 352    BlockBackend *blk;
 353    if (image_opts) {
 354        QemuOpts *opts;
 355        if (fmt) {
 356            error_report("--image-opts and --format are mutually exclusive");
 357            return NULL;
 358        }
 359        opts = qemu_opts_parse_noisily(qemu_find_opts("source"),
 360                                       filename, true);
 361        if (!opts) {
 362            return NULL;
 363        }
 364        blk = img_open_opts(filename, opts, flags, writethrough, quiet,
 365                            force_share);
 366    } else {
 367        blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet,
 368                            force_share);
 369    }
 370
 371    if (blk) {
 372        blk_set_force_allow_inactivate(blk);
 373    }
 374
 375    return blk;
 376}
 377
 378
 379static int add_old_style_options(const char *fmt, QemuOpts *opts,
 380                                 const char *base_filename,
 381                                 const char *base_fmt)
 382{
 383    if (base_filename) {
 384        if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename,
 385                          NULL)) {
 386            error_report("Backing file not supported for file format '%s'",
 387                         fmt);
 388            return -1;
 389        }
 390    }
 391    if (base_fmt) {
 392        if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, NULL)) {
 393            error_report("Backing file format not supported for file "
 394                         "format '%s'", fmt);
 395            return -1;
 396        }
 397    }
 398    return 0;
 399}
 400
 401static int64_t cvtnum_full(const char *name, const char *value,
 402                           bool is_size, int64_t min, int64_t max)
 403{
 404    int err;
 405    uint64_t res;
 406
 407    err = is_size ? qemu_strtosz(value, NULL, &res) :
 408                    qemu_strtou64(value, NULL, 0, &res);
 409    if (err < 0 && err != -ERANGE) {
 410        error_report("Invalid %s specified: '%s'", name, value);
 411        return err;
 412    }
 413    if (err == -ERANGE || res > max || res < min) {
 414        error_report("Invalid %s specified. Must be between %" PRId64
 415                     " and %" PRId64 ".", name, min, max);
 416        return -ERANGE;
 417    }
 418    return res;
 419}
 420
 421static int64_t cvtnum(const char *name, const char *value, bool is_size)
 422{
 423    return cvtnum_full(name, value, is_size, 0, INT64_MAX);
 424}
 425
 426static int img_create(const img_cmd_t *ccmd, int argc, char **argv)
 427{
 428    int c;
 429    int64_t img_size = -1;
 430    const char *fmt = "raw";
 431    const char *base_fmt = NULL;
 432    const char *filename;
 433    const char *base_filename = NULL;
 434    char *options = NULL;
 435    Error *local_err = NULL;
 436    bool quiet = false;
 437    int flags = 0;
 438
 439    for(;;) {
 440        static const struct option long_options[] = {
 441            {"help", no_argument, 0, 'h'},
 442            {"format", required_argument, 0, 'f'},
 443            {"options", required_argument, 0, 'o'},
 444            {"backing", required_argument, 0, 'b'},
 445            {"backing-format", required_argument, 0, 'B'}, /* was -F in 10.0 */
 446            {"backing-unsafe", no_argument, 0, 'u'},
 447            {"quiet", no_argument, 0, 'q'},
 448            {"object", required_argument, 0, OPTION_OBJECT},
 449            {0, 0, 0, 0}
 450        };
 451        c = getopt_long(argc, argv, "hf:o:b:F:B:uq",
 452                        long_options, NULL);
 453        if (c == -1) {
 454            break;
 455        }
 456        switch(c) {
 457        case 'h':
 458            cmd_help(ccmd, "[-f FMT] [-o FMT_OPTS]\n"
 459"        [-b BACKING_FILE [-B BACKING_FMT]] [-u]\n"
 460"        [-q] [--object OBJDEF] FILE [SIZE]\n"
 461,
 462"  -f, --format FMT\n"
 463"     specifies the format of the new image (default: raw)\n"
 464"  -o, --options FMT_OPTS\n"
 465"     format-specific options (specify '-o help' for help)\n"
 466"  -b, --backing BACKING_FILE\n"
 467"     create target image to be a CoW on top of BACKING_FILE\n"
 468"  -B, --backing-format BACKING_FMT (was -F in <= 10.0)\n"
 469"     specifies the format of BACKING_FILE (default: probing is used)\n"
 470"  -u, --backing-unsafe\n"
 471"     do not fail if BACKING_FILE can not be read\n"
 472"  -q, --quiet\n"
 473"     quiet mode (produce only error messages if any)\n"
 474"  --object OBJDEF\n"
 475"     defines QEMU user-creatable object\n"
 476"  FILE\n"
 477"     name of the image file to create (will be overritten if already exists)\n"
 478"  SIZE[bKMGTPE]\n"
 479"     image size with optional multiplier suffix (powers of 1024)\n"
 480"     (required unless BACKING_FILE is specified)\n"
 481);
 482            break;
 483        case 'f':
 484            fmt = optarg;
 485            break;
 486        case 'o':
 487            if (accumulate_options(&options, optarg) < 0) {
 488                goto fail;
 489            }
 490            break;
 491        case 'b':
 492            base_filename = optarg;
 493            break;
 494        case 'F': /* <=10.0 */
 495        case 'B':
 496            base_fmt = optarg;
 497            break;
 498        case 'u':
 499            flags |= BDRV_O_NO_BACKING;
 500            break;
 501        case 'q':
 502            quiet = true;
 503            break;
 504        case OPTION_OBJECT:
 505            user_creatable_process_cmdline(optarg);
 506            break;
 507        default:
 508            tryhelp(argv[0]);
 509        }
 510    }
 511
 512    /* Get the filename */
 513    filename = (optind < argc) ? argv[optind] : NULL;
 514    if (options && has_help_option(options)) {
 515        g_free(options);
 516        return print_block_option_help(filename, fmt);
 517    }
 518
 519    if (optind >= argc) {
 520        error_exit(argv[0], "Expecting image file name");
 521    }
 522    optind++;
 523
 524    /* Get image size, if specified */
 525    if (optind < argc) {
 526        img_size = cvtnum("image size", argv[optind++], true);
 527        if (img_size < 0) {
 528            goto fail;
 529        }
 530    }
 531    if (optind != argc) {
 532        error_exit(argv[0], "Unexpected argument: %s", argv[optind]);
 533    }
 534
 535    bdrv_img_create(filename, fmt, base_filename, base_fmt,
 536                    options, img_size, flags, quiet, &local_err);
 537    if (local_err) {
 538        error_reportf_err(local_err, "%s: ", filename);
 539        goto fail;
 540    }
 541
 542    g_free(options);
 543    return 0;
 544
 545fail:
 546    g_free(options);
 547    return 1;
 548}
 549
 550static void dump_json_image_check(ImageCheck *check, bool quiet)
 551{
 552    GString *str;
 553    QObject *obj;
 554    Visitor *v = qobject_output_visitor_new(&obj);
 555
 556    visit_type_ImageCheck(v, NULL, &check, &error_abort);
 557    visit_complete(v, &obj);
 558    str = qobject_to_json_pretty(obj, true);
 559    assert(str != NULL);
 560    qprintf(quiet, "%s\n", str->str);
 561    qobject_unref(obj);
 562    visit_free(v);
 563    g_string_free(str, true);
 564}
 565
 566static void dump_human_image_check(ImageCheck *check, bool quiet)
 567{
 568    if (!(check->corruptions || check->leaks || check->check_errors)) {
 569        qprintf(quiet, "No errors were found on the image.\n");
 570    } else {
 571        if (check->corruptions) {
 572            qprintf(quiet, "\n%" PRId64 " errors were found on the image.\n"
 573                    "Data may be corrupted, or further writes to the image "
 574                    "may corrupt it.\n",
 575                    check->corruptions);
 576        }
 577
 578        if (check->leaks) {
 579            qprintf(quiet,
 580                    "\n%" PRId64 " leaked clusters were found on the image.\n"
 581                    "This means waste of disk space, but no harm to data.\n",
 582                    check->leaks);
 583        }
 584
 585        if (check->check_errors) {
 586            qprintf(quiet,
 587                    "\n%" PRId64
 588                    " internal errors have occurred during the check.\n",
 589                    check->check_errors);
 590        }
 591    }
 592
 593    if (check->total_clusters != 0 && check->allocated_clusters != 0) {
 594        qprintf(quiet, "%" PRId64 "/%" PRId64 " = %0.2f%% allocated, "
 595                "%0.2f%% fragmented, %0.2f%% compressed clusters\n",
 596                check->allocated_clusters, check->total_clusters,
 597                check->allocated_clusters * 100.0 / check->total_clusters,
 598                check->fragmented_clusters * 100.0 / check->allocated_clusters,
 599                check->compressed_clusters * 100.0 /
 600                check->allocated_clusters);
 601    }
 602
 603    if (check->image_end_offset) {
 604        qprintf(quiet,
 605                "Image end offset: %" PRId64 "\n", check->image_end_offset);
 606    }
 607}
 608
 609static int collect_image_check(BlockDriverState *bs,
 610                   ImageCheck *check,
 611                   const char *filename,
 612                   const char *fmt,
 613                   int fix)
 614{
 615    int ret;
 616    BdrvCheckResult result;
 617
 618    ret = bdrv_check(bs, &result, fix);
 619    if (ret < 0) {
 620        return ret;
 621    }
 622
 623    check->filename                 = g_strdup(filename);
 624    check->format                   = g_strdup(bdrv_get_format_name(bs));
 625    check->check_errors             = result.check_errors;
 626    check->corruptions              = result.corruptions;
 627    check->has_corruptions          = result.corruptions != 0;
 628    check->leaks                    = result.leaks;
 629    check->has_leaks                = result.leaks != 0;
 630    check->corruptions_fixed        = result.corruptions_fixed;
 631    check->has_corruptions_fixed    = result.corruptions_fixed != 0;
 632    check->leaks_fixed              = result.leaks_fixed;
 633    check->has_leaks_fixed          = result.leaks_fixed != 0;
 634    check->image_end_offset         = result.image_end_offset;
 635    check->has_image_end_offset     = result.image_end_offset != 0;
 636    check->total_clusters           = result.bfi.total_clusters;
 637    check->has_total_clusters       = result.bfi.total_clusters != 0;
 638    check->allocated_clusters       = result.bfi.allocated_clusters;
 639    check->has_allocated_clusters   = result.bfi.allocated_clusters != 0;
 640    check->fragmented_clusters      = result.bfi.fragmented_clusters;
 641    check->has_fragmented_clusters  = result.bfi.fragmented_clusters != 0;
 642    check->compressed_clusters      = result.bfi.compressed_clusters;
 643    check->has_compressed_clusters  = result.bfi.compressed_clusters != 0;
 644
 645    return 0;
 646}
 647
 648/*
 649 * Checks an image for consistency. Exit codes:
 650 *
 651 *  0 - Check completed, image is good
 652 *  1 - Check not completed because of internal errors
 653 *  2 - Check completed, image is corrupted
 654 *  3 - Check completed, image has leaked clusters, but is good otherwise
 655 * 63 - Checks are not supported by the image format
 656 */
 657static int img_check(const img_cmd_t *ccmd, int argc, char **argv)
 658{
 659    int c, ret;
 660    OutputFormat output_format = OFORMAT_HUMAN;
 661    const char *filename, *fmt, *cache;
 662    BlockBackend *blk;
 663    BlockDriverState *bs;
 664    int fix = 0;
 665    int flags = BDRV_O_CHECK;
 666    bool writethrough;
 667    ImageCheck *check;
 668    bool quiet = false;
 669    bool image_opts = false;
 670    bool force_share = false;
 671
 672    fmt = NULL;
 673    cache = BDRV_DEFAULT_CACHE;
 674
 675    for(;;) {
 676        int option_index = 0;
 677        static const struct option long_options[] = {
 678            {"help", no_argument, 0, 'h'},
 679            {"format", required_argument, 0, 'f'},
 680            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
 681            {"cache", required_argument, 0, 'T'},
 682            {"repair", required_argument, 0, 'r'},
 683            {"force-share", no_argument, 0, 'U'},
 684            {"output", required_argument, 0, OPTION_OUTPUT},
 685            {"quiet", no_argument, 0, 'q'},
 686            {"object", required_argument, 0, OPTION_OBJECT},
 687            {0, 0, 0, 0}
 688        };
 689        c = getopt_long(argc, argv, "hf:T:r:Uq",
 690                        long_options, &option_index);
 691        if (c == -1) {
 692            break;
 693        }
 694        switch(c) {
 695        case 'h':
 696            cmd_help(ccmd, "[-f FMT | --image-opts] [-T CACHE_MODE] [-r leaks|all]\n"
 697"        [-U] [--output human|json] [-q] [--object OBJDEF] FILE\n"
 698,
 699"  -f, --format FMT\n"
 700"     specifies the format of the image explicitly (default: probing is used)\n"
 701"  --image-opts\n"
 702"     treat FILE as an option string (key=value,..), not a file name\n"
 703"     (incompatible with -f|--format)\n"
 704"  -T, --cache CACHE_MODE\n" /* why not -t ? */
 705"     cache mode (default: " BDRV_DEFAULT_CACHE ")\n"
 706"  -r, --repair leaks|all\n"
 707"     repair errors of the given category in the image (image will be\n"
 708"     opened in read-write mode, incompatible with -U|--force-share)\n"
 709"  -U, --force-share\n"
 710"     open image in shared mode for concurrent access\n"
 711"  --output human|json\n"
 712"     output format (default: human)\n"
 713"  -q, --quiet\n"
 714"     quiet mode (produce only error messages if any)\n"
 715"  --object OBJDEF\n"
 716"     defines QEMU user-creatable object\n"
 717"  FILE\n"
 718"     name of the image file, or an option string (key=value,..)\n"
 719"     with --image-opts, to operate on\n"
 720);
 721            break;
 722        case 'f':
 723            fmt = optarg;
 724            break;
 725        case OPTION_IMAGE_OPTS:
 726            image_opts = true;
 727            break;
 728        case 'T':
 729            cache = optarg;
 730            break;
 731        case 'r':
 732            flags |= BDRV_O_RDWR;
 733
 734            if (!strcmp(optarg, "leaks")) {
 735                fix = BDRV_FIX_LEAKS;
 736            } else if (!strcmp(optarg, "all")) {
 737                fix = BDRV_FIX_LEAKS | BDRV_FIX_ERRORS;
 738            } else {
 739                error_exit(argv[0],
 740                           "--repair (-r) expects 'leaks' or 'all', not '%s'",
 741                           optarg);
 742            }
 743            break;
 744        case 'U':
 745            force_share = true;
 746            break;
 747        case OPTION_OUTPUT:
 748            output_format = parse_output_format(argv[0], optarg);
 749            break;
 750        case 'q':
 751            quiet = true;
 752            break;
 753        case OPTION_OBJECT:
 754            user_creatable_process_cmdline(optarg);
 755            break;
 756        default:
 757            tryhelp(argv[0]);
 758        }
 759    }
 760    if (optind != argc - 1) {
 761        error_exit(argv[0], "Expecting one image file name");
 762    }
 763    filename = argv[optind++];
 764
 765    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
 766    if (ret < 0) {
 767        error_report("Invalid source cache option: %s", cache);
 768        return 1;
 769    }
 770
 771    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
 772                   force_share);
 773    if (!blk) {
 774        return 1;
 775    }
 776    bs = blk_bs(blk);
 777
 778    check = g_new0(ImageCheck, 1);
 779    ret = collect_image_check(bs, check, filename, fmt, fix);
 780
 781    if (ret == -ENOTSUP) {
 782        error_report("This image format does not support checks");
 783        ret = 63;
 784        goto fail;
 785    }
 786
 787    if (check->corruptions_fixed || check->leaks_fixed) {
 788        int corruptions_fixed, leaks_fixed;
 789        bool has_leaks_fixed, has_corruptions_fixed;
 790
 791        leaks_fixed         = check->leaks_fixed;
 792        has_leaks_fixed     = check->has_leaks_fixed;
 793        corruptions_fixed   = check->corruptions_fixed;
 794        has_corruptions_fixed = check->has_corruptions_fixed;
 795
 796        if (output_format == OFORMAT_HUMAN) {
 797            qprintf(quiet,
 798                    "The following inconsistencies were found and repaired:\n\n"
 799                    "    %" PRId64 " leaked clusters\n"
 800                    "    %" PRId64 " corruptions\n\n"
 801                    "Double checking the fixed image now...\n",
 802                    check->leaks_fixed,
 803                    check->corruptions_fixed);
 804        }
 805
 806        qapi_free_ImageCheck(check);
 807        check = g_new0(ImageCheck, 1);
 808        ret = collect_image_check(bs, check, filename, fmt, 0);
 809
 810        check->leaks_fixed          = leaks_fixed;
 811        check->has_leaks_fixed      = has_leaks_fixed;
 812        check->corruptions_fixed    = corruptions_fixed;
 813        check->has_corruptions_fixed = has_corruptions_fixed;
 814    }
 815
 816    if (!ret) {
 817        switch (output_format) {
 818        case OFORMAT_HUMAN:
 819            dump_human_image_check(check, quiet);
 820            break;
 821        case OFORMAT_JSON:
 822            dump_json_image_check(check, quiet);
 823            break;
 824        }
 825    }
 826
 827    if (ret || check->check_errors) {
 828        if (ret) {
 829            error_report("Check failed: %s", strerror(-ret));
 830        } else {
 831            error_report("Check failed");
 832        }
 833        ret = 1;
 834        goto fail;
 835    }
 836
 837    if (check->corruptions) {
 838        ret = 2;
 839    } else if (check->leaks) {
 840        ret = 3;
 841    } else {
 842        ret = 0;
 843    }
 844
 845fail:
 846    qapi_free_ImageCheck(check);
 847    blk_unref(blk);
 848    return ret;
 849}
 850
/* Opaque data for the block job completion callback common_block_job_cb() */
typedef struct CommonBlockJobCBInfo {
    /* Not read by common_block_job_cb() itself */
    BlockDriverState *bs;
    /* Where common_block_job_cb() stores the error of a failed job */
    Error **errp;
} CommonBlockJobCBInfo;
 855
 856static void common_block_job_cb(void *opaque, int ret)
 857{
 858    CommonBlockJobCBInfo *cbi = opaque;
 859
 860    if (ret < 0) {
 861        error_setg_errno(cbi->errp, -ret, "Block job failed");
 862    }
 863}
 864
 865static void run_block_job(BlockJob *job, Error **errp)
 866{
 867    uint64_t progress_current, progress_total;
 868    AioContext *aio_context = block_job_get_aio_context(job);
 869    int ret = 0;
 870
 871    job_lock();
 872    job_ref_locked(&job->job);
 873    do {
 874        float progress = 0.0f;
 875        job_unlock();
 876        aio_poll(aio_context, true);
 877
 878        progress_get_snapshot(&job->job.progress, &progress_current,
 879                              &progress_total);
 880        if (progress_total) {
 881            progress = (float)progress_current / progress_total * 100.f;
 882        }
 883        qemu_progress_print(progress, 0);
 884        job_lock();
 885    } while (!job_is_ready_locked(&job->job) &&
 886             !job_is_completed_locked(&job->job));
 887
 888    if (!job_is_completed_locked(&job->job)) {
 889        ret = job_complete_sync_locked(&job->job, errp);
 890    } else {
 891        ret = job->job.ret;
 892    }
 893    job_unref_locked(&job->job);
 894    job_unlock();
 895
 896    /* publish completion progress only when success */
 897    if (!ret) {
 898        qemu_progress_print(100.f, 0);
 899    }
 900}
 901
 902static int img_commit(const img_cmd_t *ccmd, int argc, char **argv)
 903{
 904    int c, ret, flags;
 905    const char *filename, *fmt, *cache, *base;
 906    BlockBackend *blk;
 907    BlockDriverState *bs, *base_bs;
 908    BlockJob *job;
 909    bool progress = false, quiet = false, drop = false;
 910    bool writethrough;
 911    Error *local_err = NULL;
 912    CommonBlockJobCBInfo cbi;
 913    bool image_opts = false;
 914    int64_t rate_limit = 0;
 915
 916    fmt = NULL;
 917    cache = BDRV_DEFAULT_CACHE;
 918    base = NULL;
 919    for(;;) {
 920        static const struct option long_options[] = {
 921            {"help", no_argument, 0, 'h'},
 922            {"format", required_argument, 0, 'f'},
 923            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
 924            {"cache", required_argument, 0, 't'},
 925            {"drop", no_argument, 0, 'd'},
 926            {"base", required_argument, 0, 'b'},
 927            {"rate-limit", required_argument, 0, 'r'},
 928            {"progress", no_argument, 0, 'p'},
 929            {"quiet", no_argument, 0, 'q'},
 930            {"object", required_argument, 0, OPTION_OBJECT},
 931            {0, 0, 0, 0}
 932        };
 933        c = getopt_long(argc, argv, "hf:t:db:r:pq",
 934                        long_options, NULL);
 935        if (c == -1) {
 936            break;
 937        }
 938        switch(c) {
 939        case 'h':
 940            cmd_help(ccmd, "[-f FMT | --image-opts] [-t CACHE_MODE] [-b BASE_IMG]\n"
 941"        [-d] [-r RATE] [-q] [--object OBJDEF] FILE\n"
 942,
 943"  -f, --format FMT\n"
 944"     specify FILE image format explicitly (default: probing is used)\n"
 945"  --image-opts\n"
 946"     treat FILE as an option string (key=value,..), not a file name\n"
 947"     (incompatible with -f|--format)\n"
 948"  -t, --cache CACHE_MODE image cache mode (default: " BDRV_DEFAULT_CACHE ")\n"
 949"  -d, --drop\n"
 950"     skip emptying FILE on completion\n"
 951"  -b, --base BASE_IMG\n"
 952"     image in the backing chain to commit change to\n"
 953"     (default: immediate backing file; implies --drop)\n"
 954"  -r, --rate-limit RATE\n"
 955"     I/O rate limit, in bytes per second\n"
 956"  -p, --progress\n"
 957"     display progress information\n"
 958"  -q, --quiet\n"
 959"     quiet mode (produce only error messages if any)\n"
 960"  --object OBJDEF\n"
 961"     defines QEMU user-creatable object\n"
 962"  FILE\n"
 963"     name of the image file, or an option string (key=value,..)\n"
 964"     with --image-opts, to operate on\n"
 965);
 966            break;
 967        case 'f':
 968            fmt = optarg;
 969            break;
 970        case OPTION_IMAGE_OPTS:
 971            image_opts = true;
 972            break;
 973        case 't':
 974            cache = optarg;
 975            break;
 976        case 'd':
 977            drop = true;
 978            break;
 979        case 'b':
 980            base = optarg;
 981            /* -b implies -d */
 982            drop = true;
 983            break;
 984        case 'r':
 985            rate_limit = cvtnum("rate limit", optarg, true);
 986            if (rate_limit < 0) {
 987                return 1;
 988            }
 989            break;
 990        case 'p':
 991            progress = true;
 992            break;
 993        case 'q':
 994            quiet = true;
 995            break;
 996        case OPTION_OBJECT:
 997            user_creatable_process_cmdline(optarg);
 998            break;
 999        default:
1000            tryhelp(argv[0]);
1001        }
1002    }
1003
1004    /* Progress is not shown in Quiet mode */
1005    if (quiet) {
1006        progress = false;
1007    }
1008
1009    if (optind != argc - 1) {
1010        error_exit(argv[0], "Expecting one image file name");
1011    }
1012    filename = argv[optind++];
1013
1014    flags = BDRV_O_RDWR | BDRV_O_UNMAP;
1015    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
1016    if (ret < 0) {
1017        error_report("Invalid cache option: %s", cache);
1018        return 1;
1019    }
1020
1021    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
1022                   false);
1023    if (!blk) {
1024        return 1;
1025    }
1026    bs = blk_bs(blk);
1027
1028    qemu_progress_init(progress, 1.f);
1029    qemu_progress_print(0.f, 100);
1030
1031    bdrv_graph_rdlock_main_loop();
1032    if (base) {
1033        base_bs = bdrv_find_backing_image(bs, base);
1034        if (!base_bs) {
1035            error_setg(&local_err,
1036                       "Did not find '%s' in the backing chain of '%s'",
1037                       base, filename);
1038            bdrv_graph_rdunlock_main_loop();
1039            goto done;
1040        }
1041    } else {
1042        /* This is different from QMP, which by default uses the deepest file in
1043         * the backing chain (i.e., the very base); however, the traditional
1044         * behavior of qemu-img commit is using the immediate backing file. */
1045        base_bs = bdrv_backing_chain_next(bs);
1046        if (!base_bs) {
1047            error_setg(&local_err, "Image does not have a backing file");
1048            bdrv_graph_rdunlock_main_loop();
1049            goto done;
1050        }
1051    }
1052    bdrv_graph_rdunlock_main_loop();
1053
1054    cbi = (CommonBlockJobCBInfo){
1055        .errp = &local_err,
1056        .bs   = bs,
1057    };
1058
1059    commit_active_start("commit", bs, base_bs, JOB_DEFAULT, rate_limit,
1060                        BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb,
1061                        &cbi, false, &local_err);
1062    if (local_err) {
1063        goto done;
1064    }
1065
1066    /* When the block job completes, the BlockBackend reference will point to
1067     * the old backing file. In order to avoid that the top image is already
1068     * deleted, so we can still empty it afterwards, increment the reference
1069     * counter here preemptively. */
1070    if (!drop) {
1071        bdrv_ref(bs);
1072    }
1073
1074    job = block_job_get("commit");
1075    assert(job);
1076    run_block_job(job, &local_err);
1077    if (local_err) {
1078        goto unref_backing;
1079    }
1080
1081    if (!drop) {
1082        BlockBackend *old_backing_blk;
1083
1084        old_backing_blk = blk_new_with_bs(bs, BLK_PERM_WRITE, BLK_PERM_ALL,
1085                                          &local_err);
1086        if (!old_backing_blk) {
1087            goto unref_backing;
1088        }
1089        ret = blk_make_empty(old_backing_blk, &local_err);
1090        blk_unref(old_backing_blk);
1091        if (ret == -ENOTSUP) {
1092            error_free(local_err);
1093            local_err = NULL;
1094        } else if (ret < 0) {
1095            goto unref_backing;
1096        }
1097    }
1098
1099unref_backing:
1100    if (!drop) {
1101        bdrv_unref(bs);
1102    }
1103
1104done:
1105    qemu_progress_end();
1106
1107    /*
1108     * Manually inactivate the image first because this way we can know whether
1109     * an error occurred. blk_unref() doesn't tell us about failures.
1110     */
1111    ret = bdrv_inactivate_all();
1112    if (ret < 0 && !local_err) {
1113        error_setg_errno(&local_err, -ret, "Error while closing the image");
1114    }
1115    blk_unref(blk);
1116
1117    if (local_err) {
1118        error_report_err(local_err);
1119        return 1;
1120    }
1121
1122    qprintf(quiet, "Image committed.\n");
1123    return 0;
1124}
1125
1126/*
1127 * Returns -1 if 'buf' contains only zeroes, otherwise the byte index
1128 * of the first sector boundary within buf where the sector contains a
1129 * non-zero byte.  This function is robust to a buffer that is not
1130 * sector-aligned.
1131 */
1132static int64_t find_nonzero(const uint8_t *buf, int64_t n)
1133{
1134    int64_t i;
1135    int64_t end = QEMU_ALIGN_DOWN(n, BDRV_SECTOR_SIZE);
1136
1137    for (i = 0; i < end; i += BDRV_SECTOR_SIZE) {
1138        if (!buffer_is_zero(buf + i, BDRV_SECTOR_SIZE)) {
1139            return i;
1140        }
1141    }
1142    if (i < n && !buffer_is_zero(buf + i, n - end)) {
1143        return i;
1144    }
1145    return -1;
1146}
1147
1148/*
1149 * Returns true iff the first sector pointed to by 'buf' contains at least
1150 * a non-NUL byte.
1151 *
1152 * 'pnum' is set to the number of sectors (including and immediately following
1153 * the first one) that are known to be in the same allocated/unallocated state.
1154 * The function will try to align the end offset to alignment boundaries so
1155 * that the request will at least end aligned and consecutive requests will
1156 * also start at an aligned offset.
1157 */
1158static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum,
1159                                int64_t sector_num, int alignment)
1160{
1161    bool is_zero;
1162    int i, tail;
1163
1164    if (n <= 0) {
1165        *pnum = 0;
1166        return 0;
1167    }
1168    is_zero = buffer_is_zero(buf, BDRV_SECTOR_SIZE);
1169    for(i = 1; i < n; i++) {
1170        buf += BDRV_SECTOR_SIZE;
1171        if (is_zero != buffer_is_zero(buf, BDRV_SECTOR_SIZE)) {
1172            break;
1173        }
1174    }
1175
1176    if (i == n) {
1177        /*
1178         * The whole buf is the same.
1179         * No reason to split it into chunks, so return now.
1180         */
1181        *pnum = i;
1182        return !is_zero;
1183    }
1184
1185    tail = (sector_num + i) & (alignment - 1);
1186    if (tail) {
1187        if (is_zero && i <= tail) {
1188            /*
1189             * For sure next sector after i is data, and it will rewrite this
1190             * tail anyway due to RMW. So, let's just write data now.
1191             */
1192            is_zero = false;
1193        }
1194        if (!is_zero) {
1195            /* If possible, align up end offset of allocated areas. */
1196            i += alignment - tail;
1197            i = MIN(i, n);
1198        } else {
1199            /*
1200             * For sure next sector after i is data, and it will rewrite this
1201             * tail anyway due to RMW. Better is avoid RMW and write zeroes up
1202             * to aligned bound.
1203             */
1204            i -= tail;
1205        }
1206    }
1207    *pnum = i;
1208    return !is_zero;
1209}
1210
1211/*
1212 * Like is_allocated_sectors, but if the buffer starts with a used sector,
1213 * up to 'min' consecutive sectors containing zeros are ignored. This avoids
1214 * breaking up write requests for only small sparse areas.
1215 */
1216static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
1217    int min, int64_t sector_num, int alignment)
1218{
1219    int ret;
1220    int num_checked, num_used;
1221
1222    if (n < min) {
1223        min = n;
1224    }
1225
1226    ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1227    if (!ret) {
1228        return ret;
1229    }
1230
1231    num_used = *pnum;
1232    buf += BDRV_SECTOR_SIZE * *pnum;
1233    n -= *pnum;
1234    sector_num += *pnum;
1235    num_checked = num_used;
1236
1237    while (n > 0) {
1238        ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1239
1240        buf += BDRV_SECTOR_SIZE * *pnum;
1241        n -= *pnum;
1242        sector_num += *pnum;
1243        num_checked += *pnum;
1244        if (ret) {
1245            num_used = num_checked;
1246        } else if (*pnum >= min) {
1247            break;
1248        }
1249    }
1250
1251    *pnum = num_used;
1252    return 1;
1253}
1254
1255/*
1256 * Compares two buffers chunk by chunk, where @chsize is the chunk size.
1257 * If @chsize is 0, default chunk size of BDRV_SECTOR_SIZE is used.
1258 * Returns 0 if the first chunk of each buffer matches, non-zero otherwise.
1259 *
1260 * @pnum is set to the size of the buffer prefix aligned to @chsize that
1261 * has the same matching status as the first chunk.
1262 */
1263static int compare_buffers(const uint8_t *buf1, const uint8_t *buf2,
1264                           int64_t bytes, uint64_t chsize, int64_t *pnum)
1265{
1266    bool res;
1267    int64_t i;
1268
1269    assert(bytes > 0);
1270
1271    if (!chsize) {
1272        chsize = BDRV_SECTOR_SIZE;
1273    }
1274    i = MIN(bytes, chsize);
1275
1276    res = !!memcmp(buf1, buf2, i);
1277    while (i < bytes) {
1278        int64_t len = MIN(bytes - i, chsize);
1279
1280        if (!!memcmp(buf1 + i, buf2 + i, len) != res) {
1281            break;
1282        }
1283        i += len;
1284    }
1285
1286    *pnum = i;
1287    return res;
1288}
1289
/*
 * Size of the I/O buffers allocated by img_compare(), which also caps the
 * number of bytes it reads per request.
 */
#define IO_BUF_SIZE (2 * MiB)
1291
1292/*
1293 * Check if passed sectors are empty (not allocated or contain only 0 bytes)
1294 *
1295 * Intended for use by 'qemu-img compare': Returns 0 in case sectors are
1296 * filled with 0, 1 if sectors contain non-zero data (this is a comparison
1297 * failure), and 4 on error (the exit status for read errors), after emitting
1298 * an error message.
1299 *
1300 * @param blk:  BlockBackend for the image
1301 * @param offset: Starting offset to check
1302 * @param bytes: Number of bytes to check
1303 * @param filename: Name of disk file we are checking (logging purpose)
1304 * @param buffer: Allocated buffer for storing read data
1305 * @param quiet: Flag for quiet mode
1306 */
1307static int check_empty_sectors(BlockBackend *blk, int64_t offset,
1308                               int64_t bytes, const char *filename,
1309                               uint8_t *buffer, bool quiet)
1310{
1311    int ret = 0;
1312    int64_t idx;
1313
1314    ret = blk_pread(blk, offset, bytes, buffer, 0);
1315    if (ret < 0) {
1316        error_report("Error while reading offset %" PRId64 " of %s: %s",
1317                     offset, filename, strerror(-ret));
1318        return 4;
1319    }
1320    idx = find_nonzero(buffer, bytes);
1321    if (idx >= 0) {
1322        qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1323                offset + idx);
1324        return 1;
1325    }
1326
1327    return 0;
1328}
1329
/*
 * Compares two images. Exit codes:
 *
 * 0 - Images are identical or the requested help was printed
 * 1 - Images differ
 * >1 - Error occurred
 *
 * The error codes set in this function are: 2 for an invalid cache mode or
 * an image that cannot be opened, 3 for a failed block status query and 4
 * for a failed size query or read.
 *
 * The common range of both images is walked using their block status, and
 * data is only read where that is needed to decide. If the sizes differ
 * (and strict mode is off), the excess part of the larger image must read
 * as zeroes for the images to count as identical.
 */
static int img_compare(const img_cmd_t *ccmd, int argc, char **argv)
{
    const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
    BlockBackend *blk1, *blk2;
    BlockDriverState *bs1, *bs2;
    int64_t total_size1, total_size2;
    uint8_t *buf1 = NULL, *buf2 = NULL;
    int64_t pnum1, pnum2;
    int allocated1, allocated2;
    int ret = 0; /* return value - 0 Ident, 1 Different, >1 Error */
    bool progress = false, quiet = false, strict = false;
    int flags;
    bool writethrough;
    int64_t total_size;
    int64_t offset = 0;
    int64_t chunk;
    int c;
    uint64_t progress_base;
    bool image_opts = false;
    bool force_share = false;

    cache = BDRV_DEFAULT_CACHE;
    for (;;) {
        static const struct option long_options[] = {
            {"help", no_argument, 0, 'h'},
            {"a-format", required_argument, 0, 'f'},
            {"b-format", required_argument, 0, 'F'},
            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
            {"strict", no_argument, 0, 's'},
            {"cache", required_argument, 0, 'T'},
            {"force-share", no_argument, 0, 'U'},
            {"progress", no_argument, 0, 'p'},
            {"quiet", no_argument, 0, 'q'},
            {"object", required_argument, 0, OPTION_OBJECT},
            {0, 0, 0, 0}
        };
        c = getopt_long(argc, argv, "hf:F:sT:Upq",
                        long_options, NULL);
        if (c == -1) {
            break;
        }
        switch (c) {
        case 'h':
            cmd_help(ccmd,
"[[-f FMT] [-F FMT] | --image-opts] [-s] [-T CACHE]\n"
"        [-U] [-p] [-q] [--object OBJDEF] FILE1 FILE2\n"
,
"  -f, --a-format FMT\n"
"     specify FILE1 image format explicitly (default: probing is used)\n"
"  -F, --b-format FMT\n"
"     specify FILE2 image format explicitly (default: probing is used)\n"
"  --image-opts\n"
"     treat FILE1 and FILE2 as option strings (key=value,..), not file names\n"
"     (incompatible with -f|--a-format and -F|--b-format)\n"
"  -s, --strict\n"
"     strict mode, also check if sizes are equal\n"
"  -T, --cache CACHE_MODE\n"
"     images caching mode (default: " BDRV_DEFAULT_CACHE ")\n"
"  -U, --force-share\n"
"     open images in shared mode for concurrent access\n"
"  -p, --progress\n"
"     display progress information\n"
"  -q, --quiet\n"
"     quiet mode (produce only error messages if any)\n"
"  --object OBJDEF\n"
"     defines QEMU user-creatable object\n"
"  FILE1, FILE2\n"
"     names of the image files, or option strings (key=value,..)\n"
"     with --image-opts, to compare\n"
);
            break;
        case 'f':
            fmt1 = optarg;
            break;
        case 'F':
            fmt2 = optarg;
            break;
        case OPTION_IMAGE_OPTS:
            image_opts = true;
            break;
        case 's':
            strict = true;
            break;
        case 'T':
            cache = optarg;
            break;
        case 'U':
            force_share = true;
            break;
        case 'p':
            progress = true;
            break;
        case 'q':
            quiet = true;
            break;
        case OPTION_OBJECT:
            user_creatable_process_cmdline(optarg);
            break;
        default:
            tryhelp(argv[0]);
        }
    }

    /* Progress is not shown in Quiet mode */
    if (quiet) {
        progress = false;
    }


    /* Exactly two positional arguments (FILE1 and FILE2) must remain */
    if (optind != argc - 2) {
        error_exit(argv[0], "Expecting two image file names");
    }
    filename1 = argv[optind++];
    filename2 = argv[optind++];

    /* Initialize before goto out */
    qemu_progress_init(progress, 2.0);

    /* The same cache mode is applied to both images */
    flags = 0;
    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
    if (ret < 0) {
        error_report("Invalid source cache option: %s", cache);
        ret = 2;
        goto out3;
    }

    blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet,
                    force_share);
    if (!blk1) {
        ret = 2;
        goto out3;
    }

    blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet,
                    force_share);
    if (!blk2) {
        ret = 2;
        goto out2;
    }
    bs1 = blk_bs(blk1);
    bs2 = blk_bs(blk2);

    buf1 = blk_blockalign(blk1, IO_BUF_SIZE);
    buf2 = blk_blockalign(blk2, IO_BUF_SIZE);
    total_size1 = blk_getlength(blk1);
    if (total_size1 < 0) {
        error_report("Can't get size of %s: %s",
                     filename1, strerror(-total_size1));
        ret = 4;
        goto out;
    }
    total_size2 = blk_getlength(blk2);
    if (total_size2 < 0) {
        error_report("Can't get size of %s: %s",
                     filename2, strerror(-total_size2));
        ret = 4;
        goto out;
    }
    /*
     * total_size is the range present in both images; progress_base is the
     * size of the larger image, against which progress is measured.
     */
    total_size = MIN(total_size1, total_size2);
    progress_base = MAX(total_size1, total_size2);

    qemu_progress_print(0, 100);

    if (strict && total_size1 != total_size2) {
        ret = 1;
        qprintf(quiet, "Strict mode: Image size mismatch!\n");
        goto out;
    }

    /* Pass 1: compare the range that exists in both images */
    while (offset < total_size) {
        int status1, status2;

        status1 = bdrv_block_status_above(bs1, NULL, offset,
                                          total_size1 - offset, &pnum1, NULL,
                                          NULL);
        if (status1 < 0) {
            ret = 3;
            error_report("Sector allocation test failed for %s", filename1);
            goto out;
        }
        allocated1 = status1 & BDRV_BLOCK_ALLOCATED;

        status2 = bdrv_block_status_above(bs2, NULL, offset,
                                          total_size2 - offset, &pnum2, NULL,
                                          NULL);
        if (status2 < 0) {
            ret = 3;
            error_report("Sector allocation test failed for %s", filename2);
            goto out;
        }
        allocated2 = status2 & BDRV_BLOCK_ALLOCATED;

        /* Only look at the part for which both statuses are known to hold */
        assert(pnum1 && pnum2);
        chunk = MIN(pnum1, pnum2);

        if (strict) {
            if (status1 != status2) {
                ret = 1;
                qprintf(quiet, "Strict mode: Offset %" PRId64
                        " block status mismatch!\n", offset);
                goto out;
            }
        }
        if ((status1 & BDRV_BLOCK_ZERO) && (status2 & BDRV_BLOCK_ZERO)) {
            /* nothing to do */
        } else if (allocated1 == allocated2) {
            /*
             * Both allocated: read and compare the data. If neither is
             * allocated, the chunk is skipped without reading.
             */
            if (allocated1) {
                int64_t pnum;

                chunk = MIN(chunk, IO_BUF_SIZE);
                ret = blk_pread(blk1, offset, chunk, buf1, 0);
                if (ret < 0) {
                    error_report("Error while reading offset %" PRId64
                                 " of %s: %s",
                                 offset, filename1, strerror(-ret));
                    ret = 4;
                    goto out;
                }
                ret = blk_pread(blk2, offset, chunk, buf2, 0);
                if (ret < 0) {
                    error_report("Error while reading offset %" PRId64
                                 " of %s: %s",
                                 offset, filename2, strerror(-ret));
                    ret = 4;
                    goto out;
                }
                ret = compare_buffers(buf1, buf2, chunk, 0, &pnum);
                if (ret || pnum != chunk) {
                    /*
                     * ret != 0: the very first sector differs; otherwise the
                     * first differing sector begins pnum bytes in.
                     */
                    qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
                            offset + (ret ? 0 : pnum));
                    ret = 1;
                    goto out;
                }
            }
        } else {
            /*
             * Only one image is allocated here: check_empty_sectors() is run
             * on the image selected below and must find only zeroes. buf1
             * serves as the read buffer in both cases.
             */
            chunk = MIN(chunk, IO_BUF_SIZE);
            if (allocated1) {
                ret = check_empty_sectors(blk1, offset, chunk,
                                          filename1, buf1, quiet);
            } else {
                ret = check_empty_sectors(blk2, offset, chunk,
                                          filename2, buf1, quiet);
            }
            if (ret) {
                goto out;
            }
        }
        offset += chunk;
        qemu_progress_print(((float) chunk / progress_base) * 100, 100);
    }

    /* Pass 2: the excess part of the larger image must read as zeroes */
    if (total_size1 != total_size2) {
        BlockBackend *blk_over;
        const char *filename_over;

        qprintf(quiet, "Warning: Image size mismatch!\n");
        if (total_size1 > total_size2) {
            blk_over = blk1;
            filename_over = filename1;
        } else {
            blk_over = blk2;
            filename_over = filename2;
        }

        while (offset < progress_base) {
            ret = bdrv_block_status_above(blk_bs(blk_over), NULL, offset,
                                          progress_base - offset, &chunk,
                                          NULL, NULL);
            if (ret < 0) {
                ret = 3;
                error_report("Sector allocation test failed for %s",
                             filename_over);
                goto out;

            }
            /* Data needs reading only if allocated and not known to be zero */
            if (ret & BDRV_BLOCK_ALLOCATED && !(ret & BDRV_BLOCK_ZERO)) {
                chunk = MIN(chunk, IO_BUF_SIZE);
                ret = check_empty_sectors(blk_over, offset, chunk,
                                          filename_over, buf1, quiet);
                if (ret) {
                    goto out;
                }
            }
            offset += chunk;
            qemu_progress_print(((float) chunk / progress_base) * 100, 100);
        }
    }

    qprintf(quiet, "Images are identical.\n");
    ret = 0;

    /*
     * Cleanup ladder: each label releases what had been set up by the time
     * of the corresponding jump.
     */
out:
    qemu_vfree(buf1);
    qemu_vfree(buf2);
    blk_unref(blk2);
out2:
    blk_unref(blk1);
out3:
    qemu_progress_end();
    return ret;
}
1638
1639/* Convenience wrapper around qmp_block_dirty_bitmap_merge */
1640static void do_dirty_bitmap_merge(const char *dst_node, const char *dst_name,
1641                                  const char *src_node, const char *src_name,
1642                                  Error **errp)
1643{
1644    BlockDirtyBitmapOrStr *merge_src;
1645    BlockDirtyBitmapOrStrList *list = NULL;
1646
1647    merge_src = g_new0(BlockDirtyBitmapOrStr, 1);
1648    merge_src->type = QTYPE_QDICT;
1649    merge_src->u.external.node = g_strdup(src_node);
1650    merge_src->u.external.name = g_strdup(src_name);
1651    QAPI_LIST_PREPEND(list, merge_src);
1652    qmp_block_dirty_bitmap_merge(dst_node, dst_name, list, errp);
1653    qapi_free_BlockDirtyBitmapOrStrList(list);
1654}
1655
/*
 * How a range of the source is treated during conversion, as derived from
 * its block status by convert_iteration_sectors().
 */
enum ImgConvertBlockStatus {
    /* Range carries data, or has to be treated as if it did */
    BLK_DATA,
    /* Block status reported the range as zero */
    BLK_ZERO,
    /* Range is left to the backing file of the target */
    BLK_BACKING_FILE,
};
1661
/* Number of entries in the per-coroutine arrays of ImgConvertState */
#define MAX_COROUTINES 16
#define CONVERT_THROTTLE_GROUP "img_convert"

/* State shared by the convert_*() helpers of the convert operation */
typedef struct ImgConvertState {
    /* Source images and, per source, its length and alignment in sectors */
    BlockBackend **src;
    int64_t *src_sectors;
    int *src_alignment;
    int src_num;                /* number of entries in the arrays above */
    /* NOTE(review): presumably the sum of src_sectors[] - confirm */
    int64_t total_sectors;
    int64_t allocated_sectors;
    int64_t allocated_done;
    int64_t sector_num;
    int64_t wr_offs;
    /* Status of the range ending at sector_next_status (exclusive) */
    enum ImgConvertBlockStatus status;
    /* First sector for which the block status has to be queried again */
    int64_t sector_next_status;
    BlockBackend *target;
    bool has_zero_init;
    bool compressed;            /* write whole compressed clusters */
    bool target_is_new;
    bool target_has_backing;
    int64_t target_backing_sectors; /* negative if unknown */
    bool wr_in_order;
    bool copy_range;
    bool salvage;               /* keep going after source read errors */
    bool quiet;                 /* suppress warnings while salvaging */
    int min_sparse;
    int alignment;
    size_t cluster_sectors;     /* granularity of compressed writes */
    size_t buf_sectors;         /* upper bound for one BLK_DATA chunk */
    long num_coroutines;
    int running_coroutines;
    Coroutine *co[MAX_COROUTINES];
    int64_t wait_sector_num[MAX_COROUTINES];
    CoMutex lock;
    int ret;
} ImgConvertState;
1698
1699static void convert_select_part(ImgConvertState *s, int64_t sector_num,
1700                                int *src_cur, int64_t *src_cur_offset)
1701{
1702    *src_cur = 0;
1703    *src_cur_offset = 0;
1704    while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) {
1705        *src_cur_offset += s->src_sectors[*src_cur];
1706        (*src_cur)++;
1707        assert(*src_cur < s->src_num);
1708    }
1709}
1710
/*
 * Determines how many sectors starting at 'sector_num' can be handled in
 * one go and what kind of range they are.
 *
 * If the cached status does not cover 'sector_num' any more, the block
 * status of the source is queried and s->status and s->sector_next_status
 * are updated. In salvage mode a failing query is retried on shorter
 * ranges, and a single sector that still fails is assumed to be data.
 *
 * Returns the number of sectors, whose status is in s->status, or the
 * negative value returned by bdrv_block_status_above() on failure when not
 * salvaging.
 */
static int coroutine_mixed_fn GRAPH_RDLOCK
convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
{
    int64_t src_cur_offset;
    int ret, n, src_cur;
    bool post_backing_zero = false;

    convert_select_part(s, sector_num, &src_cur, &src_cur_offset);

    assert(s->total_sectors > sector_num);
    n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);

    if (s->target_backing_sectors >= 0) {
        if (sector_num >= s->target_backing_sectors) {
            post_backing_zero = true;
        } else if (sector_num + n > s->target_backing_sectors) {
            /* Split requests around target_backing_sectors (because
             * starting from there, zeros are handled differently) */
            n = s->target_backing_sectors - sector_num;
        }
    }

    /* Query the block status only if the cached one has been used up */
    if (s->sector_next_status <= sector_num) {
        uint64_t offset = (sector_num - src_cur_offset) * BDRV_SECTOR_SIZE;
        int64_t count;
        int tail;
        BlockDriverState *src_bs = blk_bs(s->src[src_cur]);
        BlockDriverState *base;

        /* 'base' is passed to the block status query below */
        if (s->target_has_backing) {
            base = bdrv_cow_bs(bdrv_skip_filters(src_bs));
        } else {
            base = NULL;
        }

        do {
            count = n * BDRV_SECTOR_SIZE;

            ret = bdrv_block_status_above(src_bs, base, offset, count, &count,
                                          NULL, NULL);

            if (ret < 0) {
                if (s->salvage) {
                    if (n == 1) {
                        if (!s->quiet) {
                            warn_report("error while reading block status at "
                                        "offset %" PRIu64 ": %s", offset,
                                        strerror(-ret));
                        }
                        /* Just try to read the data, then */
                        ret = BDRV_BLOCK_DATA;
                        count = BDRV_SECTOR_SIZE;
                    } else {
                        /* Retry on a shorter range */
                        n = DIV_ROUND_UP(n, 4);
                    }
                } else {
                    error_report("error while reading block status at offset "
                                 "%" PRIu64 ": %s", offset, strerror(-ret));
                    return ret;
                }
            }
        } while (ret < 0);

        n = DIV_ROUND_UP(count, BDRV_SECTOR_SIZE);

        /*
         * Avoid that s->sector_next_status becomes unaligned to the source
         * request alignment and/or cluster size to avoid unnecessary read
         * cycles.
         */
        tail = (sector_num - src_cur_offset + n) % s->src_alignment[src_cur];
        if (n > tail) {
            n -= tail;
        }

        /*
         * Map the block status to the convert status. Zeroes found at or
         * past target_backing_sectors are reported as BLK_BACKING_FILE.
         */
        if (ret & BDRV_BLOCK_ZERO) {
            s->status = post_backing_zero ? BLK_BACKING_FILE : BLK_ZERO;
        } else if (ret & BDRV_BLOCK_DATA) {
            s->status = BLK_DATA;
        } else {
            s->status = s->target_has_backing ? BLK_BACKING_FILE : BLK_DATA;
        }

        s->sector_next_status = sector_num + n;
    }

    /* Never go beyond the range the cached status is valid for */
    n = MIN(n, s->sector_next_status - sector_num);
    if (s->status == BLK_DATA) {
        /* Data is capped at s->buf_sectors per iteration */
        n = MIN(n, s->buf_sectors);
    }

    /* We need to write complete clusters for compressed images, so if an
     * unallocated area is shorter than that, we must consider the whole
     * cluster allocated. */
    if (s->compressed) {
        if (n < s->cluster_sectors) {
            n = MIN(s->cluster_sectors, s->total_sectors - sector_num);
            s->status = BLK_DATA;
        } else {
            n = QEMU_ALIGN_DOWN(n, s->cluster_sectors);
        }
    }

    return n;
}
1817
1818static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num,
1819                                        int nb_sectors, uint8_t *buf)
1820{
1821    uint64_t single_read_until = 0;
1822    int n, ret;
1823
1824    assert(nb_sectors <= s->buf_sectors);
1825    while (nb_sectors > 0) {
1826        BlockBackend *blk;
1827        int src_cur;
1828        int64_t bs_sectors, src_cur_offset;
1829        uint64_t offset;
1830
1831        /* In the case of compression with multiple source files, we can get a
1832         * nb_sectors that spreads into the next part. So we must be able to
1833         * read across multiple BDSes for one convert_read() call. */
1834        convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1835        blk = s->src[src_cur];
1836        bs_sectors = s->src_sectors[src_cur];
1837
1838        offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1839
1840        n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1841        if (single_read_until > offset) {
1842            n = 1;
1843        }
1844
1845        ret = blk_co_pread(blk, offset, n << BDRV_SECTOR_BITS, buf, 0);
1846        if (ret < 0) {
1847            if (s->salvage) {
1848                if (n > 1) {
1849                    single_read_until = offset + (n << BDRV_SECTOR_BITS);
1850                    continue;
1851                } else {
1852                    if (!s->quiet) {
1853                        warn_report("error while reading offset %" PRIu64
1854                                    ": %s", offset, strerror(-ret));
1855                    }
1856                    memset(buf, 0, BDRV_SECTOR_SIZE);
1857                }
1858            } else {
1859                return ret;
1860            }
1861        }
1862
1863        sector_num += n;
1864        nb_sectors -= n;
1865        buf += n * BDRV_SECTOR_SIZE;
1866    }
1867
1868    return 0;
1869}
1870
1871
1872static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
1873                                         int nb_sectors, uint8_t *buf,
1874                                         enum ImgConvertBlockStatus status)
1875{
1876    int ret;
1877
1878    while (nb_sectors > 0) {
1879        int n = nb_sectors;
1880        BdrvRequestFlags flags = s->compressed ? BDRV_REQ_WRITE_COMPRESSED : 0;
1881
1882        switch (status) {
1883        case BLK_BACKING_FILE:
1884            /* If we have a backing file, leave clusters unallocated that are
1885             * unallocated in the source image, so that the backing file is
1886             * visible at the respective offset. */
1887            assert(s->target_has_backing);
1888            break;
1889
1890        case BLK_DATA:
1891            /* If we're told to keep the target fully allocated (-S 0) or there
1892             * is real non-zero data, we must write it. Otherwise we can treat
1893             * it as zero sectors.
1894             * Compressed clusters need to be written as a whole, so in that
1895             * case we can only save the write if the buffer is completely
1896             * zeroed. */
1897            if (!s->min_sparse ||
1898                (!s->compressed &&
1899                 is_allocated_sectors_min(buf, n, &n, s->min_sparse,
1900                                          sector_num, s->alignment)) ||
1901                (s->compressed &&
1902                 !buffer_is_zero(buf, n * BDRV_SECTOR_SIZE)))
1903            {
1904                ret = blk_co_pwrite(s->target, sector_num << BDRV_SECTOR_BITS,
1905                                    n << BDRV_SECTOR_BITS, buf, flags);
1906                if (ret < 0) {
1907                    return ret;
1908                }
1909                break;
1910            }
1911            /* fall-through */
1912
1913        case BLK_ZERO:
1914            if (s->has_zero_init) {
1915                assert(!s->target_has_backing);
1916                break;
1917            }
1918            ret = blk_co_pwrite_zeroes(s->target,
1919                                       sector_num << BDRV_SECTOR_BITS,
1920                                       n << BDRV_SECTOR_BITS,
1921                                       BDRV_REQ_MAY_UNMAP);
1922            if (ret < 0) {
1923                return ret;
1924            }
1925            break;
1926        }
1927
1928        sector_num += n;
1929        nb_sectors -= n;
1930        buf += n * BDRV_SECTOR_SIZE;
1931    }
1932
1933    return 0;
1934}
1935
1936static int coroutine_fn convert_co_copy_range(ImgConvertState *s, int64_t sector_num,
1937                                              int nb_sectors)
1938{
1939    int n, ret;
1940
1941    while (nb_sectors > 0) {
1942        BlockBackend *blk;
1943        int src_cur;
1944        int64_t bs_sectors, src_cur_offset;
1945        int64_t offset;
1946
1947        convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1948        offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1949        blk = s->src[src_cur];
1950        bs_sectors = s->src_sectors[src_cur];
1951
1952        n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1953
1954        ret = blk_co_copy_range(blk, offset, s->target,
1955                                sector_num << BDRV_SECTOR_BITS,
1956                                n << BDRV_SECTOR_BITS, 0, 0);
1957        if (ret < 0) {
1958            return ret;
1959        }
1960
1961        sector_num += n;
1962        nb_sectors -= n;
1963    }
1964    return 0;
1965}
1966
1967static void coroutine_fn convert_co_do_copy(void *opaque)
1968{
1969    ImgConvertState *s = opaque;
1970    uint8_t *buf = NULL;
1971    int ret, i;
1972    int index = -1;
1973
1974    for (i = 0; i < s->num_coroutines; i++) {
1975        if (s->co[i] == qemu_coroutine_self()) {
1976            index = i;
1977            break;
1978        }
1979    }
1980    assert(index >= 0);
1981
1982    s->running_coroutines++;
1983    buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
1984
1985    while (1) {
1986        int n;
1987        int64_t sector_num;
1988        enum ImgConvertBlockStatus status;
1989        bool copy_range;
1990
1991        qemu_co_mutex_lock(&s->lock);
1992        if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
1993            qemu_co_mutex_unlock(&s->lock);
1994            break;
1995        }
1996        WITH_GRAPH_RDLOCK_GUARD() {
1997            n = convert_iteration_sectors(s, s->sector_num);
1998        }
1999        if (n < 0) {
2000            qemu_co_mutex_unlock(&s->lock);
2001            s->ret = n;
2002            break;
2003        }
2004        /* save current sector and allocation status to local variables */
2005        sector_num = s->sector_num;
2006        status = s->status;
2007        if (!s->min_sparse && s->status == BLK_ZERO) {
2008            n = MIN(n, s->buf_sectors);
2009        }
2010        /* increment global sector counter so that other coroutines can
2011         * already continue reading beyond this request */
2012        s->sector_num += n;
2013        qemu_co_mutex_unlock(&s->lock);
2014
2015        if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) {
2016            s->allocated_done += n;
2017            qemu_progress_print(100.0 * s->allocated_done /
2018                                        s->allocated_sectors, 0);
2019        }
2020
2021retry:
2022        copy_range = s->copy_range && s->status == BLK_DATA;
2023        if (status == BLK_DATA && !copy_range) {
2024            ret = convert_co_read(s, sector_num, n, buf);
2025            if (ret < 0) {
2026                error_report("error while reading at byte %lld: %s",
2027                             sector_num * BDRV_SECTOR_SIZE, strerror(-ret));
2028                s->ret = ret;
2029            }
2030        } else if (!s->min_sparse && status == BLK_ZERO) {
2031            status = BLK_DATA;
2032            memset(buf, 0x00, n * BDRV_SECTOR_SIZE);
2033        }
2034
2035        if (s->wr_in_order) {
2036            /* keep writes in order */
2037            while (s->wr_offs != sector_num && s->ret == -EINPROGRESS) {
2038                s->wait_sector_num[index] = sector_num;
2039                qemu_coroutine_yield();
2040            }
2041            s->wait_sector_num[index] = -1;
2042        }
2043
2044        if (s->ret == -EINPROGRESS) {
2045            if (copy_range) {
2046                WITH_GRAPH_RDLOCK_GUARD() {
2047                    ret = convert_co_copy_range(s, sector_num, n);
2048                }
2049                if (ret) {
2050                    s->copy_range = false;
2051                    goto retry;
2052                }
2053            } else {
2054                ret = convert_co_write(s, sector_num, n, buf, status);
2055            }
2056            if (ret < 0) {
2057                error_report("error while writing at byte %lld: %s",
2058                             sector_num * BDRV_SECTOR_SIZE, strerror(-ret));
2059                s->ret = ret;
2060            }
2061        }
2062
2063        if (s->wr_in_order) {
2064            /* reenter the coroutine that might have waited
2065             * for this write to complete */
2066            s->wr_offs = sector_num + n;
2067            for (i = 0; i < s->num_coroutines; i++) {
2068                if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) {
2069                    /*
2070                     * A -> B -> A cannot occur because A has
2071                     * s->wait_sector_num[i] == -1 during A -> B.  Therefore
2072                     * B will never enter A during this time window.
2073                     */
2074                    qemu_coroutine_enter(s->co[i]);
2075                    break;
2076                }
2077            }
2078        }
2079    }
2080
2081    qemu_vfree(buf);
2082    s->co[index] = NULL;
2083    s->running_coroutines--;
2084    if (!s->running_coroutines && s->ret == -EINPROGRESS) {
2085        /* the convert job finished successfully */
2086        s->ret = 0;
2087    }
2088}
2089
2090static int convert_do_copy(ImgConvertState *s)
2091{
2092    int ret, i, n;
2093    int64_t sector_num = 0;
2094
2095    /* Check whether we have zero initialisation or can get it efficiently */
2096    if (!s->has_zero_init && s->target_is_new && s->min_sparse &&
2097        !s->target_has_backing) {
2098        bdrv_graph_rdlock_main_loop();
2099        s->has_zero_init = bdrv_has_zero_init(blk_bs(s->target));
2100        bdrv_graph_rdunlock_main_loop();
2101    }
2102
2103    /* Allocate buffer for copied data. For compressed images, only one cluster
2104     * can be copied at a time. */
2105    if (s->compressed) {
2106        if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
2107            error_report("invalid cluster size");
2108            return -EINVAL;
2109        }
2110        s->buf_sectors = s->cluster_sectors;
2111    }
2112
2113    while (sector_num < s->total_sectors) {
2114        bdrv_graph_rdlock_main_loop();
2115        n = convert_iteration_sectors(s, sector_num);
2116        bdrv_graph_rdunlock_main_loop();
2117        if (n < 0) {
2118            return n;
2119        }
2120        if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
2121        {
2122            s->allocated_sectors += n;
2123        }
2124        sector_num += n;
2125    }
2126
2127    /* Do the copy */
2128    s->sector_next_status = 0;
2129    s->ret = -EINPROGRESS;
2130
2131    qemu_co_mutex_init(&s->lock);
2132    for (i = 0; i < s->num_coroutines; i++) {
2133        s->co[i] = qemu_coroutine_create(convert_co_do_copy, s);
2134        s->wait_sector_num[i] = -1;
2135        qemu_coroutine_enter(s->co[i]);
2136    }
2137
2138    while (s->running_coroutines) {
2139        main_loop_wait(false);
2140    }
2141
2142    if (s->compressed && !s->ret) {
2143        /* signal EOF to align */
2144        ret = blk_pwrite_compressed(s->target, 0, 0, NULL);
2145        if (ret < 0) {
2146            return ret;
2147        }
2148    }
2149
2150    return s->ret;
2151}
2152
2153/* Check that bitmaps can be copied, or output an error */
2154static int convert_check_bitmaps(BlockDriverState *src, bool skip_broken)
2155{
2156    BdrvDirtyBitmap *bm;
2157
2158    if (!bdrv_supports_persistent_dirty_bitmap(src)) {
2159        error_report("Source lacks bitmap support");
2160        return -1;
2161    }
2162    FOR_EACH_DIRTY_BITMAP(src, bm) {
2163        if (!bdrv_dirty_bitmap_get_persistence(bm)) {
2164            continue;
2165        }
2166        if (!skip_broken && bdrv_dirty_bitmap_inconsistent(bm)) {
2167            error_report("Cannot copy inconsistent bitmap '%s'",
2168                         bdrv_dirty_bitmap_name(bm));
2169            error_printf("Try --skip-broken-bitmaps, or "
2170                         "use 'qemu-img bitmap --remove' to delete it\n");
2171            return -1;
2172        }
2173    }
2174    return 0;
2175}
2176
2177static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst,
2178                                bool skip_broken)
2179{
2180    BdrvDirtyBitmap *bm;
2181    Error *err = NULL;
2182
2183    FOR_EACH_DIRTY_BITMAP(src, bm) {
2184        const char *name;
2185
2186        if (!bdrv_dirty_bitmap_get_persistence(bm)) {
2187            continue;
2188        }
2189        name = bdrv_dirty_bitmap_name(bm);
2190        if (skip_broken && bdrv_dirty_bitmap_inconsistent(bm)) {
2191            warn_report("Skipping inconsistent bitmap '%s'", name);
2192            continue;
2193        }
2194        qmp_block_dirty_bitmap_add(dst->node_name, name,
2195                                   true, bdrv_dirty_bitmap_granularity(bm),
2196                                   true, true,
2197                                   true, !bdrv_dirty_bitmap_enabled(bm),
2198                                   &err);
2199        if (err) {
2200            error_reportf_err(err, "Failed to create bitmap %s: ", name);
2201            return -1;
2202        }
2203
2204        do_dirty_bitmap_merge(dst->node_name, name, src->node_name, name,
2205                              &err);
2206        if (err) {
2207            error_reportf_err(err, "Failed to populate bitmap %s: ", name);
2208            qmp_block_dirty_bitmap_remove(dst->node_name, name, NULL);
2209            return -1;
2210        }
2211    }
2212
2213    return 0;
2214}
2215
/*
 * Largest value, in sectors, that img_convert() accepts for the
 * -S/--sparse-size option (the byte value given on the command line is
 * divided by BDRV_SECTOR_SIZE before being compared against this limit).
 */
2216#define MAX_BUF_SECTORS 32768
2217
2218static void set_rate_limit(BlockBackend *blk, int64_t rate_limit)
2219{
2220    ThrottleConfig cfg;
2221
2222    throttle_config_init(&cfg);
2223    cfg.buckets[THROTTLE_BPS_WRITE].avg = rate_limit;
2224
2225    blk_io_limits_enable(blk, CONVERT_THROTTLE_GROUP);
2226    blk_set_io_limits(blk, &cfg);
2227}
2228
2229static int img_convert(const img_cmd_t *ccmd, int argc, char **argv)
2230{
2231    int c, bs_i, flags, src_flags = BDRV_O_NO_SHARE;
2232    const char *fmt = NULL, *out_fmt = NULL, *cache = "unsafe",
2233               *src_cache = BDRV_DEFAULT_CACHE, *out_baseimg = NULL,
2234               *out_filename, *out_baseimg_param, *snapshot_name = NULL,
2235               *backing_fmt = NULL;
2236    BlockDriver *drv = NULL, *proto_drv = NULL;
2237    BlockDriverInfo bdi;
2238    BlockDriverState *out_bs;
2239    QemuOpts *opts = NULL, *sn_opts = NULL;
2240    QemuOptsList *create_opts = NULL;
2241    QDict *open_opts = NULL;
2242    char *options = NULL;
2243    Error *local_err = NULL;
2244    bool writethrough, src_writethrough, image_opts = false,
2245         skip_create = false, progress = false, tgt_image_opts = false;
2246    int64_t ret = -EINVAL;
2247    bool force_share = false;
2248    bool explict_min_sparse = false;
2249    bool bitmaps = false;
2250    bool skip_broken = false;
2251    int64_t rate_limit = 0;
2252
2253    ImgConvertState s = (ImgConvertState) {
2254        /* Need at least 4k of zeros for sparse detection */
2255        .min_sparse         = 8,
2256        .copy_range         = false,
2257        .buf_sectors        = IO_BUF_SIZE / BDRV_SECTOR_SIZE,
2258        .wr_in_order        = true,
2259        .num_coroutines     = 8,
2260    };
2261
2262    for(;;) {
2263        static const struct option long_options[] = {
2264            {"help", no_argument, 0, 'h'},
2265            {"source-format", required_argument, 0, 'f'},
2266            /*
2267             * XXX: historic --image-opts acts on source file only,
2268             * it seems better to have it affect both source and target,
2269             * and have separate --source-image-opts for source,
2270             * but this might break existing setups.
2271             */
2272            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2273            {"source-cache", required_argument, 0, 'T'},
2274            {"snapshot", required_argument, 0, 'l'},
2275            {"bitmaps", no_argument, 0, OPTION_BITMAPS},
2276            {"skip-broken-bitmaps", no_argument, 0, OPTION_SKIP_BROKEN},
2277            {"salvage", no_argument, 0, OPTION_SALVAGE},
2278            {"target-format", required_argument, 0, 'O'},
2279            {"target-image-opts", no_argument, 0, OPTION_TARGET_IMAGE_OPTS},
2280            {"target-format-options", required_argument, 0, 'o'},
2281            {"target-cache", required_argument, 0, 't'},
2282            {"backing", required_argument, 0, 'b'},
2283            {"backing-format", required_argument, 0, 'F'},
2284            {"sparse-size", required_argument, 0, 'S'},
2285            {"no-create", no_argument, 0, 'n'},
2286            {"target-is-zero", no_argument, 0, OPTION_TARGET_IS_ZERO},
2287            {"force-share", no_argument, 0, 'U'},
2288            {"rate-limit", required_argument, 0, 'r'},
2289            {"parallel", required_argument, 0, 'm'},
2290            {"oob-writes", no_argument, 0, 'W'},
2291            {"copy-range-offloading", no_argument, 0, 'C'},
2292            {"progress", no_argument, 0, 'p'},
2293            {"quiet", no_argument, 0, 'q'},
2294            {"object", required_argument, 0, OPTION_OBJECT},
2295            {0, 0, 0, 0}
2296        };
2297        c = getopt_long(argc, argv, "hf:O:b:B:CcF:o:l:S:pt:T:nm:WUr:q",
2298                        long_options, NULL);
2299        if (c == -1) {
2300            break;
2301        }
2302        switch (c) {
2303        case 'h':
2304            cmd_help(ccmd, "[-f SRC_FMT | --image-opts] [-T SRC_CACHE]\n"
2305"        [-l SNAPSHOT] [--bitmaps [--skip-broken-bitmaps]] [--salvage]\n"
2306"        [-O TGT_FMT | --target-image-opts] [-o TGT_FMT_OPTS] [-t TGT_CACHE]\n"
2307"        [-b BACKING_FILE [-F BACKING_FMT]] [-S SPARSE_SIZE]\n"
2308"        [-n] [--target-is-zero] [-c]\n"
2309"        [-U] [-r RATE] [-m NUM_PARALLEL] [-W] [-C] [-p] [-q] [--object OBJDEF]\n"
2310"        SRC_FILE [SRC_FILE2...] TGT_FILE\n"
2311,
2312"  -f, --source-format SRC_FMT\n"
2313"     specify format of all SRC_FILEs explicitly (default: probing is used)\n"
2314"  --image-opts\n"
2315"     treat each SRC_FILE as an option string (key=value,...), not a file name\n"
2316"     (incompatible with -f|--source-format)\n"
2317"  -T, --source-cache SRC_CACHE\n"
2318"     source image(s) cache mode (" BDRV_DEFAULT_CACHE ")\n"
2319"  -l, --snapshot SNAPSHOT\n"
2320"     specify source snapshot\n"
2321"  --bitmaps\n"
2322"     also copy any persistent bitmaps present in source\n"
2323"  --skip-broken-bitmaps\n"
2324"     skip (do not error out) any broken bitmaps\n"
2325"  --salvage\n"
2326"     ignore errors on input (convert unreadable areas to zeros)\n"
2327"  -O, --target-format TGT_FMT\n"
2328"     specify TGT_FILE image format (default: raw)\n"
2329"  --target-image-opts\n"
2330"     treat TGT_FILE as an option string (key=value,...), not a file name\n"
2331"     (incompatible with -O|--target-format)\n"
2332"  -o, --target-format-options TGT_FMT_OPTS\n"
2333"     TGT_FMT-specific options\n"
2334"  -t, --target-cache TGT_CACHE\n"
2335"     cache mode when opening output image (default: unsafe)\n"
2336"  -b, --backing BACKING_FILE (was -B in <= 10.0)\n"
2337"     create target image to be a CoW on top of BACKING_FILE\n"
2338"  -F, --backing-format BACKING_FMT\n" /* -B used for -b in <=10.0 */
2339"     specify BACKING_FILE image format explicitly (default: probing is used)\n"
2340"  -S, --sparse-size SPARSE_SIZE[bkKMGTPE]\n"
2341"     specify number of consecutive zero bytes to treat as a gap on output\n"
2342"     (rounded down to nearest 512 bytes), with optional multiplier suffix\n"
2343"  -n, --no-create\n"
2344"     omit target volume creation (e.g. on rbd)\n"
2345"  --target-is-zero\n"
2346"     indicates that the target volume is pre-zeroed\n"
2347"  -c, --compress\n"
2348"     create compressed output image (qcow and qcow2 formats only)\n"
2349"  -U, --force-share\n"
2350"     open images in shared mode for concurrent access\n"
2351"  -r, --rate-limit RATE\n"
2352"     I/O rate limit, in bytes per second\n"
2353"  -m, --parallel NUM_PARALLEL\n"
2354"     specify parallelism (default: 8)\n"
2355"  -C, --copy-range-offloading\n"
2356"     try to use copy offloading\n"
2357"  -W, --oob-writes\n"
2358"     enable out-of-order writes to improve performance\n"
2359"  -p, --progress\n"
2360"     display progress information\n"
2361"  -q, --quiet\n"
2362"     quiet mode (produce only error messages if any)\n"
2363"  --object OBJDEF\n"
2364"     defines QEMU user-creatable object\n"
2365"  SRC_FILE...\n"
2366"     one or more source image file names,\n"
2367"     or option strings (key=value,..) with --source-image-opts\n"
2368"  TGT_FILE\n"
2369"     target (output) image file name,\n"
2370"     or option string (key=value,..) with --target-image-opts\n"
2371);
2372            break;
2373        case 'f':
2374            fmt = optarg;
2375            break;
2376        case OPTION_IMAGE_OPTS:
2377            image_opts = true;
2378            break;
2379        case 'T':
2380            src_cache = optarg;
2381            break;
2382        case 'l':
2383            if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
2384                sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
2385                                                  optarg, false);
2386                if (!sn_opts) {
2387                    error_report("Failed in parsing snapshot param '%s'",
2388                                 optarg);
2389                    goto fail_getopt;
2390                }
2391            } else {
2392                snapshot_name = optarg;
2393            }
2394            break;
2395        case OPTION_BITMAPS:
2396            bitmaps = true;
2397            break;
2398        case OPTION_SKIP_BROKEN:
2399            skip_broken = true;
2400            break;
2401        case OPTION_SALVAGE:
2402            s.salvage = true;
2403            break;
2404         case 'O':
2405            out_fmt = optarg;
2406            break;
2407        case OPTION_TARGET_IMAGE_OPTS:
2408            tgt_image_opts = true;
2409            break;
2410        case 'o':
2411            if (accumulate_options(&options, optarg) < 0) {
2412                goto fail_getopt;
2413            }
2414            break;
2415        case 't':
2416            cache = optarg;
2417            break;
2418        case 'B': /* <=10.0 */
2419        case 'b':
2420            out_baseimg = optarg;
2421            break;
2422        case 'F': /* can't use -B as it used as -b in <=10.0 */
2423            backing_fmt = optarg;
2424            break;
2425        case 'S':
2426        {
2427            int64_t sval;
2428
2429            sval = cvtnum("buffer size for sparse output", optarg, true);
2430            if (sval < 0) {
2431                goto fail_getopt;
2432            } else if (!QEMU_IS_ALIGNED(sval, BDRV_SECTOR_SIZE) ||
2433                sval / BDRV_SECTOR_SIZE > MAX_BUF_SECTORS) {
2434                error_report("Invalid buffer size for sparse output specified. "
2435                    "Valid sizes are multiples of %llu up to %llu. Select "
2436                    "0 to disable sparse detection (fully allocates output).",
2437                    BDRV_SECTOR_SIZE, MAX_BUF_SECTORS * BDRV_SECTOR_SIZE);
2438                goto fail_getopt;
2439            }
2440
2441            s.min_sparse = sval / BDRV_SECTOR_SIZE;
2442            explict_min_sparse = true;
2443            break;
2444        }
2445        case 'n':
2446            skip_create = true;
2447            break;
2448        case OPTION_TARGET_IS_ZERO:
2449            /*
2450             * The user asserting that the target is blank has the
2451             * same effect as the target driver supporting zero
2452             * initialisation.
2453             */
2454            s.has_zero_init = true;
2455            break;
2456        case 'c':
2457            s.compressed = true;
2458            break;
2459        case 'U':
2460            force_share = true;
2461            break;
2462        case 'r':
2463            rate_limit = cvtnum("rate limit", optarg, true);
2464            if (rate_limit < 0) {
2465                goto fail_getopt;
2466            }
2467            break;
2468        case 'm':
2469            s.num_coroutines = cvtnum_full("number of coroutines", optarg,
2470                                           false, 1, MAX_COROUTINES);
2471            if (s.num_coroutines < 0) {
2472                goto fail_getopt;
2473            }
2474            break;
2475        case 'W':
2476            s.wr_in_order = false;
2477            break;
2478        case 'C':
2479            s.copy_range = true;
2480            break;
2481        case 'p':
2482            progress = true;
2483            break;
2484        case 'q':
2485            s.quiet = true;
2486            break;
2487        case OPTION_OBJECT:
2488            user_creatable_process_cmdline(optarg);
2489            break;
2490        default:
2491            tryhelp(argv[0]);
2492        }
2493    }
2494
2495    if (!out_fmt && !tgt_image_opts) {
2496        out_fmt = "raw";
2497    }
2498
2499    if (skip_broken && !bitmaps) {
2500        error_report("Use of --skip-broken-bitmaps requires --bitmaps");
2501        goto fail_getopt;
2502    }
2503
2504    if (s.compressed && s.copy_range) {
2505        error_report("Cannot enable copy offloading when -c is used");
2506        goto fail_getopt;
2507    }
2508
2509    if (explict_min_sparse && s.copy_range) {
2510        error_report("Cannot enable copy offloading when -S is used");
2511        goto fail_getopt;
2512    }
2513
2514    if (s.copy_range && s.salvage) {
2515        error_report("Cannot use copy offloading in salvaging mode");
2516        goto fail_getopt;
2517    }
2518
2519    if (tgt_image_opts && !skip_create) {
2520        error_report("--target-image-opts requires use of -n flag");
2521        goto fail_getopt;
2522    }
2523
2524    if (skip_create && options) {
2525        error_report("-o has no effect when skipping image creation");
2526        goto fail_getopt;
2527    }
2528
2529    if (s.has_zero_init && !skip_create) {
2530        error_report("--target-is-zero requires use of -n flag");
2531        goto fail_getopt;
2532    }
2533
2534    s.src_num = argc - optind - 1;
2535    out_filename = s.src_num >= 1 ? argv[argc - 1] : NULL;
2536
2537    if (options && has_help_option(options)) {
2538        if (out_fmt) {
2539            ret = print_block_option_help(out_filename, out_fmt);
2540            goto fail_getopt;
2541        } else {
2542            error_report("Option help requires a format be specified");
2543            goto fail_getopt;
2544        }
2545    }
2546
2547    if (s.src_num < 1) {
2548        error_report("Must specify image file name");
2549        goto fail_getopt;
2550    }
2551
2552    /* ret is still -EINVAL until here */
2553    ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
2554    if (ret < 0) {
2555        error_report("Invalid source cache option: %s", src_cache);
2556        goto fail_getopt;
2557    }
2558
2559    /* Initialize before goto out */
2560    if (s.quiet) {
2561        progress = false;
2562    }
2563    qemu_progress_init(progress, 1.0);
2564    qemu_progress_print(0, 100);
2565
2566    s.src = g_new0(BlockBackend *, s.src_num);
2567    s.src_sectors = g_new(int64_t, s.src_num);
2568    s.src_alignment = g_new(int, s.src_num);
2569
2570    for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2571        BlockDriverState *src_bs;
2572        s.src[bs_i] = img_open(image_opts, argv[optind + bs_i],
2573                               fmt, src_flags, src_writethrough, s.quiet,
2574                               force_share);
2575        if (!s.src[bs_i]) {
2576            ret = -1;
2577            goto out;
2578        }
2579        s.src_sectors[bs_i] = blk_nb_sectors(s.src[bs_i]);
2580        if (s.src_sectors[bs_i] < 0) {
2581            error_report("Could not get size of %s: %s",
2582                         argv[optind + bs_i], strerror(-s.src_sectors[bs_i]));
2583            ret = -1;
2584            goto out;
2585        }
2586        src_bs = blk_bs(s.src[bs_i]);
2587        s.src_alignment[bs_i] = DIV_ROUND_UP(src_bs->bl.request_alignment,
2588                                             BDRV_SECTOR_SIZE);
2589        if (!bdrv_get_info(src_bs, &bdi)) {
2590            s.src_alignment[bs_i] = MAX(s.src_alignment[bs_i],
2591                                        bdi.cluster_size / BDRV_SECTOR_SIZE);
2592        }
2593        s.total_sectors += s.src_sectors[bs_i];
2594    }
2595
2596    if (sn_opts) {
2597        bdrv_snapshot_load_tmp(blk_bs(s.src[0]),
2598                               qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
2599                               qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
2600                               &local_err);
2601    } else if (snapshot_name != NULL) {
2602        if (s.src_num > 1) {
2603            error_report("No support for concatenating multiple snapshot");
2604            ret = -1;
2605            goto out;
2606        }
2607
2608        bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(s.src[0]), snapshot_name,
2609                                             &local_err);
2610    }
2611    if (local_err) {
2612        error_reportf_err(local_err, "Failed to load snapshot: ");
2613        ret = -1;
2614        goto out;
2615    }
2616
2617    if (!skip_create) {
2618        /* Find driver and parse its options */
2619        drv = bdrv_find_format(out_fmt);
2620        if (!drv) {
2621            error_report("Unknown file format '%s'", out_fmt);
2622            ret = -1;
2623            goto out;
2624        }
2625
2626        proto_drv = bdrv_find_protocol(out_filename, true, &local_err);
2627        if (!proto_drv) {
2628            error_report_err(local_err);
2629            ret = -1;
2630            goto out;
2631        }
2632
2633        if (!drv->create_opts) {
2634            error_report("Format driver '%s' does not support image creation",
2635                         drv->format_name);
2636            ret = -1;
2637            goto out;
2638        }
2639
2640        if (!proto_drv->create_opts) {
2641            error_report("Protocol driver '%s' does not support image creation",
2642                         proto_drv->format_name);
2643            ret = -1;
2644            goto out;
2645        }
2646
2647        create_opts = qemu_opts_append(create_opts, drv->create_opts);
2648        create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
2649
2650        opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
2651        if (options) {
2652            if (!qemu_opts_do_parse(opts, options, NULL, &local_err)) {
2653                error_report_err(local_err);
2654                ret = -1;
2655                goto out;
2656            }
2657        }
2658
2659        qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
2660                            s.total_sectors * BDRV_SECTOR_SIZE, &error_abort);
2661        ret = add_old_style_options(out_fmt, opts, out_baseimg, backing_fmt);
2662        if (ret < 0) {
2663            goto out;
2664        }
2665    }
2666
2667    /* Get backing file name if -o backing_file was used */
2668    out_baseimg_param = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
2669    if (out_baseimg_param) {
2670        out_baseimg = out_baseimg_param;
2671    }
2672    s.target_has_backing = (bool) out_baseimg;
2673
2674    if (s.has_zero_init && s.target_has_backing) {
2675        error_report("Cannot use --target-is-zero when the destination "
2676                     "image has a backing file");
2677        goto out;
2678    }
2679
2680    if (s.src_num > 1 && out_baseimg) {
2681        error_report("Having a backing file for the target makes no sense when "
2682                     "concatenating multiple input images");
2683        ret = -1;
2684        goto out;
2685    }
2686
2687    if (out_baseimg_param) {
2688        if (!qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT)) {
2689            error_report("Use of backing file requires explicit "
2690                         "backing format");
2691            ret = -1;
2692            goto out;
2693        }
2694    }
2695
2696    /* Check if compression is supported */
2697    if (s.compressed) {
2698        bool encryption =
2699            qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, false);
2700        const char *encryptfmt =
2701            qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT);
2702        const char *preallocation =
2703            qemu_opt_get(opts, BLOCK_OPT_PREALLOC);
2704
2705        if (drv && !block_driver_can_compress(drv)) {
2706            error_report("Compression not supported for this file format");
2707            ret = -1;
2708            goto out;
2709        }
2710
2711        if (encryption || encryptfmt) {
2712            error_report("Compression and encryption not supported at "
2713                         "the same time");
2714            ret = -1;
2715            goto out;
2716        }
2717
2718        if (preallocation
2719            && strcmp(preallocation, "off"))
2720        {
2721            error_report("Compression and preallocation not supported at "
2722                         "the same time");
2723            ret = -1;
2724            goto out;
2725        }
2726    }
2727
2728    /* Determine if bitmaps need copying */
2729    if (bitmaps) {
2730        if (s.src_num > 1) {
2731            error_report("Copying bitmaps only possible with single source");
2732            ret = -1;
2733            goto out;
2734        }
2735        ret = convert_check_bitmaps(blk_bs(s.src[0]), skip_broken);
2736        if (ret < 0) {
2737            goto out;
2738        }
2739    }
2740
2741    /*
2742     * The later open call will need any decryption secrets, and
2743     * bdrv_create() will purge "opts", so extract them now before
2744     * they are lost.
2745     */
2746    if (!skip_create) {
2747        open_opts = qdict_new();
2748        qemu_opt_foreach(opts, img_add_key_secrets, open_opts, &error_abort);
2749
2750        /* Create the new image */
2751        ret = bdrv_create(drv, out_filename, opts, &local_err);
2752        if (ret < 0) {
2753            error_reportf_err(local_err, "%s: error while converting %s: ",
2754                              out_filename, out_fmt);
2755            goto out;
2756        }
2757    }
2758
2759    s.target_is_new = !skip_create;
2760
2761    flags = s.min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR;
2762    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
2763    if (ret < 0) {
2764        error_report("Invalid cache option: %s", cache);
2765        goto out;
2766    }
2767
2768    if (flags & BDRV_O_NOCACHE) {
2769        /*
2770         * If we open the target with O_DIRECT, it may be necessary to
2771         * extend its size to align to the physical sector size.
2772         */
2773        flags |= BDRV_O_RESIZE;
2774    }
2775
2776    if (skip_create) {
2777        s.target = img_open(tgt_image_opts, out_filename, out_fmt,
2778                            flags, writethrough, s.quiet, false);
2779    } else {
2780        /* TODO ultimately we should allow --target-image-opts
2781         * to be used even when -n is not given.
2782         * That has to wait for bdrv_create to be improved
2783         * to allow filenames in option syntax
2784         */
2785        s.target = img_open_file(out_filename, open_opts, out_fmt,
2786                                 flags, writethrough, s.quiet, false);
2787        open_opts = NULL; /* blk_new_open will have freed it */
2788    }
2789    if (!s.target) {
2790        ret = -1;
2791        goto out;
2792    }
2793    out_bs = blk_bs(s.target);
2794
2795    if (bitmaps && !bdrv_supports_persistent_dirty_bitmap(out_bs)) {
2796        error_report("Format driver '%s' does not support bitmaps",
2797                     out_bs->drv->format_name);
2798        ret = -1;
2799        goto out;
2800    }
2801
2802    if (s.compressed && !block_driver_can_compress(out_bs->drv)) {
2803        error_report("Compression not supported for this file format");
2804        ret = -1;
2805        goto out;
2806    }
2807
2808    /* increase bufsectors from the default 4096 (2M) if opt_transfer
2809     * or discard_alignment of the out_bs is greater. Limit to
2810     * MAX_BUF_SECTORS as maximum which is currently 32768 (16MB). */
2811    s.buf_sectors = MIN(MAX_BUF_SECTORS,
2812                        MAX(s.buf_sectors,
2813                            MAX(out_bs->bl.opt_transfer >> BDRV_SECTOR_BITS,
2814                                out_bs->bl.pdiscard_alignment >>
2815                                BDRV_SECTOR_BITS)));
2816
2817    /* try to align the write requests to the destination to avoid unnecessary
2818     * RMW cycles. */
2819    s.alignment = MAX(pow2floor(s.min_sparse),
2820                      DIV_ROUND_UP(out_bs->bl.request_alignment,
2821                                   BDRV_SECTOR_SIZE));
2822    assert(is_power_of_2(s.alignment));
2823
2824    if (skip_create) {
2825        int64_t output_sectors = blk_nb_sectors(s.target);
2826        if (output_sectors < 0) {
2827            error_report("unable to get output image length: %s",
2828                         strerror(-output_sectors));
2829            ret = -1;
2830            goto out;
2831        } else if (output_sectors < s.total_sectors) {
2832            error_report("output file is smaller than input file");
2833            ret = -1;
2834            goto out;
2835        }
2836    }
2837
2838    if (s.target_has_backing && s.target_is_new) {
2839        /* Errors are treated as "backing length unknown" (which means
2840         * s.target_backing_sectors has to be negative, which it will
2841         * be automatically).  The backing file length is used only
2842         * for optimizations, so such a case is not fatal. */
2843        bdrv_graph_rdlock_main_loop();
2844        s.target_backing_sectors =
2845            bdrv_nb_sectors(bdrv_backing_chain_next(out_bs));
2846        bdrv_graph_rdunlock_main_loop();
2847    } else {
2848        s.target_backing_sectors = -1;
2849    }
2850
2851    ret = bdrv_get_info(out_bs, &bdi);
2852    if (ret < 0) {
2853        if (s.compressed) {
2854            error_report("could not get block driver info");
2855            goto out;
2856        }
2857    } else {
2858        s.compressed = s.compressed || bdi.needs_compressed_writes;
2859        s.cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE;
2860    }
2861
2862    if (rate_limit) {
2863        set_rate_limit(s.target, rate_limit);
2864    }
2865
2866    ret = convert_do_copy(&s);
2867
2868    /* Now copy the bitmaps */
2869    if (bitmaps && ret == 0) {
2870        ret = convert_copy_bitmaps(blk_bs(s.src[0]), out_bs, skip_broken);
2871    }
2872
2873out:
2874    if (!ret) {
2875        qemu_progress_print(100, 0);
2876    }
2877    qemu_progress_end();
2878    qemu_opts_del(opts);
2879    qemu_opts_free(create_opts);
2880    qobject_unref(open_opts);
2881    blk_unref(s.target);
2882    if (s.src) {
2883        for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2884            blk_unref(s.src[bs_i]);
2885        }
2886        g_free(s.src);
2887    }
2888    g_free(s.src_sectors);
2889    g_free(s.src_alignment);
2890fail_getopt:
2891    qemu_opts_del(sn_opts);
2892    g_free(options);
2893
2894    return !!ret;
2895}
2896
2897
2898static void dump_snapshots(BlockDriverState *bs)
2899{
2900    QEMUSnapshotInfo *sn_tab, *sn;
2901    int nb_sns, i;
2902
2903    nb_sns = bdrv_snapshot_list(bs, &sn_tab);
2904    if (nb_sns <= 0)
2905        return;
2906    printf("Snapshot list:\n");
2907    bdrv_snapshot_dump(NULL);
2908    printf("\n");
2909    for(i = 0; i < nb_sns; i++) {
2910        sn = &sn_tab[i];
2911        bdrv_snapshot_dump(sn);
2912        printf("\n");
2913    }
2914    g_free(sn_tab);
2915}
2916
2917static void dump_json_block_graph_info_list(BlockGraphInfoList *list)
2918{
2919    GString *str;
2920    QObject *obj;
2921    Visitor *v = qobject_output_visitor_new(&obj);
2922
2923    visit_type_BlockGraphInfoList(v, NULL, &list, &error_abort);
2924    visit_complete(v, &obj);
2925    str = qobject_to_json_pretty(obj, true);
2926    assert(str != NULL);
2927    printf("%s\n", str->str);
2928    qobject_unref(obj);
2929    visit_free(v);
2930    g_string_free(str, true);
2931}
2932
2933static void dump_json_block_graph_info(BlockGraphInfo *info)
2934{
2935    GString *str;
2936    QObject *obj;
2937    Visitor *v = qobject_output_visitor_new(&obj);
2938
2939    visit_type_BlockGraphInfo(v, NULL, &info, &error_abort);
2940    visit_complete(v, &obj);
2941    str = qobject_to_json_pretty(obj, true);
2942    assert(str != NULL);
2943    printf("%s\n", str->str);
2944    qobject_unref(obj);
2945    visit_free(v);
2946    g_string_free(str, true);
2947}
2948
2949static void dump_human_image_info(BlockGraphInfo *info, int indentation,
2950                                  const char *path)
2951{
2952    BlockChildInfoList *children_list;
2953
2954    bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation,
2955                        info->children == NULL);
2956
2957    for (children_list = info->children; children_list;
2958         children_list = children_list->next)
2959    {
2960        BlockChildInfo *child = children_list->value;
2961        g_autofree char *child_path = NULL;
2962
2963        printf("%*sChild node '%s%s':\n",
2964               indentation * 4, "", path, child->name);
2965        child_path = g_strdup_printf("%s%s/", path, child->name);
2966        dump_human_image_info(child->info, indentation + 1, child_path);
2967    }
2968}
2969
2970static void dump_human_image_info_list(BlockGraphInfoList *list)
2971{
2972    BlockGraphInfoList *elem;
2973    bool delim = false;
2974
2975    for (elem = list; elem; elem = elem->next) {
2976        if (delim) {
2977            printf("\n");
2978        }
2979        delim = true;
2980
2981        dump_human_image_info(elem->value, 0, "/");
2982    }
2983}
2984
2985static gboolean str_equal_func(gconstpointer a, gconstpointer b)
2986{
2987    return strcmp(a, b) == 0;
2988}
2989
2990/**
2991 * Open an image file chain and return an BlockGraphInfoList
2992 *
2993 * @filename: topmost image filename
2994 * @fmt: topmost image format (may be NULL to autodetect)
2995 * @chain: true  - enumerate entire backing file chain
2996 *         false - only topmost image file
2997 *
2998 * Returns a list of BlockNodeInfo objects or NULL if there was an error
2999 * opening an image file.  If there was an error a message will have been
3000 * printed to stderr.
3001 */
3002static BlockGraphInfoList *collect_image_info_list(bool image_opts,
3003                                                   const char *filename,
3004                                                   const char *fmt,
3005                                                   bool chain, bool force_share)
3006{
3007    BlockGraphInfoList *head = NULL;
3008    BlockGraphInfoList **tail = &head;
3009    GHashTable *filenames;
3010    Error *err = NULL;
3011
3012    filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
3013
3014    while (filename) {
3015        BlockBackend *blk;
3016        BlockDriverState *bs;
3017        BlockGraphInfo *info;
3018
3019        if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
3020            error_report("Backing file '%s' creates an infinite loop.",
3021                         filename);
3022            goto err;
3023        }
3024        g_hash_table_insert(filenames, (gpointer)filename, NULL);
3025
3026        blk = img_open(image_opts, filename, fmt,
3027                       BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false,
3028                       force_share);
3029        if (!blk) {
3030            goto err;
3031        }
3032        bs = blk_bs(blk);
3033
3034        /*
3035         * Note that the returned BlockGraphInfo object will not have
3036         * information about this image's backing node, because we have opened
3037         * it with BDRV_O_NO_BACKING.  Printing this object will therefore not
3038         * duplicate the backing chain information that we obtain by walking
3039         * the chain manually here.
3040         */
3041        bdrv_graph_rdlock_main_loop();
3042        bdrv_query_block_graph_info(bs, &info, &err);
3043        bdrv_graph_rdunlock_main_loop();
3044
3045        if (err) {
3046            error_report_err(err);
3047            blk_unref(blk);
3048            goto err;
3049        }
3050
3051        QAPI_LIST_APPEND(tail, info);
3052
3053        blk_unref(blk);
3054
3055        /* Clear parameters that only apply to the topmost image */
3056        filename = fmt = NULL;
3057        image_opts = false;
3058
3059        if (chain) {
3060            if (info->full_backing_filename) {
3061                filename = info->full_backing_filename;
3062            } else if (info->backing_filename) {
3063                error_report("Could not determine absolute backing filename,"
3064                             " but backing filename '%s' present",
3065                             info->backing_filename);
3066                goto err;
3067            }
3068            if (info->backing_filename_format) {
3069                fmt = info->backing_filename_format;
3070            }
3071        }
3072    }
3073    g_hash_table_destroy(filenames);
3074    return head;
3075
3076err:
3077    qapi_free_BlockGraphInfoList(head);
3078    g_hash_table_destroy(filenames);
3079    return NULL;
3080}
3081
3082static int img_info(const img_cmd_t *ccmd, int argc, char **argv)
3083{
3084    int c;
3085    OutputFormat output_format = OFORMAT_HUMAN;
3086    bool chain = false;
3087    const char *filename, *fmt;
3088    BlockGraphInfoList *list;
3089    bool image_opts = false;
3090    bool force_share = false;
3091
3092    fmt = NULL;
3093    for(;;) {
3094        static const struct option long_options[] = {
3095            {"help", no_argument, 0, 'h'},
3096            {"format", required_argument, 0, 'f'},
3097            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3098            {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN},
3099            {"force-share", no_argument, 0, 'U'},
3100            {"output", required_argument, 0, OPTION_OUTPUT},
3101            {"object", required_argument, 0, OPTION_OBJECT},
3102            {0, 0, 0, 0}
3103        };
3104        c = getopt_long(argc, argv, "hf:U", long_options, NULL);
3105        if (c == -1) {
3106            break;
3107        }
3108        switch(c) {
3109        case 'h':
3110            cmd_help(ccmd, "[-f FMT | --image-opts] [--backing-chain] [-U]\n"
3111"        [--output human|json] [--object OBJDEF] FILE\n"
3112,
3113"  -f, --format FMT\n"
3114"     specify FILE image format explicitly (default: probing is used)\n"
3115"  --image-opts\n"
3116"     treat FILE as an option string (key=value,..), not a file name\n"
3117"     (incompatible with -f|--format)\n"
3118"  --backing-chain\n"
3119"     display information about the backing chain for copy-on-write overlays\n"
3120"  -U, --force-share\n"
3121"     open image in shared mode for concurrent access\n"
3122"  --output human|json\n"
3123"     specify output format (default: human)\n"
3124"  --object OBJDEF\n"
3125"     defines QEMU user-creatable object\n"
3126"  FILE\n"
3127"     name of the image file, or option string (key=value,..)\n"
3128"     with --image-opts, to operate on\n"
3129);
3130            break;
3131        case 'f':
3132            fmt = optarg;
3133            break;
3134        case OPTION_IMAGE_OPTS:
3135            image_opts = true;
3136            break;
3137        case OPTION_BACKING_CHAIN:
3138            chain = true;
3139            break;
3140        case 'U':
3141            force_share = true;
3142            break;
3143        case OPTION_OUTPUT:
3144            output_format = parse_output_format(argv[0], optarg);
3145            break;
3146        case OPTION_OBJECT:
3147            user_creatable_process_cmdline(optarg);
3148            break;
3149        default:
3150            tryhelp(argv[0]);
3151        }
3152    }
3153    if (optind != argc - 1) {
3154        error_exit(argv[0], "Expecting one image file name");
3155    }
3156    filename = argv[optind++];
3157
3158    list = collect_image_info_list(image_opts, filename, fmt, chain,
3159                                   force_share);
3160    if (!list) {
3161        return 1;
3162    }
3163
3164    switch (output_format) {
3165    case OFORMAT_HUMAN:
3166        dump_human_image_info_list(list);
3167        break;
3168    case OFORMAT_JSON:
3169        if (chain) {
3170            dump_json_block_graph_info_list(list);
3171        } else {
3172            dump_json_block_graph_info(list->value);
3173        }
3174        break;
3175    }
3176
3177    qapi_free_BlockGraphInfoList(list);
3178    return 0;
3179}
3180
3181static int dump_map_entry(OutputFormat output_format, MapEntry *e,
3182                          MapEntry *next)
3183{
3184    switch (output_format) {
3185    case OFORMAT_HUMAN:
3186        if (e->data && !e->has_offset) {
3187            error_report("File contains external, encrypted or compressed clusters.");
3188            return -1;
3189        }
3190        if (e->data && !e->zero) {
3191            printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
3192                   e->start, e->length,
3193                   e->has_offset ? e->offset : 0,
3194                   e->filename ?: "");
3195        }
3196        /* This format ignores the distinction between 0, ZERO and ZERO|DATA.
3197         * Modify the flags here to allow more coalescing.
3198         */
3199        if (next && (!next->data || next->zero)) {
3200            next->data = false;
3201            next->zero = true;
3202        }
3203        break;
3204    case OFORMAT_JSON:
3205        printf("{ \"start\": %"PRId64", \"length\": %"PRId64","
3206               " \"depth\": %"PRId64", \"present\": %s, \"zero\": %s,"
3207               " \"data\": %s, \"compressed\": %s",
3208               e->start, e->length, e->depth,
3209               e->present ? "true" : "false",
3210               e->zero ? "true" : "false",
3211               e->data ? "true" : "false",
3212               e->compressed ? "true" : "false");
3213        if (e->has_offset) {
3214            printf(", \"offset\": %"PRId64"", e->offset);
3215        }
3216        putchar('}');
3217
3218        if (next) {
3219            puts(",");
3220        }
3221        break;
3222    }
3223    return 0;
3224}
3225
3226static int get_block_status(BlockDriverState *bs, int64_t offset,
3227                            int64_t bytes, MapEntry *e)
3228{
3229    int ret;
3230    int depth;
3231    BlockDriverState *file;
3232    bool has_offset;
3233    int64_t map;
3234    char *filename = NULL;
3235
3236    GLOBAL_STATE_CODE();
3237    GRAPH_RDLOCK_GUARD_MAINLOOP();
3238
3239    /* As an optimization, we could cache the current range of unallocated
3240     * clusters in each file of the chain, and avoid querying the same
3241     * range repeatedly.
3242     */
3243
3244    depth = 0;
3245    for (;;) {
3246        bs = bdrv_skip_filters(bs);
3247        ret = bdrv_block_status(bs, offset, bytes, &bytes, &map, &file);
3248        if (ret < 0) {
3249            return ret;
3250        }
3251        assert(bytes);
3252        if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
3253            break;
3254        }
3255        bs = bdrv_cow_bs(bs);
3256        if (bs == NULL) {
3257            ret = 0;
3258            break;
3259        }
3260
3261        depth++;
3262    }
3263
3264    has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID);
3265
3266    if (file && has_offset) {
3267        bdrv_refresh_filename(file);
3268        filename = file->filename;
3269    }
3270
3271    *e = (MapEntry) {
3272        .start = offset,
3273        .length = bytes,
3274        .data = !!(ret & BDRV_BLOCK_DATA),
3275        .zero = !!(ret & BDRV_BLOCK_ZERO),
3276        .compressed = !!(ret & BDRV_BLOCK_COMPRESSED),
3277        .offset = map,
3278        .has_offset = has_offset,
3279        .depth = depth,
3280        .present = !!(ret & BDRV_BLOCK_ALLOCATED),
3281        .filename = filename,
3282    };
3283
3284    return 0;
3285}
3286
3287static inline bool entry_mergeable(const MapEntry *curr, const MapEntry *next)
3288{
3289    if (curr->length == 0) {
3290        return false;
3291    }
3292    if (curr->zero != next->zero ||
3293        curr->data != next->data ||
3294        curr->compressed != next->compressed ||
3295        curr->depth != next->depth ||
3296        curr->present != next->present ||
3297        !curr->filename != !next->filename ||
3298        curr->has_offset != next->has_offset) {
3299        return false;
3300    }
3301    if (curr->filename && strcmp(curr->filename, next->filename)) {
3302        return false;
3303    }
3304    if (curr->has_offset && curr->offset + curr->length != next->offset) {
3305        return false;
3306    }
3307    return true;
3308}
3309
3310static int img_map(const img_cmd_t *ccmd, int argc, char **argv)
3311{
3312    int c;
3313    OutputFormat output_format = OFORMAT_HUMAN;
3314    BlockBackend *blk;
3315    BlockDriverState *bs;
3316    const char *filename, *fmt;
3317    int64_t length;
3318    MapEntry curr = { .length = 0 }, next;
3319    int ret = 0;
3320    bool image_opts = false;
3321    bool force_share = false;
3322    int64_t start_offset = 0;
3323    int64_t max_length = -1;
3324
3325    fmt = NULL;
3326    for (;;) {
3327        static const struct option long_options[] = {
3328            {"help", no_argument, 0, 'h'},
3329            {"format", required_argument, 0, 'f'},
3330            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3331            {"start-offset", required_argument, 0, 's'},
3332            {"max-length", required_argument, 0, 'l'},
3333            {"force-share", no_argument, 0, 'U'},
3334            {"output", required_argument, 0, OPTION_OUTPUT},
3335            {"object", required_argument, 0, OPTION_OBJECT},
3336            {0, 0, 0, 0}
3337        };
3338        c = getopt_long(argc, argv, "hf:s:l:U",
3339                        long_options, NULL);
3340        if (c == -1) {
3341            break;
3342        }
3343        switch (c) {
3344        case 'h':
3345            cmd_help(ccmd, "[-f FMT | --image-opts]\n"
3346"        [--start-offset OFFSET] [--max-length LENGTH]\n"
3347"        [--output human|json] [-U] [--object OBJDEF] FILE\n"
3348,
3349"  -f, --format FMT\n"
3350"     specify FILE image format explicitly (default: probing is used)\n"
3351"  --image-opts\n"
3352"     treat FILE as an option string (key=value,..), not a file name\n"
3353"     (incompatible with -f|--format)\n"
3354"  -s, --start-offset OFFSET\n"
3355"     start at the given OFFSET in the image, not at the beginning\n"
3356"  -l, --max-length LENGTH\n"
3357"     process at most LENGTH bytes instead of up to the end of the image\n"
3358"  --output human|json\n"
3359"     specify output format name (default: human)\n"
3360"  -U, --force-share\n"
3361"     open image in shared mode for concurrent access\n"
3362"  --object OBJDEF\n"
3363"     defines QEMU user-creatable object\n"
3364"  FILE\n"
3365"     the image file name, or option string (key=value,..)\n"
3366"     with --image-opts, to operate on\n"
3367);
3368            break;
3369        case 'f':
3370            fmt = optarg;
3371            break;
3372        case OPTION_IMAGE_OPTS:
3373            image_opts = true;
3374            break;
3375        case 's':
3376            start_offset = cvtnum("start offset", optarg, true);
3377            if (start_offset < 0) {
3378                return 1;
3379            }
3380            break;
3381        case 'l':
3382            max_length = cvtnum("max length", optarg, true);
3383            if (max_length < 0) {
3384                return 1;
3385            }
3386            break;
3387        case OPTION_OUTPUT:
3388            output_format = parse_output_format(argv[0], optarg);
3389            break;
3390        case 'U':
3391            force_share = true;
3392            break;
3393        case OPTION_OBJECT:
3394            user_creatable_process_cmdline(optarg);
3395            break;
3396        default:
3397            tryhelp(argv[0]);
3398        }
3399    }
3400    if (optind != argc - 1) {
3401        error_exit(argv[0], "Expecting one image file name");
3402    }
3403    filename = argv[optind];
3404
3405    blk = img_open(image_opts, filename, fmt, 0, false, false, force_share);
3406    if (!blk) {
3407        return 1;
3408    }
3409    bs = blk_bs(blk);
3410
3411    if (output_format == OFORMAT_HUMAN) {
3412        printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
3413    } else if (output_format == OFORMAT_JSON) {
3414        putchar('[');
3415    }
3416
3417    length = blk_getlength(blk);
3418    if (length < 0) {
3419        error_report("Failed to get size for '%s'", filename);
3420        return 1;
3421    }
3422    if (max_length != -1) {
3423        length = MIN(start_offset + max_length, length);
3424    }
3425
3426    curr.start = start_offset;
3427    while (curr.start + curr.length < length) {
3428        int64_t offset = curr.start + curr.length;
3429        int64_t n = length - offset;
3430
3431        ret = get_block_status(bs, offset, n, &next);
3432        if (ret < 0) {
3433            error_report("Could not read file metadata: %s", strerror(-ret));
3434            goto out;
3435        }
3436
3437        if (entry_mergeable(&curr, &next)) {
3438            curr.length += next.length;
3439            continue;
3440        }
3441
3442        if (curr.length > 0) {
3443            ret = dump_map_entry(output_format, &curr, &next);
3444            if (ret < 0) {
3445                goto out;
3446            }
3447        }
3448        curr = next;
3449    }
3450
3451    ret = dump_map_entry(output_format, &curr, NULL);
3452    if (output_format == OFORMAT_JSON) {
3453        puts("]");
3454    }
3455
3456out:
3457    blk_unref(blk);
3458    return ret < 0;
3459}
3460
/* Snapshot actions; each value is the matching short option character */
3462#define SNAPSHOT_LIST   'l'
3463#define SNAPSHOT_CREATE 'c'
3464#define SNAPSHOT_APPLY  'a'
3465#define SNAPSHOT_DELETE 'd'
3466
3467static int img_snapshot(const img_cmd_t *ccmd, int argc, char **argv)
3468{
3469    BlockBackend *blk;
3470    BlockDriverState *bs;
3471    QEMUSnapshotInfo sn;
3472    char *filename, *fmt = NULL, *snapshot_name = NULL;
3473    int c, ret = 0;
3474    int action = 0;
3475    bool quiet = false;
3476    Error *err = NULL;
3477    bool image_opts = false;
3478    bool force_share = false;
3479    int64_t rt;
3480
3481    /* Parse commandline parameters */
3482    for(;;) {
3483        static const struct option long_options[] = {
3484            {"help", no_argument, 0, 'h'},
3485            {"format", required_argument, 0, 'f'},
3486            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3487            {"list", no_argument, 0, SNAPSHOT_LIST},
3488            {"apply", required_argument, 0, SNAPSHOT_APPLY},
3489            {"create", required_argument, 0, SNAPSHOT_CREATE},
3490            {"delete", required_argument, 0, SNAPSHOT_DELETE},
3491            {"force-share", no_argument, 0, 'U'},
3492            {"quiet", no_argument, 0, 'q'},
3493            {"object", required_argument, 0, OPTION_OBJECT},
3494            {0, 0, 0, 0}
3495        };
3496        c = getopt_long(argc, argv, "hf:la:c:d:Uq",
3497                        long_options, NULL);
3498        if (c == -1) {
3499            break;
3500        }
3501        switch(c) {
3502        case 'h':
3503            cmd_help(ccmd, "[-f FMT | --image-opts] [-l | -a|-c|-d SNAPSHOT]\n"
3504"        [-U] [-q] [--object OBJDEF] FILE\n"
3505,
3506"  -f, --format FMT\n"
3507"     specify FILE format explicitly (default: probing is used)\n"
3508"  --image-opts\n"
3509"     treat FILE as an option string (key=value,..), not a file name\n"
3510"     (incompatible with -f|--format)\n"
3511"  -l, --list\n"
3512"     list snapshots in FILE (default action if no -l|-c|-a|-d is given)\n"
3513"  -c, --create SNAPSHOT\n"
3514"     create named snapshot\n"
3515"  -a, --apply SNAPSHOT\n"
3516"     apply named snapshot to the base\n"
3517"  -d, --delete SNAPSHOT\n"
3518"     delete named snapshot\n"
3519"  (only one of -l|-c|-a|-d can be specified)\n"
3520"  -U, --force-share\n"
3521"     open image in shared mode for concurrent access\n"
3522"  -q, --quiet\n"
3523"     quiet mode (produce only error messages if any)\n"
3524"  --object OBJDEF\n"
3525"     defines QEMU user-creatable object\n"
3526"  FILE\n"
3527"     name of the image file, or option string (key=value,..)\n"
3528"     with --image-opts) to operate on\n"
3529);
3530            break;
3531        case 'f':
3532            fmt = optarg;
3533            break;
3534        case OPTION_IMAGE_OPTS:
3535            image_opts = true;
3536            break;
3537        case SNAPSHOT_LIST:
3538        case SNAPSHOT_APPLY:
3539        case SNAPSHOT_CREATE:
3540        case SNAPSHOT_DELETE:
3541            if (action) {
3542                error_exit(argv[0], "Cannot mix '-l', '-a', '-c', '-d'");
3543                return 0;
3544            }
3545            action = c;
3546            snapshot_name = optarg;
3547            break;
3548        case 'U':
3549            force_share = true;
3550            break;
3551        case 'q':
3552            quiet = true;
3553            break;
3554        case OPTION_OBJECT:
3555            user_creatable_process_cmdline(optarg);
3556            break;
3557        default:
3558            tryhelp(argv[0]);
3559        }
3560    }
3561
3562    if (optind != argc - 1) {
3563        error_exit(argv[0], "Expecting one image file name");
3564    }
3565    filename = argv[optind++];
3566
3567    if (!action) {
3568        action = SNAPSHOT_LIST;
3569    }
3570
3571    /* Open the image */
3572    blk = img_open(image_opts, filename, fmt,
3573                   action == SNAPSHOT_LIST ? 0 : BDRV_O_RDWR,
3574                   false, quiet, force_share);
3575    if (!blk) {
3576        return 1;
3577    }
3578    bs = blk_bs(blk);
3579
3580    /* Perform the requested action */
3581    switch(action) {
3582    case SNAPSHOT_LIST:
3583        dump_snapshots(bs);
3584        break;
3585
3586    case SNAPSHOT_CREATE:
3587        memset(&sn, 0, sizeof(sn));
3588        pstrcpy(sn.name, sizeof(sn.name), snapshot_name);
3589
3590        rt = g_get_real_time();
3591        sn.date_sec = rt / G_USEC_PER_SEC;
3592        sn.date_nsec = (rt % G_USEC_PER_SEC) * 1000;
3593
3594        bdrv_graph_rdlock_main_loop();
3595        ret = bdrv_snapshot_create(bs, &sn);
3596        bdrv_graph_rdunlock_main_loop();
3597
3598        if (ret) {
3599            error_report("Could not create snapshot '%s': %s",
3600                snapshot_name, strerror(-ret));
3601        }
3602        break;
3603
3604    case SNAPSHOT_APPLY:
3605        ret = bdrv_snapshot_goto(bs, snapshot_name, &err);
3606        if (ret) {
3607            error_reportf_err(err, "Could not apply snapshot '%s': ",
3608                              snapshot_name);
3609        }
3610        break;
3611
3612    case SNAPSHOT_DELETE:
3613        bdrv_drain_all_begin();
3614        bdrv_graph_rdlock_main_loop();
3615        ret = bdrv_snapshot_find(bs, &sn, snapshot_name);
3616        if (ret < 0) {
3617            error_report("Could not delete snapshot '%s': snapshot not "
3618                         "found", snapshot_name);
3619            ret = 1;
3620        } else {
3621            ret = bdrv_snapshot_delete(bs, sn.id_str, sn.name, &err);
3622            if (ret < 0) {
3623                error_reportf_err(err, "Could not delete snapshot '%s': ",
3624                                  snapshot_name);
3625                ret = 1;
3626            }
3627        }
3628        bdrv_graph_rdunlock_main_loop();
3629        bdrv_drain_all_end();
3630        break;
3631    }
3632
3633    /* Cleanup */
3634    blk_unref(blk);
3635    if (ret) {
3636        return 1;
3637    }
3638    return 0;
3639}
3640
3641static int img_rebase(const img_cmd_t *ccmd, int argc, char **argv)
3642{
3643    BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
3644    uint8_t *buf_old = NULL;
3645    uint8_t *buf_new = NULL;
3646    BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
3647    BlockDriverState *unfiltered_bs, *unfiltered_bs_cow;
3648    BlockDriverInfo bdi = {0};
3649    char *filename;
3650    const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
3651    int c, flags, src_flags, ret;
3652    BdrvRequestFlags write_flags = 0;
3653    bool writethrough, src_writethrough;
3654    int unsafe = 0;
3655    bool force_share = false;
3656    int progress = 0;
3657    bool quiet = false;
3658    bool compress = false;
3659    Error *local_err = NULL;
3660    bool image_opts = false;
3661    int64_t write_align;
3662
3663    /* Parse commandline parameters */
3664    fmt = NULL;
3665    cache = BDRV_DEFAULT_CACHE;
3666    src_cache = BDRV_DEFAULT_CACHE;
3667    out_baseimg = NULL;
3668    out_basefmt = NULL;
3669    for(;;) {
3670        static const struct option long_options[] = {
3671            {"help", no_argument, 0, 'h'},
3672            {"format", required_argument, 0, 'f'},
3673            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3674            {"cache", required_argument, 0, 't'},
3675            {"compress", no_argument, 0, 'c'},
3676            {"backing", required_argument, 0, 'b'},
3677            {"backing-format", required_argument, 0, 'B'},
3678            {"backing-cache", required_argument, 0, 'T'},
3679            {"backing-unsafe", no_argument, 0, 'u'},
3680            {"force-share", no_argument, 0, 'U'},
3681            {"progress", no_argument, 0, 'p'},
3682            {"quiet", no_argument, 0, 'q'},
3683            {"object", required_argument, 0, OPTION_OBJECT},
3684            {0, 0, 0, 0}
3685        };
3686        c = getopt_long(argc, argv, "hf:t:cb:F:B:T:uUpq",
3687                        long_options, NULL);
3688        if (c == -1) {
3689            break;
3690        }
3691        switch (c) {
3692        case 'h':
3693            cmd_help(ccmd, "[-f FMT | --image-opts] [-t CACHE]\n"
3694"        [-b BACKING_FILE [-B BACKING_FMT] [-T BACKING_CACHE]] [-u]\n"
3695"        [-c] [-U] [-p] [-q] [--object OBJDEF] FILE\n"
3696,
3697"  -f, --format FMT\n"
3698"     specify FILE format explicitly (default: probing is used)\n"
3699"  --image-opts\n"
3700"     treat FILE as an option string (key=value,..), not a file name\n"
3701"     (incompatible with -f|--format)\n"
3702"  -t, --cache CACHE\n"
3703"     cache mode for FILE (default: " BDRV_DEFAULT_CACHE ")\n"
3704"  -b, --backing BACKING_FILE|\"\"\n"
3705"     rebase onto this file (specify empty name for no backing file)\n"
3706"  -B, --backing-format BACKING_FMT (was -F in <=10.0)\n"
3707"     specify format for BACKING_FILE explicitly (default: probing is used)\n"
3708"  -T, --backing-cache CACHE\n"
3709"     BACKING_FILE cache mode (default: " BDRV_DEFAULT_CACHE ")\n"
3710"  -u, --backing-unsafe\n"
3711"     do not fail if BACKING_FILE can not be read\n"
3712"  -c, --compress\n"
3713"     compress image (when image supports this)\n"
3714"  -U, --force-share\n"
3715"     open image in shared mode for concurrent access\n"
3716"  -p, --progress\n"
3717"     display progress information\n"
3718"  -q, --quiet\n"
3719"     quiet mode (produce only error messages if any)\n"
3720"  --object OBJDEF\n"
3721"     defines QEMU user-creatable object\n"
3722"  FILE\n"
3723"     name of the image file, or option string (key=value,..)\n"
3724"     with --image-opts, to operate on\n"
3725);
3726            return 0;
3727        case 'f':
3728            fmt = optarg;
3729            break;
3730        case OPTION_IMAGE_OPTS:
3731            image_opts = true;
3732            break;
3733        case 't':
3734            cache = optarg;
3735            break;
3736        case 'b':
3737            out_baseimg = optarg;
3738            break;
3739        case 'F': /* <=10.0 */
3740        case 'B':
3741            out_basefmt = optarg;
3742            break;
3743        case 'u':
3744            unsafe = 1;
3745            break;
3746        case 'c':
3747            compress = true;
3748            break;
3749        case 'U':
3750            force_share = true;
3751            break;
3752        case 'p':
3753            progress = 1;
3754            break;
3755        case 'T':
3756            src_cache = optarg;
3757            break;
3758        case 'q':
3759            quiet = true;
3760            break;
3761        case OPTION_OBJECT:
3762            user_creatable_process_cmdline(optarg);
3763            break;
3764        default:
3765            tryhelp(argv[0]);
3766        }
3767    }
3768
3769    if (quiet) {
3770        progress = 0;
3771    }
3772
3773    if (optind != argc - 1) {
3774        error_exit(argv[0], "Expecting one image file name");
3775    }
3776    if (!unsafe && !out_baseimg) {
3777        error_exit(argv[0],
3778                   "Must specify backing file (-b) or use unsafe mode (-u)");
3779    }
3780    filename = argv[optind++];
3781
3782    qemu_progress_init(progress, 2.0);
3783    qemu_progress_print(0, 100);
3784
3785    flags = BDRV_O_RDWR | (unsafe ? BDRV_O_NO_BACKING : 0);
3786    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
3787    if (ret < 0) {
3788        error_report("Invalid cache option: %s", cache);
3789        goto out;
3790    }
3791
3792    src_flags = 0;
3793    ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
3794    if (ret < 0) {
3795        error_report("Invalid source cache option: %s", src_cache);
3796        goto out;
3797    }
3798
3799    /* The source files are opened read-only, don't care about WCE */
3800    assert((src_flags & BDRV_O_RDWR) == 0);
3801    (void) src_writethrough;
3802
3803    /*
3804     * Open the images.
3805     *
3806     * Ignore the old backing file for unsafe rebase in case we want to correct
3807     * the reference to a renamed or moved backing file.
3808     */
3809    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
3810                   false);
3811    if (!blk) {
3812        ret = -1;
3813        goto out;
3814    }
3815    bs = blk_bs(blk);
3816
3817    bdrv_graph_rdlock_main_loop();
3818    unfiltered_bs = bdrv_skip_filters(bs);
3819    unfiltered_bs_cow = bdrv_cow_bs(unfiltered_bs);
3820    bdrv_graph_rdunlock_main_loop();
3821
3822    if (compress && !block_driver_can_compress(unfiltered_bs->drv)) {
3823        error_report("Compression not supported for this file format");
3824        ret = -1;
3825        goto out;
3826    } else if (compress) {
3827        write_flags |= BDRV_REQ_WRITE_COMPRESSED;
3828    }
3829
3830    if (out_basefmt != NULL) {
3831        if (bdrv_find_format(out_basefmt) == NULL) {
3832            error_report("Invalid format name: '%s'", out_basefmt);
3833            ret = -1;
3834            goto out;
3835        }
3836    }
3837
3838    /*
3839     * We need overlay subcluster size (or cluster size in case writes are
3840     * compressed) to make sure write requests are aligned.
3841     */
3842    ret = bdrv_get_info(unfiltered_bs, &bdi);
3843    if (ret < 0) {
3844        error_report("could not get block driver info");
3845        goto out;
3846    } else if (bdi.subcluster_size == 0) {
3847        bdi.cluster_size = bdi.subcluster_size = 1;
3848    }
3849
3850    write_align = compress ? bdi.cluster_size : bdi.subcluster_size;
3851
3852    /* For safe rebasing we need to compare old and new backing file */
3853    if (!unsafe) {
3854        QDict *options = NULL;
3855        BlockDriverState *base_bs;
3856
3857        bdrv_graph_rdlock_main_loop();
3858        base_bs = bdrv_cow_bs(unfiltered_bs);
3859        bdrv_graph_rdunlock_main_loop();
3860
3861        if (base_bs) {
3862            blk_old_backing = blk_new(qemu_get_aio_context(),
3863                                      BLK_PERM_CONSISTENT_READ,
3864                                      BLK_PERM_ALL);
3865            ret = blk_insert_bs(blk_old_backing, base_bs,
3866                                &local_err);
3867            if (ret < 0) {
3868                error_reportf_err(local_err,
3869                                  "Could not reuse old backing file '%s': ",
3870                                  base_bs->filename);
3871                goto out;
3872            }
3873        } else {
3874            blk_old_backing = NULL;
3875        }
3876
3877        if (out_baseimg[0]) {
3878            const char *overlay_filename;
3879            char *out_real_path;
3880
3881            options = qdict_new();
3882            if (out_basefmt) {
3883                qdict_put_str(options, "driver", out_basefmt);
3884            }
3885            if (force_share) {
3886                qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
3887            }
3888
3889            bdrv_graph_rdlock_main_loop();
3890            bdrv_refresh_filename(bs);
3891            bdrv_graph_rdunlock_main_loop();
3892            overlay_filename = bs->exact_filename[0] ? bs->exact_filename
3893                                                     : bs->filename;
3894            out_real_path =
3895                bdrv_get_full_backing_filename_from_filename(overlay_filename,
3896                                                             out_baseimg,
3897                                                             &local_err);
3898            if (local_err) {
3899                qobject_unref(options);
3900                error_reportf_err(local_err,
3901                                  "Could not resolve backing filename: ");
3902                ret = -1;
3903                goto out;
3904            }
3905
3906            /*
3907             * Find out whether we rebase an image on top of a previous image
3908             * in its chain.
3909             */
3910            prefix_chain_bs = bdrv_find_backing_image(bs, out_real_path);
3911            if (prefix_chain_bs) {
3912                qobject_unref(options);
3913                g_free(out_real_path);
3914
3915                blk_new_backing = blk_new(qemu_get_aio_context(),
3916                                          BLK_PERM_CONSISTENT_READ,
3917                                          BLK_PERM_ALL);
3918                ret = blk_insert_bs(blk_new_backing, prefix_chain_bs,
3919                                    &local_err);
3920                if (ret < 0) {
3921                    error_reportf_err(local_err,
3922                                      "Could not reuse backing file '%s': ",
3923                                      out_baseimg);
3924                    goto out;
3925                }
3926            } else {
3927                blk_new_backing = blk_new_open(out_real_path, NULL,
3928                                               options, src_flags, &local_err);
3929                g_free(out_real_path);
3930                if (!blk_new_backing) {
3931                    error_reportf_err(local_err,
3932                                      "Could not open new backing file '%s': ",
3933                                      out_baseimg);
3934                    ret = -1;
3935                    goto out;
3936                }
3937            }
3938        }
3939    }
3940
3941    /*
3942     * Check each unallocated cluster in the COW file. If it is unallocated,
3943     * accesses go to the backing file. We must therefore compare this cluster
3944     * in the old and new backing file, and if they differ we need to copy it
3945     * from the old backing file into the COW file.
3946     *
3947     * If qemu-img crashes during this step, no harm is done. The content of
3948     * the image is the same as the original one at any time.
3949     */
3950    if (!unsafe) {
3951        int64_t size;
3952        int64_t old_backing_size = 0;
3953        int64_t new_backing_size = 0;
3954        uint64_t offset;
3955        int64_t n, n_old = 0, n_new = 0;
3956        float local_progress = 0;
3957
3958        if (blk_old_backing && bdrv_opt_mem_align(blk_bs(blk_old_backing)) >
3959            bdrv_opt_mem_align(blk_bs(blk))) {
3960            buf_old = blk_blockalign(blk_old_backing, IO_BUF_SIZE);
3961        } else {
3962            buf_old = blk_blockalign(blk, IO_BUF_SIZE);
3963        }
3964        buf_new = blk_blockalign(blk_new_backing, IO_BUF_SIZE);
3965
3966        size = blk_getlength(blk);
3967        if (size < 0) {
3968            error_report("Could not get size of '%s': %s",
3969                         filename, strerror(-size));
3970            ret = -1;
3971            goto out;
3972        }
3973        if (blk_old_backing) {
3974            old_backing_size = blk_getlength(blk_old_backing);
3975            if (old_backing_size < 0) {
3976                char backing_name[PATH_MAX];
3977
3978                bdrv_get_backing_filename(bs, backing_name,
3979                                          sizeof(backing_name));
3980                error_report("Could not get size of '%s': %s",
3981                             backing_name, strerror(-old_backing_size));
3982                ret = -1;
3983                goto out;
3984            }
3985        }
3986        if (blk_new_backing) {
3987            new_backing_size = blk_getlength(blk_new_backing);
3988            if (new_backing_size < 0) {
3989                error_report("Could not get size of '%s': %s",
3990                             out_baseimg, strerror(-new_backing_size));
3991                ret = -1;
3992                goto out;
3993            }
3994        }
3995
3996        if (size != 0) {
3997            local_progress = (float)100 / (size / MIN(size, IO_BUF_SIZE));
3998        }
3999
4000        for (offset = 0; offset < size; offset += n) {
4001            bool old_backing_eof = false;
4002            int64_t n_alloc;
4003
4004            /* How many bytes can we handle with the next read? */
4005            n = MIN(IO_BUF_SIZE, size - offset);
4006
4007            /* If the cluster is allocated, we don't need to take action */
4008            ret = bdrv_is_allocated(unfiltered_bs, offset, n, &n);
4009            if (ret < 0) {
4010                error_report("error while reading image metadata: %s",
4011                             strerror(-ret));
4012                goto out;
4013            }
4014            if (ret) {
4015                continue;
4016            }
4017
4018            if (prefix_chain_bs) {
4019                uint64_t bytes = n;
4020
4021                /*
4022                 * If cluster wasn't changed since prefix_chain, we don't need
4023                 * to take action
4024                 */
4025                ret = bdrv_is_allocated_above(unfiltered_bs_cow,
4026                                              prefix_chain_bs, false,
4027                                              offset, n, &n);
4028                if (ret < 0) {
4029                    error_report("error while reading image metadata: %s",
4030                                 strerror(-ret));
4031                    goto out;
4032                }
4033                if (!ret && n) {
4034                    continue;
4035                }
4036                if (!n) {
4037                    /*
4038                     * If we've reached EOF of the old backing, it means that
4039                     * offsets beyond the old backing size were read as zeroes.
4040                     * Now we will need to explicitly zero the cluster in
4041                     * order to preserve that state after the rebase.
4042                     */
4043                    n = bytes;
4044                }
4045            }
4046
4047            /*
4048             * At this point we know that the region [offset; offset + n)
4049             * is unallocated within the target image.  This region might be
4050             * unaligned to the target image's (sub)cluster boundaries, as
4051             * old backing may have smaller clusters (or have subclusters).
4052             * We extend it to the aligned boundaries to avoid CoW on
4053             * partial writes in blk_pwrite(),
4054             */
4055            n += offset - QEMU_ALIGN_DOWN(offset, write_align);
4056            offset = QEMU_ALIGN_DOWN(offset, write_align);
4057            n += QEMU_ALIGN_UP(offset + n, write_align) - (offset + n);
4058            n = MIN(n, size - offset);
4059            assert(!bdrv_is_allocated(unfiltered_bs, offset, n, &n_alloc) &&
4060                   n_alloc == n);
4061
4062            /*
4063             * Much like with the target image, we'll try to read as much
4064             * of the old and new backings as we can.
4065             */
4066            n_old = MIN(n, MAX(0, old_backing_size - (int64_t) offset));
4067            n_new = MIN(n, MAX(0, new_backing_size - (int64_t) offset));
4068
4069            /*
4070             * Read old and new backing file and take into consideration that
4071             * backing files may be smaller than the COW image.
4072             */
4073            memset(buf_old + n_old, 0, n - n_old);
4074            if (!n_old) {
4075                old_backing_eof = true;
4076            } else {
4077                ret = blk_pread(blk_old_backing, offset, n_old, buf_old, 0);
4078                if (ret < 0) {
4079                    error_report("error while reading from old backing file");
4080                    goto out;
4081                }
4082            }
4083
4084            memset(buf_new + n_new, 0, n - n_new);
4085            if (n_new) {
4086                ret = blk_pread(blk_new_backing, offset, n_new, buf_new, 0);
4087                if (ret < 0) {
4088                    error_report("error while reading from new backing file");
4089                    goto out;
4090                }
4091            }
4092
4093            /* If they differ, we need to write to the COW file */
4094            uint64_t written = 0;
4095
4096            while (written < n) {
4097                int64_t pnum;
4098
4099                if (compare_buffers(buf_old + written, buf_new + written,
4100                                    n - written, write_align, &pnum))
4101                {
4102                    if (old_backing_eof) {
4103                        ret = blk_pwrite_zeroes(blk, offset + written, pnum, 0);
4104                    } else {
4105                        assert(written + pnum <= IO_BUF_SIZE);
4106                        ret = blk_pwrite(blk, offset + written, pnum,
4107                                         buf_old + written, write_flags);
4108                    }
4109                    if (ret < 0) {
4110                        error_report("Error while writing to COW image: %s",
4111                            strerror(-ret));
4112                        goto out;
4113                    }
4114                }
4115
4116                written += pnum;
4117                if (offset + written >= old_backing_size) {
4118                    old_backing_eof = true;
4119                }
4120            }
4121            qemu_progress_print(local_progress, 100);
4122        }
4123    }
4124
4125    /*
4126     * Change the backing file. All clusters that are different from the old
4127     * backing file are overwritten in the COW file now, so the visible content
4128     * doesn't change when we switch the backing file.
4129     */
4130    if (out_baseimg && *out_baseimg) {
4131        ret = bdrv_change_backing_file(unfiltered_bs, out_baseimg, out_basefmt,
4132                                       true);
4133    } else {
4134        ret = bdrv_change_backing_file(unfiltered_bs, NULL, NULL, false);
4135    }
4136
4137    if (ret == -ENOSPC) {
4138        error_report("Could not change the backing file to '%s': No "
4139                     "space left in the file header", out_baseimg);
4140    } else if (ret == -EINVAL && out_baseimg && !out_basefmt) {
4141        error_report("Could not change the backing file to '%s': backing "
4142                     "format must be specified", out_baseimg);
4143    } else if (ret < 0) {
4144        error_report("Could not change the backing file to '%s': %s",
4145            out_baseimg, strerror(-ret));
4146    }
4147
4148    qemu_progress_print(100, 0);
4149    /*
4150     * TODO At this point it is possible to check if any clusters that are
4151     * allocated in the COW file are the same in the backing file. If so, they
4152     * could be dropped from the COW file. Don't do this before switching the
4153     * backing file, in case of a crash this would lead to corruption.
4154     */
4155out:
4156    qemu_progress_end();
4157    /* Cleanup */
4158    if (!unsafe) {
4159        blk_unref(blk_old_backing);
4160        blk_unref(blk_new_backing);
4161    }
4162    qemu_vfree(buf_old);
4163    qemu_vfree(buf_new);
4164
4165    blk_unref(blk);
4166    if (ret) {
4167        return 1;
4168    }
4169    return 0;
4170}
4171
4172static int img_resize(const img_cmd_t *ccmd, int argc, char **argv)
4173{
4174    Error *err = NULL;
4175    int c, ret, relative;
4176    const char *filename = NULL, *fmt = NULL, *size = NULL;
4177    int64_t n, total_size, current_size;
4178    bool quiet = false;
4179    BlockBackend *blk = NULL;
4180    PreallocMode prealloc = PREALLOC_MODE_OFF;
4181    QemuOpts *param;
4182
4183    static QemuOptsList resize_options = {
4184        .name = "resize_options",
4185        .head = QTAILQ_HEAD_INITIALIZER(resize_options.head),
4186        .desc = {
4187            {
4188                .name = BLOCK_OPT_SIZE,
4189                .type = QEMU_OPT_SIZE,
4190                .help = "Virtual disk size"
4191            }, {
4192                /* end of list */
4193            }
4194        },
4195    };
4196    bool image_opts = false;
4197    bool shrink = false;
4198
4199    /* Parse getopt arguments */
4200    for(;;) {
4201        static const struct option long_options[] = {
4202            {"help", no_argument, 0, 'h'},
4203            {"format", required_argument, 0, 'f'},
4204            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4205            {"preallocation", required_argument, 0, OPTION_PREALLOCATION},
4206            {"shrink", no_argument, 0, OPTION_SHRINK},
4207            {"quiet", no_argument, 0, 'q'},
4208            {"object", required_argument, 0, OPTION_OBJECT},
4209            {0, 0, 0, 0}
4210        };
4211        c = getopt_long(argc, argv, "-hf:q",
4212                        long_options, NULL);
4213        if (c == -1) {
4214            break;
4215        }
4216        switch(c) {
4217        case 'h':
4218            cmd_help(ccmd, "[-f FMT | --image-opts] [--preallocation PREALLOC] [--shrink]\n"
4219"        [-q] [--object OBJDEF] FILE [+-]SIZE[bkKMGTPE]\n"
4220,
4221"  -f, --format FMT\n"
4222"     specify FILE format explicitly (default: probing is used)\n"
4223"  --image-opts\n"
4224"     treat FILE as an option string (key=value,...), not a file name\n"
4225"     (incompatible with -f|--format)\n"
4226"  --shrink\n"
4227"     allow operation when the new size is smaller than the original\n"
4228"  --preallocation PREALLOC\n"
4229"     specify FMT-specific preallocation type for the new areas\n"
4230"  -q, --quiet\n"
4231"     quiet mode (produce only error messages if any)\n"
4232"  --object OBJDEF\n"
4233"     defines QEMU user-creatable object\n"
4234"  FILE\n"
4235"     name of the image file, or option string (key=value,..)\n"
4236"     with --image-opts, to operate on\n"
4237"  [+-]SIZE[bkKMGTPE]\n"
4238"     new image size or amount by which to shrink (-)/grow (+),\n"
4239"     with optional multiplier suffix (powers of 1024, default is bytes)\n"
4240);
4241            return 0;
4242        case 'f':
4243            fmt = optarg;
4244            break;
4245        case OPTION_IMAGE_OPTS:
4246            image_opts = true;
4247            break;
4248        case OPTION_PREALLOCATION:
4249            prealloc = qapi_enum_parse(&PreallocMode_lookup, optarg,
4250                                       PREALLOC_MODE__MAX, NULL);
4251            if (prealloc == PREALLOC_MODE__MAX) {
4252                error_report("Invalid preallocation mode '%s'", optarg);
4253                return 1;
4254            }
4255            break;
4256        case OPTION_SHRINK:
4257            shrink = true;
4258            break;
4259        case 'q':
4260            quiet = true;
4261            break;
4262        case OPTION_OBJECT:
4263            user_creatable_process_cmdline(optarg);
4264            break;
4265        case 1: /* a non-optional argument */
4266            if (!filename) {
4267                filename = optarg;
4268                /* see if we have -size (number) next to filename */
4269                if (optind < argc) {
4270                    size = argv[optind];
4271                    if (size[0] == '-' && size[1] >= '0' && size[1] <= '9') {
4272                        ++optind;
4273                    } else {
4274                        size = NULL;
4275                    }
4276                }
4277            } else if (!size) {
4278                size = optarg;
4279            } else {
4280                error_exit(argv[0], "Extra argument(s) in command line");
4281            }
4282            break;
4283        default:
4284            tryhelp(argv[0]);
4285        }
4286    }
4287    if (!filename && optind < argc) {
4288        filename = argv[optind++];
4289    }
4290    if (!size && optind < argc) {
4291        size = argv[optind++];
4292    }
4293    if (!filename || !size || optind < argc) {
4294        error_exit(argv[0], "Expecting image file name and size");
4295    }
4296
4297    /* Choose grow, shrink, or absolute resize mode */
4298    switch (size[0]) {
4299    case '+':
4300        relative = 1;
4301        size++;
4302        break;
4303    case '-':
4304        relative = -1;
4305        size++;
4306        break;
4307    default:
4308        relative = 0;
4309        break;
4310    }
4311
4312    /* Parse size */
4313    param = qemu_opts_create(&resize_options, NULL, 0, &error_abort);
4314    if (!qemu_opt_set(param, BLOCK_OPT_SIZE, size, &err)) {
4315        error_report_err(err);
4316        ret = -1;
4317        qemu_opts_del(param);
4318        goto out;
4319    }
4320    n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
4321    qemu_opts_del(param);
4322
4323    blk = img_open(image_opts, filename, fmt,
4324                   BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet,
4325                   false);
4326    if (!blk) {
4327        ret = -1;
4328        goto out;
4329    }
4330
4331    current_size = blk_getlength(blk);
4332    if (current_size < 0) {
4333        error_report("Failed to inquire current image length: %s",
4334                     strerror(-current_size));
4335        ret = -1;
4336        goto out;
4337    }
4338
4339    if (relative) {
4340        total_size = current_size + n * relative;
4341    } else {
4342        total_size = n;
4343    }
4344    if (total_size <= 0) {
4345        error_report("New image size must be positive");
4346        ret = -1;
4347        goto out;
4348    }
4349
4350    if (total_size <= current_size && prealloc != PREALLOC_MODE_OFF) {
4351        error_report("Preallocation can only be used for growing images");
4352        ret = -1;
4353        goto out;
4354    }
4355
4356    if (total_size < current_size && !shrink) {
4357        error_report("Use the --shrink option to perform a shrink operation.");
4358        warn_report("Shrinking an image will delete all data beyond the "
4359                    "shrunken image's end. Before performing such an "
4360                    "operation, make sure there is no important data there.");
4361        ret = -1;
4362        goto out;
4363    }
4364
4365    /*
4366     * The user expects the image to have the desired size after
4367     * resizing, so pass @exact=true.  It is of no use to report
4368     * success when the image has not actually been resized.
4369     */
4370    ret = blk_truncate(blk, total_size, true, prealloc, 0, &err);
4371    if (!ret) {
4372        qprintf(quiet, "Image resized.\n");
4373    } else {
4374        error_report_err(err);
4375    }
4376out:
4377    blk_unref(blk);
4378    if (ret) {
4379        return 1;
4380    }
4381    return 0;
4382}
4383
4384static void amend_status_cb(BlockDriverState *bs,
4385                            int64_t offset, int64_t total_work_size,
4386                            void *opaque)
4387{
4388    qemu_progress_print(100.f * offset / total_work_size, 0);
4389}
4390
4391static int print_amend_option_help(const char *format)
4392{
4393    BlockDriver *drv;
4394
4395    GRAPH_RDLOCK_GUARD_MAINLOOP();
4396
4397    /* Find driver and parse its options */
4398    drv = bdrv_find_format(format);
4399    if (!drv) {
4400        error_report("Unknown file format '%s'", format);
4401        return 1;
4402    }
4403
4404    if (!drv->bdrv_amend_options) {
4405        error_report("Format driver '%s' does not support option amendment",
4406                     format);
4407        return 1;
4408    }
4409
4410    /* Every driver supporting amendment must have amend_opts */
4411    assert(drv->amend_opts);
4412
4413    printf("Amend options for '%s':\n", format);
4414    qemu_opts_print_help(drv->amend_opts, false);
4415    return 0;
4416}
4417
4418static int img_amend(const img_cmd_t *ccmd, int argc, char **argv)
4419{
4420    Error *err = NULL;
4421    int c, ret = 0;
4422    char *options = NULL;
4423    QemuOptsList *amend_opts = NULL;
4424    QemuOpts *opts = NULL;
4425    const char *fmt = NULL, *filename, *cache;
4426    int flags;
4427    bool writethrough;
4428    bool quiet = false, progress = false;
4429    BlockBackend *blk = NULL;
4430    BlockDriverState *bs = NULL;
4431    bool image_opts = false;
4432    bool force = false;
4433
4434    cache = BDRV_DEFAULT_CACHE;
4435    for (;;) {
4436        static const struct option long_options[] = {
4437            {"help", no_argument, 0, 'h'},
4438            {"options", required_argument, 0, 'o'},
4439            {"format", required_argument, 0, 'f'},
4440            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4441            {"cache", required_argument, 0, 't'},
4442            {"force", no_argument, 0, OPTION_FORCE},
4443            {"progress", no_argument, 0, 'p'},
4444            {"quiet", no_argument, 0, 'q'},
4445            {"object", required_argument, 0, OPTION_OBJECT},
4446            {0, 0, 0, 0}
4447        };
4448        c = getopt_long(argc, argv, "ho:f:t:pq",
4449                        long_options, NULL);
4450        if (c == -1) {
4451            break;
4452        }
4453
4454        switch (c) {
4455        case 'h':
4456            cmd_help(ccmd, "-o FMT_OPTS [-f FMT | --image-opts]\n"
4457"        [-t CACHE] [--force] [-p] [-q] [--object OBJDEF] FILE\n"
4458,
4459"  -o, --options FMT_OPTS\n"
4460"     FMT-specfic format options (required)\n"
4461"  -f, --format FMT\n"
4462"     specify FILE format explicitly (default: probing is used)\n"
4463"  --image-opts\n"
4464"     treat FILE as an option string (key=value,..), not a file name\n"
4465"     (incompatible with -f|--format)\n"
4466"  -t, --cache CACHE\n"
4467"     cache mode for FILE (default: " BDRV_DEFAULT_CACHE ")\n"
4468"  --force\n"
4469"     allow certain unsafe operations\n"
4470"  -p, --progres\n"
4471"     show operation progress\n"
4472"  -q, --quiet\n"
4473"     quiet mode (produce only error messages if any)\n"
4474"  --object OBJDEF\n"
4475"     defines QEMU user-creatable object\n"
4476"  FILE\n"
4477"     name of the image file, or option string (key=value,..)\n"
4478"     with --image-opts, to operate on\n"
4479);
4480            break;
4481        case 'o':
4482            if (accumulate_options(&options, optarg) < 0) {
4483                ret = -1;
4484                goto out_no_progress;
4485            }
4486            break;
4487        case 'f':
4488            fmt = optarg;
4489            break;
4490        case OPTION_IMAGE_OPTS:
4491            image_opts = true;
4492            break;
4493        case 't':
4494            cache = optarg;
4495            break;
4496        case OPTION_FORCE:
4497            force = true;
4498            break;
4499        case 'p':
4500            progress = true;
4501            break;
4502        case 'q':
4503            quiet = true;
4504            break;
4505        case OPTION_OBJECT:
4506            user_creatable_process_cmdline(optarg);
4507            break;
4508        default:
4509            tryhelp(argv[0]);
4510        }
4511    }
4512
4513    if (!options) {
4514        error_exit(argv[0], "Must specify options (-o)");
4515    }
4516
4517    if (quiet) {
4518        progress = false;
4519    }
4520    qemu_progress_init(progress, 1.0);
4521
4522    filename = (optind == argc - 1) ? argv[argc - 1] : NULL;
4523    if (fmt && has_help_option(options)) {
4524        /* If a format is explicitly specified (and possibly no filename is
4525         * given), print option help here */
4526        ret = print_amend_option_help(fmt);
4527        goto out;
4528    }
4529
4530    if (optind != argc - 1) {
4531        error_report("Expecting one image file name");
4532        ret = -1;
4533        goto out;
4534    }
4535
4536    flags = BDRV_O_RDWR;
4537    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
4538    if (ret < 0) {
4539        error_report("Invalid cache option: %s", cache);
4540        goto out;
4541    }
4542
4543    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4544                   false);
4545    if (!blk) {
4546        ret = -1;
4547        goto out;
4548    }
4549    bs = blk_bs(blk);
4550
4551    fmt = bs->drv->format_name;
4552
4553    if (has_help_option(options)) {
4554        /* If the format was auto-detected, print option help here */
4555        ret = print_amend_option_help(fmt);
4556        goto out;
4557    }
4558
4559    bdrv_graph_rdlock_main_loop();
4560    if (!bs->drv->bdrv_amend_options) {
4561        error_report("Format driver '%s' does not support option amendment",
4562                     fmt);
4563        bdrv_graph_rdunlock_main_loop();
4564        ret = -1;
4565        goto out;
4566    }
4567
4568    /* Every driver supporting amendment must have amend_opts */
4569    assert(bs->drv->amend_opts);
4570
4571    amend_opts = qemu_opts_append(amend_opts, bs->drv->amend_opts);
4572    opts = qemu_opts_create(amend_opts, NULL, 0, &error_abort);
4573    if (!qemu_opts_do_parse(opts, options, NULL, &err)) {
4574        /* Try to parse options using the create options */
4575        amend_opts = qemu_opts_append(amend_opts, bs->drv->create_opts);
4576        qemu_opts_del(opts);
4577        opts = qemu_opts_create(amend_opts, NULL, 0, &error_abort);
4578        if (qemu_opts_do_parse(opts, options, NULL, NULL)) {
4579            error_append_hint(&err,
4580                              "This option is only supported for image creation\n");
4581        }
4582
4583        bdrv_graph_rdunlock_main_loop();
4584        error_report_err(err);
4585        ret = -1;
4586        goto out;
4587    }
4588
4589    /* In case the driver does not call amend_status_cb() */
4590    qemu_progress_print(0.f, 0);
4591    ret = bdrv_amend_options(bs, opts, &amend_status_cb, NULL, force, &err);
4592    qemu_progress_print(100.f, 0);
4593    bdrv_graph_rdunlock_main_loop();
4594
4595    if (ret < 0) {
4596        error_report_err(err);
4597        goto out;
4598    }
4599
4600out:
4601    qemu_progress_end();
4602
4603out_no_progress:
4604    blk_unref(blk);
4605    qemu_opts_del(opts);
4606    qemu_opts_free(amend_opts);
4607    g_free(options);
4608
4609    if (ret) {
4610        return 1;
4611    }
4612    return 0;
4613}
4614
/*
 * State of one "qemu-img bench" run, shared between img_bench() and the
 * AIO completion callbacks (passed to them as the opaque pointer).
 */
typedef struct BenchData {
    /* Image the requests and flushes are issued against */
    BlockBackend *blk;
    /* Length of the image as returned by blk_getlength(); used to wrap offset */
    uint64_t image_size;
    /* true: issue writes (blk_aio_pwritev), false: issue reads */
    bool write;
    /* Size in bytes of each request's buffer */
    int bufsize;
    /* Increment applied to offset after each submitted request */
    int step;
    /* Maximum number of requests kept in flight (queue depth) */
    int nrreq;
    /* Requests not yet completed; the run is over when this reaches 0 */
    int n;
    /* Issue a flush every this many requests; 0 disables flushing */
    int flush_interval;
    /*
     * true: wait until no request is in flight before flushing and do not
     * submit new requests until the flush completes
     */
    bool drain_on_flush;
    /* Buffer of nrreq * bufsize bytes backing the I/O vectors */
    uint8_t *buf;
    /*
     * Array of nrreq single-element I/O vectors set up by img_bench().
     * Note that bench_cb() passes this pointer itself (i.e. the first
     * vector) to every request.
     */
    QEMUIOVector *qiov;

    /* Requests submitted but not yet completed */
    int in_flight;
    /* true while a drained flush is outstanding */
    bool in_flush;
    /* Offset at which the next request will be submitted */
    uint64_t offset;
} BenchData;
4632
/*
 * Completion callback for flushes issued without draining the queue
 * (--no-drain).  A successful flush needs no bookkeeping; a failed one
 * terminates the benchmark.
 */
static void bench_undrained_flush_cb(void *opaque, int ret)
{
    if (ret >= 0) {
        return;
    }

    error_report("Failed flush request: %s", strerror(-ret));
    exit(EXIT_FAILURE);
}
4640
4641static void bench_cb(void *opaque, int ret)
4642{
4643    BenchData *b = opaque;
4644    BlockAIOCB *acb;
4645
4646    if (ret < 0) {
4647        error_report("Failed request: %s", strerror(-ret));
4648        exit(EXIT_FAILURE);
4649    }
4650
4651    if (b->in_flush) {
4652        /* Just finished a flush with drained queue: Start next requests */
4653        assert(b->in_flight == 0);
4654        b->in_flush = false;
4655    } else if (b->in_flight > 0) {
4656        int remaining = b->n - b->in_flight;
4657
4658        b->n--;
4659        b->in_flight--;
4660
4661        /* Time for flush? Drain queue if requested, then flush */
4662        if (b->flush_interval && remaining % b->flush_interval == 0) {
4663            if (!b->in_flight || !b->drain_on_flush) {
4664                BlockCompletionFunc *cb;
4665
4666                if (b->drain_on_flush) {
4667                    b->in_flush = true;
4668                    cb = bench_cb;
4669                } else {
4670                    cb = bench_undrained_flush_cb;
4671                }
4672
4673                acb = blk_aio_flush(b->blk, cb, b);
4674                if (!acb) {
4675                    error_report("Failed to issue flush request");
4676                    exit(EXIT_FAILURE);
4677                }
4678            }
4679            if (b->drain_on_flush) {
4680                return;
4681            }
4682        }
4683    }
4684
4685    while (b->n > b->in_flight && b->in_flight < b->nrreq) {
4686        int64_t offset = b->offset;
4687        /* blk_aio_* might look for completed I/Os and kick bench_cb
4688         * again, so make sure this operation is counted by in_flight
4689         * and b->offset is ready for the next submission.
4690         */
4691        b->in_flight++;
4692        b->offset += b->step;
4693        if (b->image_size <= b->bufsize) {
4694            b->offset = 0;
4695        } else {
4696            b->offset %= b->image_size - b->bufsize;
4697        }
4698        if (b->write) {
4699            acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
4700        } else {
4701            acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b);
4702        }
4703        if (!acb) {
4704            error_report("Failed to issue request");
4705            exit(EXIT_FAILURE);
4706        }
4707    }
4708}
4709
4710static int img_bench(const img_cmd_t *ccmd, int argc, char **argv)
4711{
4712    int c, ret = 0;
4713    const char *fmt = NULL, *filename;
4714    bool quiet = false;
4715    bool image_opts = false;
4716    bool is_write = false;
4717    int count = 75000;
4718    int depth = 64;
4719    int64_t offset = 0;
4720    ssize_t bufsize = 4096;
4721    int pattern = 0;
4722    ssize_t step = 0;
4723    int flush_interval = 0;
4724    bool drain_on_flush = true;
4725    int64_t image_size;
4726    BlockBackend *blk = NULL;
4727    BenchData data = {};
4728    int flags = 0;
4729    bool writethrough = false;
4730    struct timeval t1, t2;
4731    int i;
4732    bool force_share = false;
4733    size_t buf_size = 0;
4734
4735    for (;;) {
4736        static const struct option long_options[] = {
4737            {"help", no_argument, 0, 'h'},
4738            {"format", required_argument, 0, 'f'},
4739            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4740            {"cache", required_argument, 0, 't'},
4741            {"count", required_argument, 0, 'c'},
4742            {"depth", required_argument, 0, 'd'},
4743            {"offset", required_argument, 0, 'o'},
4744            {"buffer-size", required_argument, 0, 's'},
4745            {"step-size", required_argument, 0, 'S'},
4746            {"write", no_argument, 0, 'w'},
4747            {"pattern", required_argument, 0, OPTION_PATTERN},
4748            {"flush-interval", required_argument, 0, OPTION_FLUSH_INTERVAL},
4749            {"no-drain", no_argument, 0, OPTION_NO_DRAIN},
4750            {"aio", required_argument, 0, 'i'},
4751            {"native", no_argument, 0, 'n'},
4752            {"force-share", no_argument, 0, 'U'},
4753            {"quiet", no_argument, 0, 'q'},
4754            {"object", required_argument, 0, OPTION_OBJECT},
4755            {0, 0, 0, 0}
4756        };
4757        c = getopt_long(argc, argv, "hf:t:c:d:o:s:S:wi:nUq",
4758                        long_options, NULL);
4759        if (c == -1) {
4760            break;
4761        }
4762
4763        switch (c) {
4764        case 'h':
4765            cmd_help(ccmd, "[-f FMT | --image-opts] [-t CACHE]\n"
4766"        [-c COUNT] [-d DEPTH] [-o OFFSET] [-s BUFFER_SIZE] [-S STEP_SIZE]\n"
4767"        [-w [--pattern PATTERN] [--flush-interval INTERVAL [--no-drain]]]\n"
4768"        [-i AIO] [-n] [-U] [-q] FILE\n"
4769,
4770"  -f, --format FMT\n"
4771"     specify FILE format explicitly\n"
4772"  --image-opts\n"
4773"     indicates that FILE is a complete image specification\n"
4774"     instead of a file name (incompatible with --format)\n"
4775"  -t, --cache CACHE\n"
4776"     cache mode for FILE (default: " BDRV_DEFAULT_CACHE ")\n"
4777"  -c, --count COUNT\n"
4778"     number of I/O requests to perform\n"
4779"  -d, --depth DEPTH\n"
4780"     number of requests to perform in parallel\n"
4781"  -o, --offset OFFSET\n"
4782"     start first request at this OFFSET\n"
4783"  -s, --buffer-size BUFFER_SIZE[bkKMGTPE]\n"
4784"     size of each I/O request, with optional multiplier suffix\n"
4785"     (powers of 1024, default is 4K)\n"
4786"  -S, --step-size STEP_SIZE[bkKMGTPE]\n"
4787"     each next request offset increment, with optional multiplier suffix\n"
4788"     (powers of 1024, default is the same as BUFFER_SIZE)\n"
4789"  -w, --write\n"
4790"     perform write test (default is read)\n"
4791"  --pattern PATTERN\n"
4792"     write this pattern byte instead of zero\n"
4793"  --flush-interval FLUSH_INTERVAL\n"
4794"     issue flush after this number of requests\n"
4795"  --no-drain\n"
4796"     do not wait when flushing pending requests\n"
4797"  -i, --aio AIO\n"
4798"     async-io backend (threads, native, io_uring)\n"
4799"  -n, --native\n"
4800"     use native AIO backend if possible\n"
4801"  -U, --force-share\n"
4802"     open images in shared mode for concurrent access\n"
4803"  -q, --quiet\n"
4804"     quiet mode (produce only error messages if any)\n"
4805"  --object OBJDEF\n"
4806"     defines QEMU user-creatable object\n"
4807"  FILE\n"
4808"     name of the image file, or option string (key=value,..)\n"
4809"     with --image-opts, to operate on\n"
4810);
4811            break;
4812        case 'f':
4813            fmt = optarg;
4814            break;
4815        case OPTION_IMAGE_OPTS:
4816            image_opts = true;
4817            break;
4818        case 't':
4819            ret = bdrv_parse_cache_mode(optarg, &flags, &writethrough);
4820            if (ret < 0) {
4821                error_report("Invalid cache mode");
4822                ret = -1;
4823                goto out;
4824            }
4825            break;
4826        case 'c':
4827            count = cvtnum_full("request count", optarg, false, 1, INT_MAX);
4828            if (count < 0) {
4829                return 1;
4830            }
4831            break;
4832        case 'd':
4833            depth = cvtnum_full("queue depth", optarg, false, 1, INT_MAX);
4834            if (depth < 0) {
4835                return 1;
4836            }
4837            break;
4838        case 'n':
4839            flags |= BDRV_O_NATIVE_AIO;
4840            break;
4841        case 'i':
4842            ret = bdrv_parse_aio(optarg, &flags);
4843            if (ret < 0) {
4844                error_report("Invalid aio option: %s", optarg);
4845                ret = -1;
4846                goto out;
4847            }
4848            break;
4849        case 'o':
4850            offset = cvtnum("offset", optarg, true);
4851            if (offset < 0) {
4852                return 1;
4853            }
4854            break;
4855        case 's':
4856            bufsize = cvtnum_full("buffer size", optarg, true, 1, INT_MAX);
4857            if (bufsize < 0) {
4858                return 1;
4859            }
4860            break;
4861        case 'S':
4862            step = cvtnum_full("step size", optarg, true, 0, INT_MAX);
4863            if (step < 0) {
4864                return 1;
4865            }
4866            break;
4867        case 'w':
4868            flags |= BDRV_O_RDWR;
4869            is_write = true;
4870            break;
4871        case OPTION_PATTERN:
4872            pattern = cvtnum_full("pattern byte", optarg, false, 0, 0xff);
4873            if (pattern < 0) {
4874                return 1;
4875            }
4876            break;
4877        case OPTION_FLUSH_INTERVAL:
4878            flush_interval = cvtnum_full("flush interval", optarg,
4879                                         false, 0, INT_MAX);
4880            if (flush_interval < 0) {
4881                return 1;
4882            }
4883            break;
4884        case OPTION_NO_DRAIN:
4885            drain_on_flush = false;
4886            break;
4887        case 'U':
4888            force_share = true;
4889            break;
4890        case 'q':
4891            quiet = true;
4892            break;
4893        case OPTION_OBJECT:
4894            user_creatable_process_cmdline(optarg);
4895            break;
4896        default:
4897            tryhelp(argv[0]);
4898        }
4899    }
4900
4901    if (optind != argc - 1) {
4902        error_exit(argv[0], "Expecting one image file name");
4903    }
4904    filename = argv[argc - 1];
4905
4906    if (!is_write && flush_interval) {
4907        error_report("--flush-interval is only available in write tests");
4908        ret = -1;
4909        goto out;
4910    }
4911    if (flush_interval && flush_interval < depth) {
4912        error_report("Flush interval can't be smaller than depth");
4913        ret = -1;
4914        goto out;
4915    }
4916
4917    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4918                   force_share);
4919    if (!blk) {
4920        ret = -1;
4921        goto out;
4922    }
4923
4924    image_size = blk_getlength(blk);
4925    if (image_size < 0) {
4926        ret = image_size;
4927        goto out;
4928    }
4929
4930    data = (BenchData) {
4931        .blk            = blk,
4932        .image_size     = image_size,
4933        .bufsize        = bufsize,
4934        .step           = step ?: bufsize,
4935        .nrreq          = depth,
4936        .n              = count,
4937        .offset         = offset,
4938        .write          = is_write,
4939        .flush_interval = flush_interval,
4940        .drain_on_flush = drain_on_flush,
4941    };
4942    printf("Sending %d %s requests, %d bytes each, %d in parallel "
4943           "(starting at offset %" PRId64 ", step size %d)\n",
4944           data.n, data.write ? "write" : "read", data.bufsize, data.nrreq,
4945           data.offset, data.step);
4946    if (flush_interval) {
4947        printf("Sending flush every %d requests\n", flush_interval);
4948    }
4949
4950    buf_size = data.nrreq * data.bufsize;
4951    data.buf = blk_blockalign(blk, buf_size);
4952    memset(data.buf, pattern, data.nrreq * data.bufsize);
4953
4954    blk_register_buf(blk, data.buf, buf_size, &error_fatal);
4955
4956    data.qiov = g_new(QEMUIOVector, data.nrreq);
4957    for (i = 0; i < data.nrreq; i++) {
4958        qemu_iovec_init(&data.qiov[i], 1);
4959        qemu_iovec_add(&data.qiov[i],
4960                       data.buf + i * data.bufsize, data.bufsize);
4961    }
4962
4963    gettimeofday(&t1, NULL);
4964    bench_cb(&data, 0);
4965
4966    while (data.n > 0) {
4967        main_loop_wait(false);
4968    }
4969    gettimeofday(&t2, NULL);
4970
4971    printf("Run completed in %3.3f seconds.\n",
4972           (t2.tv_sec - t1.tv_sec)
4973           + ((double)(t2.tv_usec - t1.tv_usec) / 1000000));
4974
4975out:
4976    if (data.buf) {
4977        blk_unregister_buf(blk, data.buf, buf_size);
4978    }
4979    qemu_vfree(data.buf);
4980    blk_unref(blk);
4981
4982    if (ret) {
4983        return 1;
4984    }
4985    return 0;
4986}
4987
/*
 * Bitmap operations that img_bitmap() can queue from its command line
 * options; one enumerator per action option.
 */
enum ImgBitmapAct {
    /* --add */
    BITMAP_ADD,
    /* --remove */
    BITMAP_REMOVE,
    /* --clear */
    BITMAP_CLEAR,
    /* --enable */
    BITMAP_ENABLE,
    /* --disable */
    BITMAP_DISABLE,
    /* --merge SOURCE */
    BITMAP_MERGE,
};
/*
 * One queued bitmap operation.  img_bitmap() allocates an entry per action
 * option and appends it to a QSIMPLEQ, so actions are later executed in
 * command line order.
 */
typedef struct ImgBitmapAction {
    /* Operation to perform */
    enum ImgBitmapAct act;
    /* Points at the --merge option argument (not a copy) */
    const char *src; /* only used for merge */
    /* Queue linkage */
    QSIMPLEQ_ENTRY(ImgBitmapAction) next;
} ImgBitmapAction;
5001
5002static int img_bitmap(const img_cmd_t *ccmd, int argc, char **argv)
5003{
5004    Error *err = NULL;
5005    int c, ret = 1;
5006    QemuOpts *opts = NULL;
5007    const char *fmt = NULL, *src_fmt = NULL, *src_filename = NULL;
5008    const char *filename, *bitmap;
5009    BlockBackend *blk = NULL, *src = NULL;
5010    BlockDriverState *bs = NULL, *src_bs = NULL;
5011    bool image_opts = false;
5012    int64_t granularity = 0;
5013    bool add = false, merge = false;
5014    QSIMPLEQ_HEAD(, ImgBitmapAction) actions;
5015    ImgBitmapAction *act, *act_next;
5016    const char *op;
5017    int inactivate_ret;
5018
5019    QSIMPLEQ_INIT(&actions);
5020
5021    for (;;) {
5022        static const struct option long_options[] = {
5023            {"help", no_argument, 0, 'h'},
5024            {"format", required_argument, 0, 'f'},
5025            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
5026            {"add", no_argument, 0, OPTION_ADD},
5027            {"granularity", required_argument, 0, 'g'},
5028            {"remove", no_argument, 0, OPTION_REMOVE},
5029            {"clear", no_argument, 0, OPTION_CLEAR},
5030            {"enable", no_argument, 0, OPTION_ENABLE},
5031            {"disable", no_argument, 0, OPTION_DISABLE},
5032            {"merge", required_argument, 0, OPTION_MERGE},
5033            {"source-file", required_argument, 0, 'b'},
5034            {"source-format", required_argument, 0, 'F'},
5035            {"object", required_argument, 0, OPTION_OBJECT},
5036            {0, 0, 0, 0}
5037        };
5038        c = getopt_long(argc, argv, "hf:g:b:F:",
5039                        long_options, NULL);
5040        if (c == -1) {
5041            break;
5042        }
5043
5044        switch (c) {
5045        case 'h':
5046            cmd_help(ccmd, "[-f FMT | --image-opts]\n"
5047"        ( --add [-g SIZE] | --remove | --clear | --enable | --disable |\n"
5048"          --merge SOURCE [-b SRC_FILE [-F SRC_FMT]] )..\n"
5049"        [--object OBJDEF] FILE BITMAP\n"
5050,
5051"  -f, --format FMT\n"
5052"     specify FILE format explicitly (default: probing is used)\n"
5053"  --image-opts\n"
5054"     treat FILE as an option string (key=value,..), not a file name\n"
5055"     (incompatible with -f|--format)\n"
5056"  --add\n"
5057"     creates BITMAP in FILE, enables to record future edits\n"
5058"  -g, --granularity SIZE[bKMGTPE]\n"
5059"     sets non-default granularity for the bitmap being added,\n"
5060"     with optional multiplier suffix (in powers of 1024)\n"
5061"  --remove\n"
5062"     removes BITMAP from FILE\n"
5063"  --clear\n"
5064"     clears BITMAP in FILE\n"
5065"  --enable, --disable\n"
5066"     starts and stops recording future edits to BITMAP in FILE\n"
5067"  --merge SOURCE\n"
5068"     merges contents of the SOURCE bitmap into BITMAP in FILE\n"
5069"  -b, --source-file SRC_FILE\n"
5070"     select alternative source file for --merge\n"
5071"  -F, --source-format SRC_FMT\n"
5072"     specify format for SRC_FILE explicitly\n"
5073"  --object OBJDEF\n"
5074"     defines QEMU user-creatable object\n"
5075"  FILE\n"
5076"     name of the image file, or option string (key=value,..)\n"
5077"     with --image-opts, to operate on\n"
5078"  BITMAP\n"
5079"     name of the bitmap to add, remove, clear, enable, disable or merge to\n"
5080);
5081            break;
5082        case 'f':
5083            fmt = optarg;
5084            break;
5085        case OPTION_IMAGE_OPTS:
5086            image_opts = true;
5087            break;
5088        case OPTION_ADD:
5089            act = g_new0(ImgBitmapAction, 1);
5090            act->act = BITMAP_ADD;
5091            QSIMPLEQ_INSERT_TAIL(&actions, act, next);
5092            add = true;
5093            break;
5094        case 'g':
5095            granularity = cvtnum("granularity", optarg, true);
5096            if (granularity < 0) {
5097                return 1;
5098            }
5099            break;
5100        case OPTION_REMOVE:
5101            act = g_new0(ImgBitmapAction, 1);
5102            act->act = BITMAP_REMOVE;
5103            QSIMPLEQ_INSERT_TAIL(&actions, act, next);
5104            break;
5105        case OPTION_CLEAR:
5106            act = g_new0(ImgBitmapAction, 1);
5107            act->act = BITMAP_CLEAR;
5108            QSIMPLEQ_INSERT_TAIL(&actions, act, next);
5109            break;
5110        case OPTION_ENABLE:
5111            act = g_new0(ImgBitmapAction, 1);
5112            act->act = BITMAP_ENABLE;
5113            QSIMPLEQ_INSERT_TAIL(&actions, act, next);
5114            break;
5115        case OPTION_DISABLE:
5116            act = g_new0(ImgBitmapAction, 1);
5117            act->act = BITMAP_DISABLE;
5118            QSIMPLEQ_INSERT_TAIL(&actions, act, next);
5119            break;
5120        case OPTION_MERGE:
5121            act = g_new0(ImgBitmapAction, 1);
5122            act->act = BITMAP_MERGE;
5123            act->src = optarg;
5124            QSIMPLEQ_INSERT_TAIL(&actions, act, next);
5125            merge = true;
5126            break;
5127        case 'b':
5128            src_filename = optarg;
5129            break;
5130        case 'F':
5131            src_fmt = optarg;
5132            break;
5133        case OPTION_OBJECT:
5134            user_creatable_process_cmdline(optarg);
5135            break;
5136        default:
5137            tryhelp(argv[0]);
5138        }
5139    }
5140
5141    if (QSIMPLEQ_EMPTY(&actions)) {
5142        error_report("Need at least one of --add, --remove, --clear, "
5143                     "--enable, --disable, or --merge");
5144        goto out;
5145    }
5146
5147    if (granularity && !add) {
5148        error_report("granularity only supported with --add");
5149        goto out;
5150    }
5151    if (src_fmt && !src_filename) {
5152        error_report("-F only supported with -b");
5153        goto out;
5154    }
5155    if (src_filename && !merge) {
5156        error_report("Merge bitmap source file only supported with "
5157                     "--merge");
5158        goto out;
5159    }
5160
5161    if (optind != argc - 2) {
5162        error_report("Expecting filename and bitmap name");
5163        goto out;
5164    }
5165
5166    filename = argv[optind];
5167    bitmap = argv[optind + 1];
5168
5169    /*
5170     * No need to open backing chains; we will be manipulating bitmaps
5171     * directly in this image without reference to image contents.
5172     */
5173    blk = img_open(image_opts, filename, fmt, BDRV_O_RDWR | BDRV_O_NO_BACKING,
5174                   false, false, false);
5175    if (!blk) {
5176        goto out;
5177    }
5178    bs = blk_bs(blk);
5179    if (src_filename) {
5180        src = img_open(false, src_filename, src_fmt, BDRV_O_NO_BACKING,
5181                       false, false, false);
5182        if (!src) {
5183            goto out;
5184        }
5185        src_bs = blk_bs(src);
5186    } else {
5187        src_bs = bs;
5188    }
5189
5190    QSIMPLEQ_FOREACH_SAFE(act, &actions, next, act_next) {
5191        switch (act->act) {
5192        case BITMAP_ADD:
5193            qmp_block_dirty_bitmap_add(bs->node_name, bitmap,
5194                                       !!granularity, granularity, true, true,
5195                                       false, false, &err);
5196            op = "add";
5197            break;
5198        case BITMAP_REMOVE:
5199            qmp_block_dirty_bitmap_remove(bs->node_name, bitmap, &err);
5200            op = "remove";
5201            break;
5202        case BITMAP_CLEAR:
5203            qmp_block_dirty_bitmap_clear(bs->node_name, bitmap, &err);
5204            op = "clear";
5205            break;
5206        case BITMAP_ENABLE:
5207            qmp_block_dirty_bitmap_enable(bs->node_name, bitmap, &err);
5208            op = "enable";
5209            break;
5210        case BITMAP_DISABLE:
5211            qmp_block_dirty_bitmap_disable(bs->node_name, bitmap, &err);
5212            op = "disable";
5213            break;
5214        case BITMAP_MERGE:
5215            do_dirty_bitmap_merge(bs->node_name, bitmap, src_bs->node_name,
5216                                  act->src, &err);
5217            op = "merge";
5218            break;
5219        default:
5220            g_assert_not_reached();
5221        }
5222
5223        if (err) {
5224            error_reportf_err(err, "Operation %s on bitmap %s failed: ",
5225                              op, bitmap);
5226            goto out;
5227        }
5228        g_free(act);
5229    }
5230
5231    ret = 0;
5232
5233 out:
5234    /*
5235     * Manually inactivate the images first because this way we can know whether
5236     * an error occurred. blk_unref() doesn't tell us about failures.
5237     */
5238    inactivate_ret = bdrv_inactivate_all();
5239    if (inactivate_ret < 0) {
5240        error_report("Error while closing the image: %s", strerror(-inactivate_ret));
5241        ret = 1;
5242    }
5243
5244    blk_unref(src);
5245    blk_unref(blk);
5246    qemu_opts_del(opts);
5247    return ret;
5248}
5249
/*
 * Bit flags (octal literals) recorded in DdInfo.flags, one per "qemu-img dd"
 * operand that was given on the command line.
 */
/* bs= */
#define C_BS      01
/* count= */
#define C_COUNT   02
/* if= */
#define C_IF      04
/* of= */
#define C_OF      010
/* skip= */
#define C_SKIP    020
5255
/* Global state of a "qemu-img dd" invocation */
struct DdInfo {
    /* OR of the C_* flags of the operands seen so far */
    unsigned int flags;
    /* Value parsed from the count= operand */
    int64_t count;
};
5260
/* Per-side (input or output) parameters of a "qemu-img dd" invocation */
struct DdIo {
    int bsz;    /* Block size */
    /* Heap copy (g_strdup) of the if= or of= operand value */
    char *filename;
    /* NOTE(review): presumably the transfer buffer; not used in the code visible here */
    uint8_t *buf;
    /* For the input side, the value parsed from the skip= operand */
    int64_t offset;
};
5267
/* Table entry describing one NAME=VALUE operand accepted by "qemu-img dd" */
struct DdOpts {
    /* Operand name, i.e. the text before '=' */
    const char *name;
    /*
     * Parser for the operand value: receives the text after '=', the input
     * and output descriptors and the global state; returns 0 on success and
     * non-zero on failure.
     */
    int (*f)(const char *, struct DdIo *, struct DdIo *, struct DdInfo *);
    /* C_* flag to set in DdInfo.flags once the operand has been parsed */
    unsigned int flag;
};
5273
5274static int img_dd_bs(const char *arg,
5275                     struct DdIo *in, struct DdIo *out,
5276                     struct DdInfo *dd)
5277{
5278    int64_t res;
5279
5280    res = cvtnum_full("bs", arg, true, 1, INT_MAX);
5281
5282    if (res < 0) {
5283        return 1;
5284    }
5285    in->bsz = out->bsz = res;
5286
5287    return 0;
5288}
5289
5290static int img_dd_count(const char *arg,
5291                        struct DdIo *in, struct DdIo *out,
5292                        struct DdInfo *dd)
5293{
5294    dd->count = cvtnum("count", arg, true);
5295
5296    if (dd->count < 0) {
5297        return 1;
5298    }
5299
5300    return 0;
5301}
5302
5303static int img_dd_if(const char *arg,
5304                     struct DdIo *in, struct DdIo *out,
5305                     struct DdInfo *dd)
5306{
5307    in->filename = g_strdup(arg);
5308
5309    return 0;
5310}
5311
5312static int img_dd_of(const char *arg,
5313                     struct DdIo *in, struct DdIo *out,
5314                     struct DdInfo *dd)
5315{
5316    out->filename = g_strdup(arg);
5317
5318    return 0;
5319}
5320
5321static int img_dd_skip(const char *arg,
5322                       struct DdIo *in, struct DdIo *out,
5323                       struct DdInfo *dd)
5324{
5325    in->offset = cvtnum("skip", arg, true);
5326
5327    if (in->offset < 0) {
5328        return 1;
5329    }
5330
5331    return 0;
5332}
5333
5334static int img_dd(const img_cmd_t *ccmd, int argc, char **argv)
5335{
5336    int ret = 0;
5337    char *arg = NULL;
5338    char *tmp;
5339    BlockDriver *drv = NULL, *proto_drv = NULL;
5340    BlockBackend *blk1 = NULL, *blk2 = NULL;
5341    QemuOpts *opts = NULL;
5342    QemuOptsList *create_opts = NULL;
5343    Error *local_err = NULL;
5344    bool image_opts = false;
5345    int c, i;
5346    const char *out_fmt = "raw";
5347    const char *fmt = NULL;
5348    int64_t size = 0;
5349    int64_t out_pos, in_pos;
5350    bool force_share = false;
5351    struct DdInfo dd = {
5352        .flags = 0,
5353        .count = 0,
5354    };
5355    struct DdIo in = {
5356        .bsz = 512, /* Block size is by default 512 bytes */
5357        .filename = NULL,
5358        .buf = NULL,
5359        .offset = 0
5360    };
5361    struct DdIo out = {
5362        .bsz = 512,
5363        .filename = NULL,
5364        .buf = NULL,
5365        .offset = 0
5366    };
5367
5368    const struct DdOpts options[] = {
5369        { "bs", img_dd_bs, C_BS },
5370        { "count", img_dd_count, C_COUNT },
5371        { "if", img_dd_if, C_IF },
5372        { "of", img_dd_of, C_OF },
5373        { "skip", img_dd_skip, C_SKIP },
5374        { NULL, NULL, 0 }
5375    };
5376    const struct option long_options[] = {
5377        { "help", no_argument, 0, 'h'},
5378        { "format", required_argument, 0, 'f'},
5379        { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
5380        { "output-format", required_argument, 0, 'O'},
5381        { "force-share", no_argument, 0, 'U'},
5382        { "object", required_argument, 0, OPTION_OBJECT},
5383        { 0, 0, 0, 0 }
5384    };
5385
5386    while ((c = getopt_long(argc, argv, "hf:O:U", long_options, NULL))) {
5387        if (c == EOF) {
5388            break;
5389        }
5390        switch (c) {
5391        case 'h':
5392            cmd_help(ccmd, "[-f FMT|--image-opts] [-O OUTPUT_FMT] [-U]\n"
5393"        [--object OBJDEF] [bs=BLOCK_SIZE] [count=BLOCKS] if=INPUT of=OUTPUT\n"
5394,
5395"  -f, --format FMT\n"
5396"     specify format for INPUT explicitly (default: probing is used)\n"
5397"  --image-opts\n"
5398"     treat INPUT as an option string (key=value,..), not a file name\n"
5399"     (incompatible with -f|--format)\n"
5400"  -O, --output-format OUTPUT_FMT\n"
5401"     format of the OUTPUT (default: raw)\n"
5402"  -U, --force-share\n"
5403"     open images in shared mode for concurrent access\n"
5404"  --object OBJDEF\n"
5405"     defines QEMU user-creatable object\n"
5406"  bs=BLOCK_SIZE[bKMGTP]\n"
5407"     size of the I/O block, with optional multiplier suffix (powers of 1024)\n"
5408"     (default: 512)\n"
5409"  count=COUNT\n"
5410"     number of blocks to convert (default whole INPUT)\n"
5411"  if=INPUT\n"
5412"     name of the file, or option string (key=value,..)\n"
5413"     with --image-opts, to use for input\n"
5414"  of=OUTPUT\n"
5415"     output file name to create (will be overridden if alrady exists)\n"
5416);
5417            break;
5418        case 'f':
5419            fmt = optarg;
5420            break;
5421        case OPTION_IMAGE_OPTS:
5422            image_opts = true;
5423            break;
5424        case 'O':
5425            out_fmt = optarg;
5426            break;
5427        case 'U':
5428            force_share = true;
5429            break;
5430        case OPTION_OBJECT:
5431            user_creatable_process_cmdline(optarg);
5432            break;
5433        default:
5434            tryhelp(argv[0]);
5435        }
5436    }
5437
5438    for (i = optind; i < argc; i++) {
5439        int j;
5440        arg = g_strdup(argv[i]);
5441
5442        tmp = strchr(arg, '=');
5443        if (tmp == NULL) {
5444            error_report("unrecognized operand %s", arg);
5445            ret = -1;
5446            goto out;
5447        }
5448
5449        *tmp++ = '\0';
5450
5451        for (j = 0; options[j].name != NULL; j++) {
5452            if (!strcmp(arg, options[j].name)) {
5453                break;
5454            }
5455        }
5456        if (options[j].name == NULL) {
5457            error_report("unrecognized operand %s", arg);
5458            ret = -1;
5459            goto out;
5460        }
5461
5462        if (options[j].f(tmp, &in, &out, &dd) != 0) {
5463            ret = -1;
5464            goto out;
5465        }
5466        dd.flags |= options[j].flag;
5467        g_free(arg);
5468        arg = NULL;
5469    }
5470
5471    if (!(dd.flags & C_IF && dd.flags & C_OF)) {
5472        error_report("Must specify both input and output files");
5473        ret = -1;
5474        goto out;
5475    }
5476
5477    blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
5478                    force_share);
5479
5480    if (!blk1) {
5481        ret = -1;
5482        goto out;
5483    }
5484
5485    drv = bdrv_find_format(out_fmt);
5486    if (!drv) {
5487        error_report("Unknown file format");
5488        ret = -1;
5489        goto out;
5490    }
5491    proto_drv = bdrv_find_protocol(out.filename, true, &local_err);
5492
5493    if (!proto_drv) {
5494        error_report_err(local_err);
5495        ret = -1;
5496        goto out;
5497    }
5498    if (!drv->create_opts) {
5499        error_report("Format driver '%s' does not support image creation",
5500                     drv->format_name);
5501        ret = -1;
5502        goto out;
5503    }
5504    if (!proto_drv->create_opts) {
5505        error_report("Protocol driver '%s' does not support image creation",
5506                     proto_drv->format_name);
5507        ret = -1;
5508        goto out;
5509    }
5510    create_opts = qemu_opts_append(create_opts, drv->create_opts);
5511    create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
5512
5513    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
5514
5515    size = blk_getlength(blk1);
5516    if (size < 0) {
5517        error_report("Failed to get size for '%s'", in.filename);
5518        ret = -1;
5519        goto out;
5520    }
5521
5522    if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
5523        dd.count * in.bsz < size) {
5524        size = dd.count * in.bsz;
5525    }
5526
5527    /* Overflow means the specified offset is beyond input image's size */
5528    if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
5529                              size < in.bsz * in.offset)) {
5530        qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort);
5531    } else {
5532        qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
5533                            size - in.bsz * in.offset, &error_abort);
5534    }
5535
5536    ret = bdrv_create(drv, out.filename, opts, &local_err);
5537    if (ret < 0) {
5538        error_reportf_err(local_err,
5539                          "%s: error while creating output image: ",
5540                          out.filename);
5541        ret = -1;
5542        goto out;
5543    }
5544
5545    /* TODO, we can't honour --image-opts for the target,
5546     * since it needs to be given in a format compatible
5547     * with the bdrv_create() call above which does not
5548     * support image-opts style.
5549     */
5550    blk2 = img_open_file(out.filename, NULL, out_fmt, BDRV_O_RDWR,
5551                         false, false, false);
5552
5553    if (!blk2) {
5554        ret = -1;
5555        goto out;
5556    }
5557
5558    if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
5559                              size < in.offset * in.bsz)) {
5560        /* We give a warning if the skip option is bigger than the input
5561         * size and create an empty output disk image (i.e. like dd(1)).
5562         */
5563        error_report("%s: cannot skip to specified offset", in.filename);
5564        in_pos = size;
5565    } else {
5566        in_pos = in.offset * in.bsz;
5567    }
5568
5569    in.buf = g_new(uint8_t, in.bsz);
5570
5571    for (out_pos = 0; in_pos < size; ) {
5572        int bytes = (in_pos + in.bsz > size) ? size - in_pos : in.bsz;
5573
5574        ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
5575        if (ret < 0) {
5576            error_report("error while reading from input image file: %s",
5577                         strerror(-ret));
5578            goto out;
5579        }
5580        in_pos += bytes;
5581
5582        ret = blk_pwrite(blk2, out_pos, bytes, in.buf, 0);
5583        if (ret < 0) {
5584            error_report("error while writing to output image file: %s",
5585                         strerror(-ret));
5586            goto out;
5587        }
5588        out_pos += bytes;
5589    }
5590
5591out:
5592    g_free(arg);
5593    qemu_opts_del(opts);
5594    qemu_opts_free(create_opts);
5595    blk_unref(blk1);
5596    blk_unref(blk2);
5597    g_free(in.filename);
5598    g_free(out.filename);
5599    g_free(in.buf);
5600    g_free(out.buf);
5601
5602    if (ret) {
5603        return 1;
5604    }
5605    return 0;
5606}
5607
5608static void dump_json_block_measure_info(BlockMeasureInfo *info)
5609{
5610    GString *str;
5611    QObject *obj;
5612    Visitor *v = qobject_output_visitor_new(&obj);
5613
5614    visit_type_BlockMeasureInfo(v, NULL, &info, &error_abort);
5615    visit_complete(v, &obj);
5616    str = qobject_to_json_pretty(obj, true);
5617    assert(str != NULL);
5618    printf("%s\n", str->str);
5619    qobject_unref(obj);
5620    visit_free(v);
5621    g_string_free(str, true);
5622}
5623
5624static int img_measure(const img_cmd_t *ccmd, int argc, char **argv)
5625{
5626    OutputFormat output_format = OFORMAT_HUMAN;
5627    BlockBackend *in_blk = NULL;
5628    BlockDriver *drv;
5629    const char *filename = NULL;
5630    const char *fmt = NULL;
5631    const char *out_fmt = "raw";
5632    char *options = NULL;
5633    char *snapshot_name = NULL;
5634    bool force_share = false;
5635    QemuOpts *opts = NULL;
5636    QemuOpts *object_opts = NULL;
5637    QemuOpts *sn_opts = NULL;
5638    QemuOptsList *create_opts = NULL;
5639    bool image_opts = false;
5640    int64_t img_size = -1;
5641    BlockMeasureInfo *info = NULL;
5642    Error *local_err = NULL;
5643    int ret = 1;
5644    int c;
5645
5646    static const struct option long_options[] = {
5647        {"help", no_argument, 0, 'h'},
5648        {"source-format", required_argument, 0, 'f'}, /* img_convert */
5649        {"format", required_argument, 0, 'f'},
5650        {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
5651        {"source-image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, /* img_convert */
5652        {"snapshot", required_argument, 0, 'l'},
5653        {"target-format", required_argument, 0, 'O'},
5654        {"target-format-options", required_argument, 0, 'o'}, /* img_convert */
5655        {"options", required_argument, 0, 'o'},
5656        {"force-share", no_argument, 0, 'U'},
5657        {"output", required_argument, 0, OPTION_OUTPUT},
5658        {"object", required_argument, 0, OPTION_OBJECT},
5659        {"size", required_argument, 0, 's'},
5660        {0, 0, 0, 0}
5661    };
5662
5663    while ((c = getopt_long(argc, argv, "hf:l:O:o:Us:",
5664                            long_options, NULL)) != -1) {
5665        switch (c) {
5666        case 'h':
5667            cmd_help(ccmd, "[-f FMT|--image-opts] [-l SNAPSHOT]\n"
5668"       [-O TARGET_FMT] [-o TARGET_FMT_OPTS] [--output human|json]\n"
5669"       [--object OBJDEF] (--size SIZE | FILE)\n"
5670,
5671"  -f, --format\n"
5672"     specify format of FILE explicitly (default: probing is used)\n"
5673"  --image-opts\n"
5674"     indicates that FILE is a complete image specification\n"
5675"     instead of a file name (incompatible with --format)\n"
5676"  -l, --snapshot SNAPSHOT\n"
5677"     use this snapshot in FILE as source\n"
5678"  -O, --target-format TARGET_FMT\n"
5679"     desired target/output image format (default: raw)\n"
5680"  -o TARGET_FMT_OPTS\n"
5681"     options specific to TARGET_FMT\n"
5682"  --output human|json\n"
5683"     output format (default: human)\n"
5684"  -U, --force-share\n"
5685"     open images in shared mode for concurrent access\n"
5686"  --object OBJDEF\n"
5687"     defines QEMU user-creatable object\n"
5688"  -s, --size SIZE[bKMGTPE]\n"
5689"     measure file size for given image size,\n"
5690"     with optional multiplier suffix (powers of 1024)\n"
5691"  FILE\n"
5692"     measure file size required to convert from FILE (either a file name\n"
5693"     or an option string (key=value,..) with --image-options)\n"
5694);
5695            break;
5696        case 'f':
5697            fmt = optarg;
5698            break;
5699        case OPTION_IMAGE_OPTS:
5700            image_opts = true;
5701            break;
5702        case 'l':
5703            if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
5704                sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
5705                                                  optarg, false);
5706                if (!sn_opts) {
5707                    error_report("Failed in parsing snapshot param '%s'",
5708                                 optarg);
5709                    goto out;
5710                }
5711            } else {
5712                snapshot_name = optarg;
5713            }
5714            break;
5715        case 'O':
5716            out_fmt = optarg;
5717            break;
5718        case 'o':
5719            if (accumulate_options(&options, optarg) < 0) {
5720                goto out;
5721            }
5722            break;
5723        case 'U':
5724            force_share = true;
5725            break;
5726        case OPTION_OUTPUT:
5727            output_format = parse_output_format(argv[0], optarg);
5728            break;
5729        case OPTION_OBJECT:
5730            user_creatable_process_cmdline(optarg);
5731            break;
5732        case 's':
5733            img_size = cvtnum("image size", optarg, true);
5734            if (img_size < 0) {
5735                goto out;
5736            }
5737            break;
5738        default:
5739            tryhelp(argv[0]);
5740        }
5741    }
5742
5743    if (argc - optind > 1) {
5744        error_report("At most one filename argument is allowed.");
5745        goto out;
5746    } else if (argc - optind == 1) {
5747        filename = argv[optind];
5748    }
5749
5750    if (!filename && (image_opts || fmt || snapshot_name || sn_opts)) {
5751        error_report("--image-opts, -f, and -l require a filename argument.");
5752        goto out;
5753    }
5754    if (filename && img_size != -1) {
5755        error_report("--size N cannot be used together with a filename.");
5756        goto out;
5757    }
5758    if (!filename && img_size == -1) {
5759        error_report("Either --size N or one filename must be specified.");
5760        goto out;
5761    }
5762
5763    if (filename) {
5764        in_blk = img_open(image_opts, filename, fmt, 0,
5765                          false, false, force_share);
5766        if (!in_blk) {
5767            goto out;
5768        }
5769
5770        if (sn_opts) {
5771            bdrv_snapshot_load_tmp(blk_bs(in_blk),
5772                    qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
5773                    qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
5774                    &local_err);
5775        } else if (snapshot_name != NULL) {
5776            bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(in_blk),
5777                    snapshot_name, &local_err);
5778        }
5779        if (local_err) {
5780            error_reportf_err(local_err, "Failed to load snapshot: ");
5781            goto out;
5782        }
5783    }
5784
5785    drv = bdrv_find_format(out_fmt);
5786    if (!drv) {
5787        error_report("Unknown file format '%s'", out_fmt);
5788        goto out;
5789    }
5790    if (!drv->create_opts) {
5791        error_report("Format driver '%s' does not support image creation",
5792                     drv->format_name);
5793        goto out;
5794    }
5795
5796    create_opts = qemu_opts_append(create_opts, drv->create_opts);
5797    create_opts = qemu_opts_append(create_opts, bdrv_file.create_opts);
5798    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
5799    if (options) {
5800        if (!qemu_opts_do_parse(opts, options, NULL, &local_err)) {
5801            error_report_err(local_err);
5802            error_report("Invalid options for file format '%s'", out_fmt);
5803            goto out;
5804        }
5805    }
5806    if (img_size != -1) {
5807        qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
5808    }
5809
5810    info = bdrv_measure(drv, opts, in_blk ? blk_bs(in_blk) : NULL, &local_err);
5811    if (local_err) {
5812        error_report_err(local_err);
5813        goto out;
5814    }
5815
5816    if (output_format == OFORMAT_HUMAN) {
5817        printf("required size: %" PRIu64 "\n", info->required);
5818        printf("fully allocated size: %" PRIu64 "\n", info->fully_allocated);
5819        if (info->has_bitmaps) {
5820            printf("bitmaps size: %" PRIu64 "\n", info->bitmaps);
5821        }
5822    } else {
5823        dump_json_block_measure_info(info);
5824    }
5825
5826    ret = 0;
5827
5828out:
5829    qapi_free_BlockMeasureInfo(info);
5830    qemu_opts_del(object_opts);
5831    qemu_opts_del(opts);
5832    qemu_opts_del(sn_opts);
5833    qemu_opts_free(create_opts);
5834    g_free(options);
5835    blk_unref(in_blk);
5836    return ret;
5837}
5838
5839static const img_cmd_t img_cmds[] = {
5840    { "amend", img_amend,
5841      "Update format-specific options of the image" },
5842    { "bench", img_bench,
5843      "Run a simple image benchmark" },
5844    { "bitmap", img_bitmap,
5845      "Perform modifications of the persistent bitmap in the image" },
5846    { "check", img_check,
5847      "Check basic image integrity" },
5848    { "commit", img_commit,
5849      "Commit image to its backing file" },
5850    { "compare", img_compare,
5851      "Check if two images have the same contents" },
5852    { "convert", img_convert,
5853      "Copy one or more images to another with optional format conversion" },
5854    { "create", img_create,
5855      "Create and format a new image file" },
5856    { "dd", img_dd,
5857      "Copy input to output with optional format conversion" },
5858    { "info", img_info,
5859      "Display information about the image" },
5860    { "map", img_map,
5861      "Dump image metadata" },
5862    { "measure", img_measure,
5863      "Calculate the file size required for a new image" },
5864    { "rebase", img_rebase,
5865      "Change the backing file of the image" },
5866    { "resize", img_resize,
5867      "Resize the image" },
5868    { "snapshot", img_snapshot,
5869      "List or manipulate snapshots in the image" },
5870    { NULL, NULL, },
5871};
5872
/*
 * Iteration callback that prints one format name on stdout, preceded by a
 * space, wrapping to a new line when the current one would grow too long.
 *
 * @opaque: points to an int holding the current output column; it is
 *          updated in place
 * @name:   format name to print
 */
static void format_print(void *opaque, const char *name)
{
    int *column = opaque;
    const size_t name_len = strlen(name);

    /* Start a fresh, one-space-indented line if this name would overflow */
    if (*column + name_len > 75) {
        fputs("\n ", stdout);
        *column = 1;
    }

    /* Advance the column by however many characters were just written */
    *column += printf(" %s", name);
}
5882
5883int main(int argc, char **argv)
5884{
5885    const img_cmd_t *cmd;
5886    const char *cmdname;
5887    int c;
5888    static const struct option long_options[] = {
5889        {"help", no_argument, 0, 'h'},
5890        {"version", no_argument, 0, 'V'},
5891        {"trace", required_argument, NULL, 'T'},
5892        {0, 0, 0, 0}
5893    };
5894
5895#ifdef CONFIG_POSIX
5896    signal(SIGPIPE, SIG_IGN);
5897#endif
5898
5899    socket_init();
5900    error_init(argv[0]);
5901    module_call_init(MODULE_INIT_TRACE);
5902    qemu_init_exec_dir(argv[0]);
5903
5904    qemu_init_main_loop(&error_fatal);
5905
5906    qcrypto_init(&error_fatal);
5907
5908    module_call_init(MODULE_INIT_QOM);
5909    bdrv_init();
5910
5911    qemu_add_opts(&qemu_source_opts);
5912    qemu_add_opts(&qemu_trace_opts);
5913
5914    while ((c = getopt_long(argc, argv, "+hVT:", long_options, NULL)) != -1) {
5915        switch (c) {
5916        case 'h':
5917            printf(
5918QEMU_IMG_VERSION
5919"QEMU disk image utility.  Usage:\n"
5920"\n"
5921"  qemu-img [standard options] COMMAND [--help | command options]\n"
5922"\n"
5923"Standard options:\n"
5924"  -h, --help\n"
5925"     display this help and exit\n"
5926"  -V, --version\n"
5927"     display version info and exit\n"
5928"  -T,--trace TRACE\n"
5929"     specify tracing options:\n"
5930"        [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
5931"\n"
5932"Recognized commands (run qemu-img COMMAND --help for command-specific help):\n\n");
5933            for (cmd = img_cmds; cmd->name != NULL; cmd++) {
5934                printf("  %s - %s\n", cmd->name, cmd->description);
5935            }
5936            printf("\nSupported image formats:\n");
5937            c = 99; /* force a newline */
5938            bdrv_iterate_format(format_print, &c, false);
5939            if (c) {
5940                printf("\n");
5941            }
5942            printf("\n" QEMU_HELP_BOTTOM "\n");
5943            return 0;
5944        case 'V':
5945            printf(QEMU_IMG_VERSION);
5946            return 0;
5947        case 'T':
5948            trace_opt_parse(optarg);
5949            break;
5950        default:
5951            tryhelp(argv[0]);
5952        }
5953    }
5954
5955    if (optind >= argc) {
5956        error_exit(argv[0], "Not enough arguments");
5957    }
5958
5959    cmdname = argv[optind];
5960
5961    if (!trace_init_backends()) {
5962        exit(1);
5963    }
5964    trace_init_file();
5965    qemu_set_log(LOG_TRACE, &error_fatal);
5966
5967    /* find the command */
5968    for (cmd = img_cmds; cmd->name != NULL; cmd++) {
5969        if (!strcmp(cmdname, cmd->name)) {
5970            g_autofree char *argv0 = g_strdup_printf("%s %s", argv[0], cmdname);
5971            /* reset options and getopt processing (incl return order) */
5972            argv += optind;
5973            argc -= optind;
5974            qemu_reset_optind();
5975            argv[0] = argv0;
5976            return cmd->handler(cmd, argc, argv);
5977        }
5978    }
5979
5980    /* not found */
5981    error_exit(argv[0], "Command not found: %s", cmdname);
5982}
5983