qemu/qemu-img.c
<<
>>
Prefs
   1/*
   2 * QEMU disk image utility
   3 *
   4 * Copyright (c) 2003-2008 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24
  25#include "qemu/osdep.h"
  26#include <getopt.h>
  27
  28#include "qemu-common.h"
  29#include "qemu-version.h"
  30#include "qapi/error.h"
  31#include "qapi/qapi-visit-block-core.h"
  32#include "qapi/qobject-output-visitor.h"
  33#include "qapi/qmp/qjson.h"
  34#include "qapi/qmp/qdict.h"
  35#include "qapi/qmp/qstring.h"
  36#include "qemu/cutils.h"
  37#include "qemu/config-file.h"
  38#include "qemu/option.h"
  39#include "qemu/error-report.h"
  40#include "qemu/log.h"
  41#include "qemu/main-loop.h"
  42#include "qemu/module.h"
  43#include "qemu/units.h"
  44#include "qom/object_interfaces.h"
  45#include "sysemu/block-backend.h"
  46#include "block/block_int.h"
  47#include "block/blockjob.h"
  48#include "block/qapi.h"
  49#include "crypto/init.h"
  50#include "trace/control.h"
  51
  52#define QEMU_IMG_VERSION "qemu-img version " QEMU_FULL_VERSION \
  53                          "\n" QEMU_COPYRIGHT "\n"
  54
  55typedef struct img_cmd_t {
  56    const char *name;
  57    int (*handler)(int argc, char **argv);
  58} img_cmd_t;
  59
  60enum {
  61    OPTION_OUTPUT = 256,
  62    OPTION_BACKING_CHAIN = 257,
  63    OPTION_OBJECT = 258,
  64    OPTION_IMAGE_OPTS = 259,
  65    OPTION_PATTERN = 260,
  66    OPTION_FLUSH_INTERVAL = 261,
  67    OPTION_NO_DRAIN = 262,
  68    OPTION_TARGET_IMAGE_OPTS = 263,
  69    OPTION_SIZE = 264,
  70    OPTION_PREALLOCATION = 265,
  71    OPTION_SHRINK = 266,
  72    OPTION_SALVAGE = 267,
  73    OPTION_TARGET_IS_ZERO = 268,
  74};
  75
  76typedef enum OutputFormat {
  77    OFORMAT_JSON,
  78    OFORMAT_HUMAN,
  79} OutputFormat;
  80
  81/* Default to cache=writeback as data integrity is not important for qemu-img */
  82#define BDRV_DEFAULT_CACHE "writeback"
  83
  84static void format_print(void *opaque, const char *name)
  85{
  86    printf(" %s", name);
  87}
  88
  89static void QEMU_NORETURN GCC_FMT_ATTR(1, 2) error_exit(const char *fmt, ...)
  90{
  91    va_list ap;
  92
  93    va_start(ap, fmt);
  94    error_vreport(fmt, ap);
  95    va_end(ap);
  96
  97    error_printf("Try 'qemu-img --help' for more information\n");
  98    exit(EXIT_FAILURE);
  99}
 100
 101static void QEMU_NORETURN missing_argument(const char *option)
 102{
 103    error_exit("missing argument for option '%s'", option);
 104}
 105
 106static void QEMU_NORETURN unrecognized_option(const char *option)
 107{
 108    error_exit("unrecognized option '%s'", option);
 109}
 110
 111/* Please keep in synch with qemu-img.texi */
 112static void QEMU_NORETURN help(void)
 113{
 114    const char *help_msg =
 115           QEMU_IMG_VERSION
 116           "usage: qemu-img [standard options] command [command options]\n"
 117           "QEMU disk image utility\n"
 118           "\n"
 119           "    '-h', '--help'       display this help and exit\n"
 120           "    '-V', '--version'    output version information and exit\n"
 121           "    '-T', '--trace'      [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
 122           "                         specify tracing options\n"
 123           "\n"
 124           "Command syntax:\n"
 125#define DEF(option, callback, arg_string)        \
 126           "  " arg_string "\n"
 127#include "qemu-img-cmds.h"
 128#undef DEF
 129           "\n"
 130           "Command parameters:\n"
 131           "  'filename' is a disk image filename\n"
 132           "  'objectdef' is a QEMU user creatable object definition. See the qemu(1)\n"
 133           "    manual page for a description of the object properties. The most common\n"
 134           "    object type is a 'secret', which is used to supply passwords and/or\n"
 135           "    encryption keys.\n"
 136           "  'fmt' is the disk image format. It is guessed automatically in most cases\n"
 137           "  'cache' is the cache mode used to write the output disk image, the valid\n"
 138           "    options are: 'none', 'writeback' (default, except for convert), 'writethrough',\n"
 139           "    'directsync' and 'unsafe' (default for convert)\n"
 140           "  'src_cache' is the cache mode used to read input disk images, the valid\n"
 141           "    options are the same as for the 'cache' option\n"
 142           "  'size' is the disk image size in bytes. Optional suffixes\n"
 143           "    'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M),\n"
 144           "    'T' (terabyte, 1024G), 'P' (petabyte, 1024T) and 'E' (exabyte, 1024P)  are\n"
 145           "    supported. 'b' is ignored.\n"
 146           "  'output_filename' is the destination disk image filename\n"
 147           "  'output_fmt' is the destination format\n"
 148           "  'options' is a comma separated list of format specific options in a\n"
 149           "    name=value format. Use -o ? for an overview of the options supported by the\n"
 150           "    used format\n"
 151           "  'snapshot_param' is param used for internal snapshot, format\n"
 152           "    is 'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
 153           "    '[ID_OR_NAME]'\n"
 154           "  '-c' indicates that target image must be compressed (qcow format only)\n"
 155           "  '-u' allows unsafe backing chains. For rebasing, it is assumed that old and\n"
 156           "       new backing file match exactly. The image doesn't need a working\n"
 157           "       backing file before rebasing in this case (useful for renaming the\n"
 158           "       backing file). For image creation, allow creating without attempting\n"
 159           "       to open the backing file.\n"
 160           "  '-h' with or without a command shows this help and lists the supported formats\n"
 161           "  '-p' show progress of command (only certain commands)\n"
 162           "  '-q' use Quiet mode - do not print any output (except errors)\n"
 163           "  '-S' indicates the consecutive number of bytes (defaults to 4k) that must\n"
 164           "       contain only zeros for qemu-img to create a sparse image during\n"
 165           "       conversion. If the number of bytes is 0, the source will not be scanned for\n"
 166           "       unallocated or zero sectors, and the destination image will always be\n"
 167           "       fully allocated\n"
 168           "  '--output' takes the format in which the output must be done (human or json)\n"
 169           "  '-n' skips the target volume creation (useful if the volume is created\n"
 170           "       prior to running qemu-img)\n"
 171           "\n"
 172           "Parameters to check subcommand:\n"
 173           "  '-r' tries to repair any inconsistencies that are found during the check.\n"
 174           "       '-r leaks' repairs only cluster leaks, whereas '-r all' fixes all\n"
 175           "       kinds of errors, with a higher risk of choosing the wrong fix or\n"
 176           "       hiding corruption that has already occurred.\n"
 177           "\n"
 178           "Parameters to convert subcommand:\n"
 179           "  '-m' specifies how many coroutines work in parallel during the convert\n"
 180           "       process (defaults to 8)\n"
 181           "  '-W' allow to write to the target out of order rather than sequential\n"
 182           "\n"
 183           "Parameters to snapshot subcommand:\n"
 184           "  'snapshot' is the name of the snapshot to create, apply or delete\n"
 185           "  '-a' applies a snapshot (revert disk to saved state)\n"
 186           "  '-c' creates a snapshot\n"
 187           "  '-d' deletes a snapshot\n"
 188           "  '-l' lists all snapshots in the given image\n"
 189           "\n"
 190           "Parameters to compare subcommand:\n"
 191           "  '-f' first image format\n"
 192           "  '-F' second image format\n"
 193           "  '-s' run in Strict mode - fail on different image size or sector allocation\n"
 194           "\n"
 195           "Parameters to dd subcommand:\n"
 196           "  'bs=BYTES' read and write up to BYTES bytes at a time "
 197           "(default: 512)\n"
 198           "  'count=N' copy only N input blocks\n"
 199           "  'if=FILE' read from FILE\n"
 200           "  'of=FILE' write to FILE\n"
 201           "  'skip=N' skip N bs-sized blocks at the start of input\n";
 202
 203    printf("%s\nSupported formats:", help_msg);
 204    bdrv_iterate_format(format_print, NULL, false);
 205    printf("\n\n" QEMU_HELP_BOTTOM "\n");
 206    exit(EXIT_SUCCESS);
 207}
 208
 209static QemuOptsList qemu_object_opts = {
 210    .name = "object",
 211    .implied_opt_name = "qom-type",
 212    .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
 213    .desc = {
 214        { }
 215    },
 216};
 217
 218static bool qemu_img_object_print_help(const char *type, QemuOpts *opts)
 219{
 220    if (user_creatable_print_help(type, opts)) {
 221        exit(0);
 222    }
 223    return true;
 224}
 225
 226static QemuOptsList qemu_source_opts = {
 227    .name = "source",
 228    .implied_opt_name = "file",
 229    .head = QTAILQ_HEAD_INITIALIZER(qemu_source_opts.head),
 230    .desc = {
 231        { }
 232    },
 233};
 234
 235static int GCC_FMT_ATTR(2, 3) qprintf(bool quiet, const char *fmt, ...)
 236{
 237    int ret = 0;
 238    if (!quiet) {
 239        va_list args;
 240        va_start(args, fmt);
 241        ret = vprintf(fmt, args);
 242        va_end(args);
 243    }
 244    return ret;
 245}
 246
 247
 248static int print_block_option_help(const char *filename, const char *fmt)
 249{
 250    BlockDriver *drv, *proto_drv;
 251    QemuOptsList *create_opts = NULL;
 252    Error *local_err = NULL;
 253
 254    /* Find driver and parse its options */
 255    drv = bdrv_find_format(fmt);
 256    if (!drv) {
 257        error_report("Unknown file format '%s'", fmt);
 258        return 1;
 259    }
 260
 261    if (!drv->create_opts) {
 262        error_report("Format driver '%s' does not support image creation", fmt);
 263        return 1;
 264    }
 265
 266    create_opts = qemu_opts_append(create_opts, drv->create_opts);
 267    if (filename) {
 268        proto_drv = bdrv_find_protocol(filename, true, &local_err);
 269        if (!proto_drv) {
 270            error_report_err(local_err);
 271            qemu_opts_free(create_opts);
 272            return 1;
 273        }
 274        if (!proto_drv->create_opts) {
 275            error_report("Protocol driver '%s' does not support image creation",
 276                         proto_drv->format_name);
 277            qemu_opts_free(create_opts);
 278            return 1;
 279        }
 280        create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
 281    }
 282
 283    if (filename) {
 284        printf("Supported options:\n");
 285    } else {
 286        printf("Supported %s options:\n", fmt);
 287    }
 288    qemu_opts_print_help(create_opts, false);
 289    qemu_opts_free(create_opts);
 290
 291    if (!filename) {
 292        printf("\n"
 293               "The protocol level may support further options.\n"
 294               "Specify the target filename to include those options.\n");
 295    }
 296
 297    return 0;
 298}
 299
 300
 301static BlockBackend *img_open_opts(const char *optstr,
 302                                   QemuOpts *opts, int flags, bool writethrough,
 303                                   bool quiet, bool force_share)
 304{
 305    QDict *options;
 306    Error *local_err = NULL;
 307    BlockBackend *blk;
 308    options = qemu_opts_to_qdict(opts, NULL);
 309    if (force_share) {
 310        if (qdict_haskey(options, BDRV_OPT_FORCE_SHARE)
 311            && strcmp(qdict_get_str(options, BDRV_OPT_FORCE_SHARE), "on")) {
 312            error_report("--force-share/-U conflicts with image options");
 313            qobject_unref(options);
 314            return NULL;
 315        }
 316        qdict_put_str(options, BDRV_OPT_FORCE_SHARE, "on");
 317    }
 318    blk = blk_new_open(NULL, NULL, options, flags, &local_err);
 319    if (!blk) {
 320        error_reportf_err(local_err, "Could not open '%s': ", optstr);
 321        return NULL;
 322    }
 323    blk_set_enable_write_cache(blk, !writethrough);
 324
 325    return blk;
 326}
 327
 328static BlockBackend *img_open_file(const char *filename,
 329                                   QDict *options,
 330                                   const char *fmt, int flags,
 331                                   bool writethrough, bool quiet,
 332                                   bool force_share)
 333{
 334    BlockBackend *blk;
 335    Error *local_err = NULL;
 336
 337    if (!options) {
 338        options = qdict_new();
 339    }
 340    if (fmt) {
 341        qdict_put_str(options, "driver", fmt);
 342    }
 343
 344    if (force_share) {
 345        qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
 346    }
 347    blk = blk_new_open(filename, NULL, options, flags, &local_err);
 348    if (!blk) {
 349        error_reportf_err(local_err, "Could not open '%s': ", filename);
 350        return NULL;
 351    }
 352    blk_set_enable_write_cache(blk, !writethrough);
 353
 354    return blk;
 355}
 356
 357
 358static int img_add_key_secrets(void *opaque,
 359                               const char *name, const char *value,
 360                               Error **errp)
 361{
 362    QDict *options = opaque;
 363
 364    if (g_str_has_suffix(name, "key-secret")) {
 365        qdict_put_str(options, name, value);
 366    }
 367
 368    return 0;
 369}
 370
 371
 372static BlockBackend *img_open(bool image_opts,
 373                              const char *filename,
 374                              const char *fmt, int flags, bool writethrough,
 375                              bool quiet, bool force_share)
 376{
 377    BlockBackend *blk;
 378    if (image_opts) {
 379        QemuOpts *opts;
 380        if (fmt) {
 381            error_report("--image-opts and --format are mutually exclusive");
 382            return NULL;
 383        }
 384        opts = qemu_opts_parse_noisily(qemu_find_opts("source"),
 385                                       filename, true);
 386        if (!opts) {
 387            return NULL;
 388        }
 389        blk = img_open_opts(filename, opts, flags, writethrough, quiet,
 390                            force_share);
 391    } else {
 392        blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet,
 393                            force_share);
 394    }
 395    return blk;
 396}
 397
 398
 399static int add_old_style_options(const char *fmt, QemuOpts *opts,
 400                                 const char *base_filename,
 401                                 const char *base_fmt)
 402{
 403    Error *err = NULL;
 404
 405    if (base_filename) {
 406        qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &err);
 407        if (err) {
 408            error_report("Backing file not supported for file format '%s'",
 409                         fmt);
 410            error_free(err);
 411            return -1;
 412        }
 413    }
 414    if (base_fmt) {
 415        qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &err);
 416        if (err) {
 417            error_report("Backing file format not supported for file "
 418                         "format '%s'", fmt);
 419            error_free(err);
 420            return -1;
 421        }
 422    }
 423    return 0;
 424}
 425
 426static int64_t cvtnum(const char *s)
 427{
 428    int err;
 429    uint64_t value;
 430
 431    err = qemu_strtosz(s, NULL, &value);
 432    if (err < 0) {
 433        return err;
 434    }
 435    if (value > INT64_MAX) {
 436        return -ERANGE;
 437    }
 438    return value;
 439}
 440
 441static int img_create(int argc, char **argv)
 442{
 443    int c;
 444    uint64_t img_size = -1;
 445    const char *fmt = "raw";
 446    const char *base_fmt = NULL;
 447    const char *filename;
 448    const char *base_filename = NULL;
 449    char *options = NULL;
 450    Error *local_err = NULL;
 451    bool quiet = false;
 452    int flags = 0;
 453
 454    for(;;) {
 455        static const struct option long_options[] = {
 456            {"help", no_argument, 0, 'h'},
 457            {"object", required_argument, 0, OPTION_OBJECT},
 458            {0, 0, 0, 0}
 459        };
 460        c = getopt_long(argc, argv, ":F:b:f:ho:qu",
 461                        long_options, NULL);
 462        if (c == -1) {
 463            break;
 464        }
 465        switch(c) {
 466        case ':':
 467            missing_argument(argv[optind - 1]);
 468            break;
 469        case '?':
 470            unrecognized_option(argv[optind - 1]);
 471            break;
 472        case 'h':
 473            help();
 474            break;
 475        case 'F':
 476            base_fmt = optarg;
 477            break;
 478        case 'b':
 479            base_filename = optarg;
 480            break;
 481        case 'f':
 482            fmt = optarg;
 483            break;
 484        case 'o':
 485            if (!is_valid_option_list(optarg)) {
 486                error_report("Invalid option list: %s", optarg);
 487                goto fail;
 488            }
 489            if (!options) {
 490                options = g_strdup(optarg);
 491            } else {
 492                char *old_options = options;
 493                options = g_strdup_printf("%s,%s", options, optarg);
 494                g_free(old_options);
 495            }
 496            break;
 497        case 'q':
 498            quiet = true;
 499            break;
 500        case 'u':
 501            flags |= BDRV_O_NO_BACKING;
 502            break;
 503        case OPTION_OBJECT: {
 504            QemuOpts *opts;
 505            opts = qemu_opts_parse_noisily(&qemu_object_opts,
 506                                           optarg, true);
 507            if (!opts) {
 508                goto fail;
 509            }
 510        }   break;
 511        }
 512    }
 513
 514    /* Get the filename */
 515    filename = (optind < argc) ? argv[optind] : NULL;
 516    if (options && has_help_option(options)) {
 517        g_free(options);
 518        return print_block_option_help(filename, fmt);
 519    }
 520
 521    if (optind >= argc) {
 522        error_exit("Expecting image file name");
 523    }
 524    optind++;
 525
 526    if (qemu_opts_foreach(&qemu_object_opts,
 527                          user_creatable_add_opts_foreach,
 528                          qemu_img_object_print_help, &error_fatal)) {
 529        goto fail;
 530    }
 531
 532    /* Get image size, if specified */
 533    if (optind < argc) {
 534        int64_t sval;
 535
 536        sval = cvtnum(argv[optind++]);
 537        if (sval < 0) {
 538            if (sval == -ERANGE) {
 539                error_report("Image size must be less than 8 EiB!");
 540            } else {
 541                error_report("Invalid image size specified! You may use k, M, "
 542                      "G, T, P or E suffixes for ");
 543                error_report("kilobytes, megabytes, gigabytes, terabytes, "
 544                             "petabytes and exabytes.");
 545            }
 546            goto fail;
 547        }
 548        img_size = (uint64_t)sval;
 549    }
 550    if (optind != argc) {
 551        error_exit("Unexpected argument: %s", argv[optind]);
 552    }
 553
 554    bdrv_img_create(filename, fmt, base_filename, base_fmt,
 555                    options, img_size, flags, quiet, &local_err);
 556    if (local_err) {
 557        error_reportf_err(local_err, "%s: ", filename);
 558        goto fail;
 559    }
 560
 561    g_free(options);
 562    return 0;
 563
 564fail:
 565    g_free(options);
 566    return 1;
 567}
 568
 569static void dump_json_image_check(ImageCheck *check, bool quiet)
 570{
 571    QString *str;
 572    QObject *obj;
 573    Visitor *v = qobject_output_visitor_new(&obj);
 574
 575    visit_type_ImageCheck(v, NULL, &check, &error_abort);
 576    visit_complete(v, &obj);
 577    str = qobject_to_json_pretty(obj);
 578    assert(str != NULL);
 579    qprintf(quiet, "%s\n", qstring_get_str(str));
 580    qobject_unref(obj);
 581    visit_free(v);
 582    qobject_unref(str);
 583}
 584
 585static void dump_human_image_check(ImageCheck *check, bool quiet)
 586{
 587    if (!(check->corruptions || check->leaks || check->check_errors)) {
 588        qprintf(quiet, "No errors were found on the image.\n");
 589    } else {
 590        if (check->corruptions) {
 591            qprintf(quiet, "\n%" PRId64 " errors were found on the image.\n"
 592                    "Data may be corrupted, or further writes to the image "
 593                    "may corrupt it.\n",
 594                    check->corruptions);
 595        }
 596
 597        if (check->leaks) {
 598            qprintf(quiet,
 599                    "\n%" PRId64 " leaked clusters were found on the image.\n"
 600                    "This means waste of disk space, but no harm to data.\n",
 601                    check->leaks);
 602        }
 603
 604        if (check->check_errors) {
 605            qprintf(quiet,
 606                    "\n%" PRId64
 607                    " internal errors have occurred during the check.\n",
 608                    check->check_errors);
 609        }
 610    }
 611
 612    if (check->total_clusters != 0 && check->allocated_clusters != 0) {
 613        qprintf(quiet, "%" PRId64 "/%" PRId64 " = %0.2f%% allocated, "
 614                "%0.2f%% fragmented, %0.2f%% compressed clusters\n",
 615                check->allocated_clusters, check->total_clusters,
 616                check->allocated_clusters * 100.0 / check->total_clusters,
 617                check->fragmented_clusters * 100.0 / check->allocated_clusters,
 618                check->compressed_clusters * 100.0 /
 619                check->allocated_clusters);
 620    }
 621
 622    if (check->image_end_offset) {
 623        qprintf(quiet,
 624                "Image end offset: %" PRId64 "\n", check->image_end_offset);
 625    }
 626}
 627
 628static int collect_image_check(BlockDriverState *bs,
 629                   ImageCheck *check,
 630                   const char *filename,
 631                   const char *fmt,
 632                   int fix)
 633{
 634    int ret;
 635    BdrvCheckResult result;
 636
 637    ret = bdrv_check(bs, &result, fix);
 638    if (ret < 0) {
 639        return ret;
 640    }
 641
 642    check->filename                 = g_strdup(filename);
 643    check->format                   = g_strdup(bdrv_get_format_name(bs));
 644    check->check_errors             = result.check_errors;
 645    check->corruptions              = result.corruptions;
 646    check->has_corruptions          = result.corruptions != 0;
 647    check->leaks                    = result.leaks;
 648    check->has_leaks                = result.leaks != 0;
 649    check->corruptions_fixed        = result.corruptions_fixed;
 650    check->has_corruptions_fixed    = result.corruptions_fixed != 0;
 651    check->leaks_fixed              = result.leaks_fixed;
 652    check->has_leaks_fixed          = result.leaks_fixed != 0;
 653    check->image_end_offset         = result.image_end_offset;
 654    check->has_image_end_offset     = result.image_end_offset != 0;
 655    check->total_clusters           = result.bfi.total_clusters;
 656    check->has_total_clusters       = result.bfi.total_clusters != 0;
 657    check->allocated_clusters       = result.bfi.allocated_clusters;
 658    check->has_allocated_clusters   = result.bfi.allocated_clusters != 0;
 659    check->fragmented_clusters      = result.bfi.fragmented_clusters;
 660    check->has_fragmented_clusters  = result.bfi.fragmented_clusters != 0;
 661    check->compressed_clusters      = result.bfi.compressed_clusters;
 662    check->has_compressed_clusters  = result.bfi.compressed_clusters != 0;
 663
 664    return 0;
 665}
 666
 667/*
 668 * Checks an image for consistency. Exit codes:
 669 *
 670 *  0 - Check completed, image is good
 671 *  1 - Check not completed because of internal errors
 672 *  2 - Check completed, image is corrupted
 673 *  3 - Check completed, image has leaked clusters, but is good otherwise
 674 * 63 - Checks are not supported by the image format
 675 */
 676static int img_check(int argc, char **argv)
 677{
 678    int c, ret;
 679    OutputFormat output_format = OFORMAT_HUMAN;
 680    const char *filename, *fmt, *output, *cache;
 681    BlockBackend *blk;
 682    BlockDriverState *bs;
 683    int fix = 0;
 684    int flags = BDRV_O_CHECK;
 685    bool writethrough;
 686    ImageCheck *check;
 687    bool quiet = false;
 688    bool image_opts = false;
 689    bool force_share = false;
 690
 691    fmt = NULL;
 692    output = NULL;
 693    cache = BDRV_DEFAULT_CACHE;
 694
 695    for(;;) {
 696        int option_index = 0;
 697        static const struct option long_options[] = {
 698            {"help", no_argument, 0, 'h'},
 699            {"format", required_argument, 0, 'f'},
 700            {"repair", required_argument, 0, 'r'},
 701            {"output", required_argument, 0, OPTION_OUTPUT},
 702            {"object", required_argument, 0, OPTION_OBJECT},
 703            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
 704            {"force-share", no_argument, 0, 'U'},
 705            {0, 0, 0, 0}
 706        };
 707        c = getopt_long(argc, argv, ":hf:r:T:qU",
 708                        long_options, &option_index);
 709        if (c == -1) {
 710            break;
 711        }
 712        switch(c) {
 713        case ':':
 714            missing_argument(argv[optind - 1]);
 715            break;
 716        case '?':
 717            unrecognized_option(argv[optind - 1]);
 718            break;
 719        case 'h':
 720            help();
 721            break;
 722        case 'f':
 723            fmt = optarg;
 724            break;
 725        case 'r':
 726            flags |= BDRV_O_RDWR;
 727
 728            if (!strcmp(optarg, "leaks")) {
 729                fix = BDRV_FIX_LEAKS;
 730            } else if (!strcmp(optarg, "all")) {
 731                fix = BDRV_FIX_LEAKS | BDRV_FIX_ERRORS;
 732            } else {
 733                error_exit("Unknown option value for -r "
 734                           "(expecting 'leaks' or 'all'): %s", optarg);
 735            }
 736            break;
 737        case OPTION_OUTPUT:
 738            output = optarg;
 739            break;
 740        case 'T':
 741            cache = optarg;
 742            break;
 743        case 'q':
 744            quiet = true;
 745            break;
 746        case 'U':
 747            force_share = true;
 748            break;
 749        case OPTION_OBJECT: {
 750            QemuOpts *opts;
 751            opts = qemu_opts_parse_noisily(&qemu_object_opts,
 752                                           optarg, true);
 753            if (!opts) {
 754                return 1;
 755            }
 756        }   break;
 757        case OPTION_IMAGE_OPTS:
 758            image_opts = true;
 759            break;
 760        }
 761    }
 762    if (optind != argc - 1) {
 763        error_exit("Expecting one image file name");
 764    }
 765    filename = argv[optind++];
 766
 767    if (output && !strcmp(output, "json")) {
 768        output_format = OFORMAT_JSON;
 769    } else if (output && !strcmp(output, "human")) {
 770        output_format = OFORMAT_HUMAN;
 771    } else if (output) {
 772        error_report("--output must be used with human or json as argument.");
 773        return 1;
 774    }
 775
 776    if (qemu_opts_foreach(&qemu_object_opts,
 777                          user_creatable_add_opts_foreach,
 778                          qemu_img_object_print_help, &error_fatal)) {
 779        return 1;
 780    }
 781
 782    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
 783    if (ret < 0) {
 784        error_report("Invalid source cache option: %s", cache);
 785        return 1;
 786    }
 787
 788    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
 789                   force_share);
 790    if (!blk) {
 791        return 1;
 792    }
 793    bs = blk_bs(blk);
 794
 795    check = g_new0(ImageCheck, 1);
 796    ret = collect_image_check(bs, check, filename, fmt, fix);
 797
 798    if (ret == -ENOTSUP) {
 799        error_report("This image format does not support checks");
 800        ret = 63;
 801        goto fail;
 802    }
 803
 804    if (check->corruptions_fixed || check->leaks_fixed) {
 805        int corruptions_fixed, leaks_fixed;
 806        bool has_leaks_fixed, has_corruptions_fixed;
 807
 808        leaks_fixed         = check->leaks_fixed;
 809        has_leaks_fixed     = check->has_leaks_fixed;
 810        corruptions_fixed   = check->corruptions_fixed;
 811        has_corruptions_fixed = check->has_corruptions_fixed;
 812
 813        if (output_format == OFORMAT_HUMAN) {
 814            qprintf(quiet,
 815                    "The following inconsistencies were found and repaired:\n\n"
 816                    "    %" PRId64 " leaked clusters\n"
 817                    "    %" PRId64 " corruptions\n\n"
 818                    "Double checking the fixed image now...\n",
 819                    check->leaks_fixed,
 820                    check->corruptions_fixed);
 821        }
 822
 823        qapi_free_ImageCheck(check);
 824        check = g_new0(ImageCheck, 1);
 825        ret = collect_image_check(bs, check, filename, fmt, 0);
 826
 827        check->leaks_fixed          = leaks_fixed;
 828        check->has_leaks_fixed      = has_leaks_fixed;
 829        check->corruptions_fixed    = corruptions_fixed;
 830        check->has_corruptions_fixed = has_corruptions_fixed;
 831    }
 832
 833    if (!ret) {
 834        switch (output_format) {
 835        case OFORMAT_HUMAN:
 836            dump_human_image_check(check, quiet);
 837            break;
 838        case OFORMAT_JSON:
 839            dump_json_image_check(check, quiet);
 840            break;
 841        }
 842    }
 843
 844    if (ret || check->check_errors) {
 845        if (ret) {
 846            error_report("Check failed: %s", strerror(-ret));
 847        } else {
 848            error_report("Check failed");
 849        }
 850        ret = 1;
 851        goto fail;
 852    }
 853
 854    if (check->corruptions) {
 855        ret = 2;
 856    } else if (check->leaks) {
 857        ret = 3;
 858    } else {
 859        ret = 0;
 860    }
 861
 862fail:
 863    qapi_free_ImageCheck(check);
 864    blk_unref(blk);
 865    return ret;
 866}
 867
 868typedef struct CommonBlockJobCBInfo {
 869    BlockDriverState *bs;
 870    Error **errp;
 871} CommonBlockJobCBInfo;
 872
 873static void common_block_job_cb(void *opaque, int ret)
 874{
 875    CommonBlockJobCBInfo *cbi = opaque;
 876
 877    if (ret < 0) {
 878        error_setg_errno(cbi->errp, -ret, "Block job failed");
 879    }
 880}
 881
 882static void run_block_job(BlockJob *job, Error **errp)
 883{
 884    AioContext *aio_context = blk_get_aio_context(job->blk);
 885    int ret = 0;
 886
 887    aio_context_acquire(aio_context);
 888    job_ref(&job->job);
 889    do {
 890        float progress = 0.0f;
 891        aio_poll(aio_context, true);
 892        if (job->job.progress.total) {
 893            progress = (float)job->job.progress.current /
 894                       job->job.progress.total * 100.f;
 895        }
 896        qemu_progress_print(progress, 0);
 897    } while (!job_is_ready(&job->job) && !job_is_completed(&job->job));
 898
 899    if (!job_is_completed(&job->job)) {
 900        ret = job_complete_sync(&job->job, errp);
 901    } else {
 902        ret = job->job.ret;
 903    }
 904    job_unref(&job->job);
 905    aio_context_release(aio_context);
 906
 907    /* publish completion progress only when success */
 908    if (!ret) {
 909        qemu_progress_print(100.f, 0);
 910    }
 911}
 912
 913static int img_commit(int argc, char **argv)
 914{
 915    int c, ret, flags;
 916    const char *filename, *fmt, *cache, *base;
 917    BlockBackend *blk;
 918    BlockDriverState *bs, *base_bs;
 919    BlockJob *job;
 920    bool progress = false, quiet = false, drop = false;
 921    bool writethrough;
 922    Error *local_err = NULL;
 923    CommonBlockJobCBInfo cbi;
 924    bool image_opts = false;
 925    AioContext *aio_context;
 926
 927    fmt = NULL;
 928    cache = BDRV_DEFAULT_CACHE;
 929    base = NULL;
 930    for(;;) {
 931        static const struct option long_options[] = {
 932            {"help", no_argument, 0, 'h'},
 933            {"object", required_argument, 0, OPTION_OBJECT},
 934            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
 935            {0, 0, 0, 0}
 936        };
 937        c = getopt_long(argc, argv, ":f:ht:b:dpq",
 938                        long_options, NULL);
 939        if (c == -1) {
 940            break;
 941        }
 942        switch(c) {
 943        case ':':
 944            missing_argument(argv[optind - 1]);
 945            break;
 946        case '?':
 947            unrecognized_option(argv[optind - 1]);
 948            break;
 949        case 'h':
 950            help();
 951            break;
 952        case 'f':
 953            fmt = optarg;
 954            break;
 955        case 't':
 956            cache = optarg;
 957            break;
 958        case 'b':
 959            base = optarg;
 960            /* -b implies -d */
 961            drop = true;
 962            break;
 963        case 'd':
 964            drop = true;
 965            break;
 966        case 'p':
 967            progress = true;
 968            break;
 969        case 'q':
 970            quiet = true;
 971            break;
 972        case OPTION_OBJECT: {
 973            QemuOpts *opts;
 974            opts = qemu_opts_parse_noisily(&qemu_object_opts,
 975                                           optarg, true);
 976            if (!opts) {
 977                return 1;
 978            }
 979        }   break;
 980        case OPTION_IMAGE_OPTS:
 981            image_opts = true;
 982            break;
 983        }
 984    }
 985
 986    /* Progress is not shown in Quiet mode */
 987    if (quiet) {
 988        progress = false;
 989    }
 990
 991    if (optind != argc - 1) {
 992        error_exit("Expecting one image file name");
 993    }
 994    filename = argv[optind++];
 995
 996    if (qemu_opts_foreach(&qemu_object_opts,
 997                          user_creatable_add_opts_foreach,
 998                          qemu_img_object_print_help, &error_fatal)) {
 999        return 1;
1000    }
1001
1002    flags = BDRV_O_RDWR | BDRV_O_UNMAP;
1003    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
1004    if (ret < 0) {
1005        error_report("Invalid cache option: %s", cache);
1006        return 1;
1007    }
1008
1009    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
1010                   false);
1011    if (!blk) {
1012        return 1;
1013    }
1014    bs = blk_bs(blk);
1015
1016    qemu_progress_init(progress, 1.f);
1017    qemu_progress_print(0.f, 100);
1018
1019    if (base) {
1020        base_bs = bdrv_find_backing_image(bs, base);
1021        if (!base_bs) {
1022            error_setg(&local_err,
1023                       "Did not find '%s' in the backing chain of '%s'",
1024                       base, filename);
1025            goto done;
1026        }
1027    } else {
1028        /* This is different from QMP, which by default uses the deepest file in
1029         * the backing chain (i.e., the very base); however, the traditional
1030         * behavior of qemu-img commit is using the immediate backing file. */
1031        base_bs = backing_bs(bs);
1032        if (!base_bs) {
1033            error_setg(&local_err, "Image does not have a backing file");
1034            goto done;
1035        }
1036    }
1037
1038    cbi = (CommonBlockJobCBInfo){
1039        .errp = &local_err,
1040        .bs   = bs,
1041    };
1042
1043    aio_context = bdrv_get_aio_context(bs);
1044    aio_context_acquire(aio_context);
1045    commit_active_start("commit", bs, base_bs, JOB_DEFAULT, 0,
1046                        BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb,
1047                        &cbi, false, &local_err);
1048    aio_context_release(aio_context);
1049    if (local_err) {
1050        goto done;
1051    }
1052
1053    /* When the block job completes, the BlockBackend reference will point to
1054     * the old backing file. In order to avoid that the top image is already
1055     * deleted, so we can still empty it afterwards, increment the reference
1056     * counter here preemptively. */
1057    if (!drop) {
1058        bdrv_ref(bs);
1059    }
1060
1061    job = block_job_get("commit");
1062    assert(job);
1063    run_block_job(job, &local_err);
1064    if (local_err) {
1065        goto unref_backing;
1066    }
1067
1068    if (!drop && bs->drv->bdrv_make_empty) {
1069        ret = bs->drv->bdrv_make_empty(bs);
1070        if (ret) {
1071            error_setg_errno(&local_err, -ret, "Could not empty %s",
1072                             filename);
1073            goto unref_backing;
1074        }
1075    }
1076
1077unref_backing:
1078    if (!drop) {
1079        bdrv_unref(bs);
1080    }
1081
1082done:
1083    qemu_progress_end();
1084
1085    blk_unref(blk);
1086
1087    if (local_err) {
1088        error_report_err(local_err);
1089        return 1;
1090    }
1091
1092    qprintf(quiet, "Image committed.\n");
1093    return 0;
1094}
1095
1096/*
1097 * Returns -1 if 'buf' contains only zeroes, otherwise the byte index
1098 * of the first sector boundary within buf where the sector contains a
1099 * non-zero byte.  This function is robust to a buffer that is not
1100 * sector-aligned.
1101 */
1102static int64_t find_nonzero(const uint8_t *buf, int64_t n)
1103{
1104    int64_t i;
1105    int64_t end = QEMU_ALIGN_DOWN(n, BDRV_SECTOR_SIZE);
1106
1107    for (i = 0; i < end; i += BDRV_SECTOR_SIZE) {
1108        if (!buffer_is_zero(buf + i, BDRV_SECTOR_SIZE)) {
1109            return i;
1110        }
1111    }
1112    if (i < n && !buffer_is_zero(buf + i, n - end)) {
1113        return i;
1114    }
1115    return -1;
1116}
1117
1118/*
1119 * Returns true iff the first sector pointed to by 'buf' contains at least
1120 * a non-NUL byte.
1121 *
1122 * 'pnum' is set to the number of sectors (including and immediately following
1123 * the first one) that are known to be in the same allocated/unallocated state.
1124 * The function will try to align the end offset to alignment boundaries so
1125 * that the request will at least end aligned and consequtive requests will
1126 * also start at an aligned offset.
1127 */
1128static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum,
1129                                int64_t sector_num, int alignment)
1130{
1131    bool is_zero;
1132    int i, tail;
1133
1134    if (n <= 0) {
1135        *pnum = 0;
1136        return 0;
1137    }
1138    is_zero = buffer_is_zero(buf, 512);
1139    for(i = 1; i < n; i++) {
1140        buf += 512;
1141        if (is_zero != buffer_is_zero(buf, 512)) {
1142            break;
1143        }
1144    }
1145
1146    tail = (sector_num + i) & (alignment - 1);
1147    if (tail) {
1148        if (is_zero && i <= tail) {
1149            /* treat unallocated areas which only consist
1150             * of a small tail as allocated. */
1151            is_zero = false;
1152        }
1153        if (!is_zero) {
1154            /* align up end offset of allocated areas. */
1155            i += alignment - tail;
1156            i = MIN(i, n);
1157        } else {
1158            /* align down end offset of zero areas. */
1159            i -= tail;
1160        }
1161    }
1162    *pnum = i;
1163    return !is_zero;
1164}
1165
1166/*
1167 * Like is_allocated_sectors, but if the buffer starts with a used sector,
1168 * up to 'min' consecutive sectors containing zeros are ignored. This avoids
1169 * breaking up write requests for only small sparse areas.
1170 */
1171static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
1172    int min, int64_t sector_num, int alignment)
1173{
1174    int ret;
1175    int num_checked, num_used;
1176
1177    if (n < min) {
1178        min = n;
1179    }
1180
1181    ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1182    if (!ret) {
1183        return ret;
1184    }
1185
1186    num_used = *pnum;
1187    buf += BDRV_SECTOR_SIZE * *pnum;
1188    n -= *pnum;
1189    sector_num += *pnum;
1190    num_checked = num_used;
1191
1192    while (n > 0) {
1193        ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1194
1195        buf += BDRV_SECTOR_SIZE * *pnum;
1196        n -= *pnum;
1197        sector_num += *pnum;
1198        num_checked += *pnum;
1199        if (ret) {
1200            num_used = num_checked;
1201        } else if (*pnum >= min) {
1202            break;
1203        }
1204    }
1205
1206    *pnum = num_used;
1207    return 1;
1208}
1209
1210/*
1211 * Compares two buffers sector by sector. Returns 0 if the first
1212 * sector of each buffer matches, non-zero otherwise.
1213 *
1214 * pnum is set to the sector-aligned size of the buffer prefix that
1215 * has the same matching status as the first sector.
1216 */
1217static int compare_buffers(const uint8_t *buf1, const uint8_t *buf2,
1218                           int64_t bytes, int64_t *pnum)
1219{
1220    bool res;
1221    int64_t i = MIN(bytes, BDRV_SECTOR_SIZE);
1222
1223    assert(bytes > 0);
1224
1225    res = !!memcmp(buf1, buf2, i);
1226    while (i < bytes) {
1227        int64_t len = MIN(bytes - i, BDRV_SECTOR_SIZE);
1228
1229        if (!!memcmp(buf1 + i, buf2 + i, len) != res) {
1230            break;
1231        }
1232        i += len;
1233    }
1234
1235    *pnum = i;
1236    return res;
1237}
1238
1239#define IO_BUF_SIZE (2 * MiB)
1240
1241/*
1242 * Check if passed sectors are empty (not allocated or contain only 0 bytes)
1243 *
1244 * Intended for use by 'qemu-img compare': Returns 0 in case sectors are
1245 * filled with 0, 1 if sectors contain non-zero data (this is a comparison
1246 * failure), and 4 on error (the exit status for read errors), after emitting
1247 * an error message.
1248 *
1249 * @param blk:  BlockBackend for the image
1250 * @param offset: Starting offset to check
1251 * @param bytes: Number of bytes to check
1252 * @param filename: Name of disk file we are checking (logging purpose)
1253 * @param buffer: Allocated buffer for storing read data
1254 * @param quiet: Flag for quiet mode
1255 */
1256static int check_empty_sectors(BlockBackend *blk, int64_t offset,
1257                               int64_t bytes, const char *filename,
1258                               uint8_t *buffer, bool quiet)
1259{
1260    int ret = 0;
1261    int64_t idx;
1262
1263    ret = blk_pread(blk, offset, buffer, bytes);
1264    if (ret < 0) {
1265        error_report("Error while reading offset %" PRId64 " of %s: %s",
1266                     offset, filename, strerror(-ret));
1267        return 4;
1268    }
1269    idx = find_nonzero(buffer, bytes);
1270    if (idx >= 0) {
1271        qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1272                offset + idx);
1273        return 1;
1274    }
1275
1276    return 0;
1277}
1278
1279/*
1280 * Compares two images. Exit codes:
1281 *
1282 * 0 - Images are identical
1283 * 1 - Images differ
1284 * >1 - Error occurred
1285 */
1286static int img_compare(int argc, char **argv)
1287{
1288    const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
1289    BlockBackend *blk1, *blk2;
1290    BlockDriverState *bs1, *bs2;
1291    int64_t total_size1, total_size2;
1292    uint8_t *buf1 = NULL, *buf2 = NULL;
1293    int64_t pnum1, pnum2;
1294    int allocated1, allocated2;
1295    int ret = 0; /* return value - 0 Ident, 1 Different, >1 Error */
1296    bool progress = false, quiet = false, strict = false;
1297    int flags;
1298    bool writethrough;
1299    int64_t total_size;
1300    int64_t offset = 0;
1301    int64_t chunk;
1302    int c;
1303    uint64_t progress_base;
1304    bool image_opts = false;
1305    bool force_share = false;
1306
1307    cache = BDRV_DEFAULT_CACHE;
1308    for (;;) {
1309        static const struct option long_options[] = {
1310            {"help", no_argument, 0, 'h'},
1311            {"object", required_argument, 0, OPTION_OBJECT},
1312            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
1313            {"force-share", no_argument, 0, 'U'},
1314            {0, 0, 0, 0}
1315        };
1316        c = getopt_long(argc, argv, ":hf:F:T:pqsU",
1317                        long_options, NULL);
1318        if (c == -1) {
1319            break;
1320        }
1321        switch (c) {
1322        case ':':
1323            missing_argument(argv[optind - 1]);
1324            break;
1325        case '?':
1326            unrecognized_option(argv[optind - 1]);
1327            break;
1328        case 'h':
1329            help();
1330            break;
1331        case 'f':
1332            fmt1 = optarg;
1333            break;
1334        case 'F':
1335            fmt2 = optarg;
1336            break;
1337        case 'T':
1338            cache = optarg;
1339            break;
1340        case 'p':
1341            progress = true;
1342            break;
1343        case 'q':
1344            quiet = true;
1345            break;
1346        case 's':
1347            strict = true;
1348            break;
1349        case 'U':
1350            force_share = true;
1351            break;
1352        case OPTION_OBJECT: {
1353            QemuOpts *opts;
1354            opts = qemu_opts_parse_noisily(&qemu_object_opts,
1355                                           optarg, true);
1356            if (!opts) {
1357                ret = 2;
1358                goto out4;
1359            }
1360        }   break;
1361        case OPTION_IMAGE_OPTS:
1362            image_opts = true;
1363            break;
1364        }
1365    }
1366
1367    /* Progress is not shown in Quiet mode */
1368    if (quiet) {
1369        progress = false;
1370    }
1371
1372
1373    if (optind != argc - 2) {
1374        error_exit("Expecting two image file names");
1375    }
1376    filename1 = argv[optind++];
1377    filename2 = argv[optind++];
1378
1379    if (qemu_opts_foreach(&qemu_object_opts,
1380                          user_creatable_add_opts_foreach,
1381                          qemu_img_object_print_help, &error_fatal)) {
1382        ret = 2;
1383        goto out4;
1384    }
1385
1386    /* Initialize before goto out */
1387    qemu_progress_init(progress, 2.0);
1388
1389    flags = 0;
1390    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
1391    if (ret < 0) {
1392        error_report("Invalid source cache option: %s", cache);
1393        ret = 2;
1394        goto out3;
1395    }
1396
1397    blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet,
1398                    force_share);
1399    if (!blk1) {
1400        ret = 2;
1401        goto out3;
1402    }
1403
1404    blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet,
1405                    force_share);
1406    if (!blk2) {
1407        ret = 2;
1408        goto out2;
1409    }
1410    bs1 = blk_bs(blk1);
1411    bs2 = blk_bs(blk2);
1412
1413    buf1 = blk_blockalign(blk1, IO_BUF_SIZE);
1414    buf2 = blk_blockalign(blk2, IO_BUF_SIZE);
1415    total_size1 = blk_getlength(blk1);
1416    if (total_size1 < 0) {
1417        error_report("Can't get size of %s: %s",
1418                     filename1, strerror(-total_size1));
1419        ret = 4;
1420        goto out;
1421    }
1422    total_size2 = blk_getlength(blk2);
1423    if (total_size2 < 0) {
1424        error_report("Can't get size of %s: %s",
1425                     filename2, strerror(-total_size2));
1426        ret = 4;
1427        goto out;
1428    }
1429    total_size = MIN(total_size1, total_size2);
1430    progress_base = MAX(total_size1, total_size2);
1431
1432    qemu_progress_print(0, 100);
1433
1434    if (strict && total_size1 != total_size2) {
1435        ret = 1;
1436        qprintf(quiet, "Strict mode: Image size mismatch!\n");
1437        goto out;
1438    }
1439
1440    while (offset < total_size) {
1441        int status1, status2;
1442
1443        status1 = bdrv_block_status_above(bs1, NULL, offset,
1444                                          total_size1 - offset, &pnum1, NULL,
1445                                          NULL);
1446        if (status1 < 0) {
1447            ret = 3;
1448            error_report("Sector allocation test failed for %s", filename1);
1449            goto out;
1450        }
1451        allocated1 = status1 & BDRV_BLOCK_ALLOCATED;
1452
1453        status2 = bdrv_block_status_above(bs2, NULL, offset,
1454                                          total_size2 - offset, &pnum2, NULL,
1455                                          NULL);
1456        if (status2 < 0) {
1457            ret = 3;
1458            error_report("Sector allocation test failed for %s", filename2);
1459            goto out;
1460        }
1461        allocated2 = status2 & BDRV_BLOCK_ALLOCATED;
1462
1463        assert(pnum1 && pnum2);
1464        chunk = MIN(pnum1, pnum2);
1465
1466        if (strict) {
1467            if (status1 != status2) {
1468                ret = 1;
1469                qprintf(quiet, "Strict mode: Offset %" PRId64
1470                        " block status mismatch!\n", offset);
1471                goto out;
1472            }
1473        }
1474        if ((status1 & BDRV_BLOCK_ZERO) && (status2 & BDRV_BLOCK_ZERO)) {
1475            /* nothing to do */
1476        } else if (allocated1 == allocated2) {
1477            if (allocated1) {
1478                int64_t pnum;
1479
1480                chunk = MIN(chunk, IO_BUF_SIZE);
1481                ret = blk_pread(blk1, offset, buf1, chunk);
1482                if (ret < 0) {
1483                    error_report("Error while reading offset %" PRId64
1484                                 " of %s: %s",
1485                                 offset, filename1, strerror(-ret));
1486                    ret = 4;
1487                    goto out;
1488                }
1489                ret = blk_pread(blk2, offset, buf2, chunk);
1490                if (ret < 0) {
1491                    error_report("Error while reading offset %" PRId64
1492                                 " of %s: %s",
1493                                 offset, filename2, strerror(-ret));
1494                    ret = 4;
1495                    goto out;
1496                }
1497                ret = compare_buffers(buf1, buf2, chunk, &pnum);
1498                if (ret || pnum != chunk) {
1499                    qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1500                            offset + (ret ? 0 : pnum));
1501                    ret = 1;
1502                    goto out;
1503                }
1504            }
1505        } else {
1506            chunk = MIN(chunk, IO_BUF_SIZE);
1507            if (allocated1) {
1508                ret = check_empty_sectors(blk1, offset, chunk,
1509                                          filename1, buf1, quiet);
1510            } else {
1511                ret = check_empty_sectors(blk2, offset, chunk,
1512                                          filename2, buf1, quiet);
1513            }
1514            if (ret) {
1515                goto out;
1516            }
1517        }
1518        offset += chunk;
1519        qemu_progress_print(((float) chunk / progress_base) * 100, 100);
1520    }
1521
1522    if (total_size1 != total_size2) {
1523        BlockBackend *blk_over;
1524        const char *filename_over;
1525
1526        qprintf(quiet, "Warning: Image size mismatch!\n");
1527        if (total_size1 > total_size2) {
1528            blk_over = blk1;
1529            filename_over = filename1;
1530        } else {
1531            blk_over = blk2;
1532            filename_over = filename2;
1533        }
1534
1535        while (offset < progress_base) {
1536            ret = bdrv_block_status_above(blk_bs(blk_over), NULL, offset,
1537                                          progress_base - offset, &chunk,
1538                                          NULL, NULL);
1539            if (ret < 0) {
1540                ret = 3;
1541                error_report("Sector allocation test failed for %s",
1542                             filename_over);
1543                goto out;
1544
1545            }
1546            if (ret & BDRV_BLOCK_ALLOCATED && !(ret & BDRV_BLOCK_ZERO)) {
1547                chunk = MIN(chunk, IO_BUF_SIZE);
1548                ret = check_empty_sectors(blk_over, offset, chunk,
1549                                          filename_over, buf1, quiet);
1550                if (ret) {
1551                    goto out;
1552                }
1553            }
1554            offset += chunk;
1555            qemu_progress_print(((float) chunk / progress_base) * 100, 100);
1556        }
1557    }
1558
1559    qprintf(quiet, "Images are identical.\n");
1560    ret = 0;
1561
1562out:
1563    qemu_vfree(buf1);
1564    qemu_vfree(buf2);
1565    blk_unref(blk2);
1566out2:
1567    blk_unref(blk1);
1568out3:
1569    qemu_progress_end();
1570out4:
1571    return ret;
1572}
1573
/*
 * How 'qemu-img convert' treats a range of the source, as decided by
 * convert_iteration_sectors() and acted upon by convert_co_write().
 */
enum ImgConvertBlockStatus {
    /* Contents are written to the target, unless they turn out to be
     * zero and sparse detection is enabled. */
    BLK_DATA,
    /* Range is zero: written as zeroes, or skipped entirely when the
     * target has zero init. */
    BLK_ZERO,
    /* Range is left unallocated in the target so that the target's
     * backing file is visible at that offset. */
    BLK_BACKING_FILE,
};
1579
1580#define MAX_COROUTINES 16
1581
1582typedef struct ImgConvertState {
1583    BlockBackend **src;
1584    int64_t *src_sectors;
1585    int src_num;
1586    int64_t total_sectors;
1587    int64_t allocated_sectors;
1588    int64_t allocated_done;
1589    int64_t sector_num;
1590    int64_t wr_offs;
1591    enum ImgConvertBlockStatus status;
1592    int64_t sector_next_status;
1593    BlockBackend *target;
1594    bool has_zero_init;
1595    bool compressed;
1596    bool unallocated_blocks_are_zero;
1597    bool target_is_new;
1598    bool target_has_backing;
1599    int64_t target_backing_sectors; /* negative if unknown */
1600    bool wr_in_order;
1601    bool copy_range;
1602    bool salvage;
1603    bool quiet;
1604    int min_sparse;
1605    int alignment;
1606    size_t cluster_sectors;
1607    size_t buf_sectors;
1608    long num_coroutines;
1609    int running_coroutines;
1610    Coroutine *co[MAX_COROUTINES];
1611    int64_t wait_sector_num[MAX_COROUTINES];
1612    CoMutex lock;
1613    int ret;
1614} ImgConvertState;
1615
1616static void convert_select_part(ImgConvertState *s, int64_t sector_num,
1617                                int *src_cur, int64_t *src_cur_offset)
1618{
1619    *src_cur = 0;
1620    *src_cur_offset = 0;
1621    while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) {
1622        *src_cur_offset += s->src_sectors[*src_cur];
1623        (*src_cur)++;
1624        assert(*src_cur < s->src_num);
1625    }
1626}
1627
1628static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
1629{
1630    int64_t src_cur_offset;
1631    int ret, n, src_cur;
1632    bool post_backing_zero = false;
1633
1634    convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1635
1636    assert(s->total_sectors > sector_num);
1637    n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
1638
1639    if (s->target_backing_sectors >= 0) {
1640        if (sector_num >= s->target_backing_sectors) {
1641            post_backing_zero = s->unallocated_blocks_are_zero;
1642        } else if (sector_num + n > s->target_backing_sectors) {
1643            /* Split requests around target_backing_sectors (because
1644             * starting from there, zeros are handled differently) */
1645            n = s->target_backing_sectors - sector_num;
1646        }
1647    }
1648
1649    if (s->sector_next_status <= sector_num) {
1650        uint64_t offset = (sector_num - src_cur_offset) * BDRV_SECTOR_SIZE;
1651        int64_t count;
1652
1653        do {
1654            count = n * BDRV_SECTOR_SIZE;
1655
1656            if (s->target_has_backing) {
1657                ret = bdrv_block_status(blk_bs(s->src[src_cur]), offset,
1658                                        count, &count, NULL, NULL);
1659            } else {
1660                ret = bdrv_block_status_above(blk_bs(s->src[src_cur]), NULL,
1661                                              offset, count, &count, NULL,
1662                                              NULL);
1663            }
1664
1665            if (ret < 0) {
1666                if (s->salvage) {
1667                    if (n == 1) {
1668                        if (!s->quiet) {
1669                            warn_report("error while reading block status at "
1670                                        "offset %" PRIu64 ": %s", offset,
1671                                        strerror(-ret));
1672                        }
1673                        /* Just try to read the data, then */
1674                        ret = BDRV_BLOCK_DATA;
1675                        count = BDRV_SECTOR_SIZE;
1676                    } else {
1677                        /* Retry on a shorter range */
1678                        n = DIV_ROUND_UP(n, 4);
1679                    }
1680                } else {
1681                    error_report("error while reading block status at offset "
1682                                 "%" PRIu64 ": %s", offset, strerror(-ret));
1683                    return ret;
1684                }
1685            }
1686        } while (ret < 0);
1687
1688        n = DIV_ROUND_UP(count, BDRV_SECTOR_SIZE);
1689
1690        if (ret & BDRV_BLOCK_ZERO) {
1691            s->status = post_backing_zero ? BLK_BACKING_FILE : BLK_ZERO;
1692        } else if (ret & BDRV_BLOCK_DATA) {
1693            s->status = BLK_DATA;
1694        } else {
1695            s->status = s->target_has_backing ? BLK_BACKING_FILE : BLK_DATA;
1696        }
1697
1698        s->sector_next_status = sector_num + n;
1699    }
1700
1701    n = MIN(n, s->sector_next_status - sector_num);
1702    if (s->status == BLK_DATA) {
1703        n = MIN(n, s->buf_sectors);
1704    }
1705
1706    /* We need to write complete clusters for compressed images, so if an
1707     * unallocated area is shorter than that, we must consider the whole
1708     * cluster allocated. */
1709    if (s->compressed) {
1710        if (n < s->cluster_sectors) {
1711            n = MIN(s->cluster_sectors, s->total_sectors - sector_num);
1712            s->status = BLK_DATA;
1713        } else {
1714            n = QEMU_ALIGN_DOWN(n, s->cluster_sectors);
1715        }
1716    }
1717
1718    return n;
1719}
1720
1721static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num,
1722                                        int nb_sectors, uint8_t *buf)
1723{
1724    uint64_t single_read_until = 0;
1725    int n, ret;
1726
1727    assert(nb_sectors <= s->buf_sectors);
1728    while (nb_sectors > 0) {
1729        BlockBackend *blk;
1730        int src_cur;
1731        int64_t bs_sectors, src_cur_offset;
1732        uint64_t offset;
1733
1734        /* In the case of compression with multiple source files, we can get a
1735         * nb_sectors that spreads into the next part. So we must be able to
1736         * read across multiple BDSes for one convert_read() call. */
1737        convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1738        blk = s->src[src_cur];
1739        bs_sectors = s->src_sectors[src_cur];
1740
1741        offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1742
1743        n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1744        if (single_read_until > offset) {
1745            n = 1;
1746        }
1747
1748        ret = blk_co_pread(blk, offset, n << BDRV_SECTOR_BITS, buf, 0);
1749        if (ret < 0) {
1750            if (s->salvage) {
1751                if (n > 1) {
1752                    single_read_until = offset + (n << BDRV_SECTOR_BITS);
1753                    continue;
1754                } else {
1755                    if (!s->quiet) {
1756                        warn_report("error while reading offset %" PRIu64
1757                                    ": %s", offset, strerror(-ret));
1758                    }
1759                    memset(buf, 0, BDRV_SECTOR_SIZE);
1760                }
1761            } else {
1762                return ret;
1763            }
1764        }
1765
1766        sector_num += n;
1767        nb_sectors -= n;
1768        buf += n * BDRV_SECTOR_SIZE;
1769    }
1770
1771    return 0;
1772}
1773
1774
1775static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
1776                                         int nb_sectors, uint8_t *buf,
1777                                         enum ImgConvertBlockStatus status)
1778{
1779    int ret;
1780
1781    while (nb_sectors > 0) {
1782        int n = nb_sectors;
1783        BdrvRequestFlags flags = s->compressed ? BDRV_REQ_WRITE_COMPRESSED : 0;
1784
1785        switch (status) {
1786        case BLK_BACKING_FILE:
1787            /* If we have a backing file, leave clusters unallocated that are
1788             * unallocated in the source image, so that the backing file is
1789             * visible at the respective offset. */
1790            assert(s->target_has_backing);
1791            break;
1792
1793        case BLK_DATA:
1794            /* If we're told to keep the target fully allocated (-S 0) or there
1795             * is real non-zero data, we must write it. Otherwise we can treat
1796             * it as zero sectors.
1797             * Compressed clusters need to be written as a whole, so in that
1798             * case we can only save the write if the buffer is completely
1799             * zeroed. */
1800            if (!s->min_sparse ||
1801                (!s->compressed &&
1802                 is_allocated_sectors_min(buf, n, &n, s->min_sparse,
1803                                          sector_num, s->alignment)) ||
1804                (s->compressed &&
1805                 !buffer_is_zero(buf, n * BDRV_SECTOR_SIZE)))
1806            {
1807                ret = blk_co_pwrite(s->target, sector_num << BDRV_SECTOR_BITS,
1808                                    n << BDRV_SECTOR_BITS, buf, flags);
1809                if (ret < 0) {
1810                    return ret;
1811                }
1812                break;
1813            }
1814            /* fall-through */
1815
1816        case BLK_ZERO:
1817            if (s->has_zero_init) {
1818                assert(!s->target_has_backing);
1819                break;
1820            }
1821            ret = blk_co_pwrite_zeroes(s->target,
1822                                       sector_num << BDRV_SECTOR_BITS,
1823                                       n << BDRV_SECTOR_BITS,
1824                                       BDRV_REQ_MAY_UNMAP);
1825            if (ret < 0) {
1826                return ret;
1827            }
1828            break;
1829        }
1830
1831        sector_num += n;
1832        nb_sectors -= n;
1833        buf += n * BDRV_SECTOR_SIZE;
1834    }
1835
1836    return 0;
1837}
1838
1839static int coroutine_fn convert_co_copy_range(ImgConvertState *s, int64_t sector_num,
1840                                              int nb_sectors)
1841{
1842    int n, ret;
1843
1844    while (nb_sectors > 0) {
1845        BlockBackend *blk;
1846        int src_cur;
1847        int64_t bs_sectors, src_cur_offset;
1848        int64_t offset;
1849
1850        convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1851        offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1852        blk = s->src[src_cur];
1853        bs_sectors = s->src_sectors[src_cur];
1854
1855        n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1856
1857        ret = blk_co_copy_range(blk, offset, s->target,
1858                                sector_num << BDRV_SECTOR_BITS,
1859                                n << BDRV_SECTOR_BITS, 0, 0);
1860        if (ret < 0) {
1861            return ret;
1862        }
1863
1864        sector_num += n;
1865        nb_sectors -= n;
1866    }
1867    return 0;
1868}
1869
1870static void coroutine_fn convert_co_do_copy(void *opaque)
1871{
1872    ImgConvertState *s = opaque;
1873    uint8_t *buf = NULL;
1874    int ret, i;
1875    int index = -1;
1876
1877    for (i = 0; i < s->num_coroutines; i++) {
1878        if (s->co[i] == qemu_coroutine_self()) {
1879            index = i;
1880            break;
1881        }
1882    }
1883    assert(index >= 0);
1884
1885    s->running_coroutines++;
1886    buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
1887
1888    while (1) {
1889        int n;
1890        int64_t sector_num;
1891        enum ImgConvertBlockStatus status;
1892        bool copy_range;
1893
1894        qemu_co_mutex_lock(&s->lock);
1895        if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
1896            qemu_co_mutex_unlock(&s->lock);
1897            break;
1898        }
1899        n = convert_iteration_sectors(s, s->sector_num);
1900        if (n < 0) {
1901            qemu_co_mutex_unlock(&s->lock);
1902            s->ret = n;
1903            break;
1904        }
1905        /* save current sector and allocation status to local variables */
1906        sector_num = s->sector_num;
1907        status = s->status;
1908        if (!s->min_sparse && s->status == BLK_ZERO) {
1909            n = MIN(n, s->buf_sectors);
1910        }
1911        /* increment global sector counter so that other coroutines can
1912         * already continue reading beyond this request */
1913        s->sector_num += n;
1914        qemu_co_mutex_unlock(&s->lock);
1915
1916        if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) {
1917            s->allocated_done += n;
1918            qemu_progress_print(100.0 * s->allocated_done /
1919                                        s->allocated_sectors, 0);
1920        }
1921
1922retry:
1923        copy_range = s->copy_range && s->status == BLK_DATA;
1924        if (status == BLK_DATA && !copy_range) {
1925            ret = convert_co_read(s, sector_num, n, buf);
1926            if (ret < 0) {
1927                error_report("error while reading at byte %lld: %s",
1928                             sector_num * BDRV_SECTOR_SIZE, strerror(-ret));
1929                s->ret = ret;
1930            }
1931        } else if (!s->min_sparse && status == BLK_ZERO) {
1932            status = BLK_DATA;
1933            memset(buf, 0x00, n * BDRV_SECTOR_SIZE);
1934        }
1935
1936        if (s->wr_in_order) {
1937            /* keep writes in order */
1938            while (s->wr_offs != sector_num && s->ret == -EINPROGRESS) {
1939                s->wait_sector_num[index] = sector_num;
1940                qemu_coroutine_yield();
1941            }
1942            s->wait_sector_num[index] = -1;
1943        }
1944
1945        if (s->ret == -EINPROGRESS) {
1946            if (copy_range) {
1947                ret = convert_co_copy_range(s, sector_num, n);
1948                if (ret) {
1949                    s->copy_range = false;
1950                    goto retry;
1951                }
1952            } else {
1953                ret = convert_co_write(s, sector_num, n, buf, status);
1954            }
1955            if (ret < 0) {
1956                error_report("error while writing at byte %lld: %s",
1957                             sector_num * BDRV_SECTOR_SIZE, strerror(-ret));
1958                s->ret = ret;
1959            }
1960        }
1961
1962        if (s->wr_in_order) {
1963            /* reenter the coroutine that might have waited
1964             * for this write to complete */
1965            s->wr_offs = sector_num + n;
1966            for (i = 0; i < s->num_coroutines; i++) {
1967                if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) {
1968                    /*
1969                     * A -> B -> A cannot occur because A has
1970                     * s->wait_sector_num[i] == -1 during A -> B.  Therefore
1971                     * B will never enter A during this time window.
1972                     */
1973                    qemu_coroutine_enter(s->co[i]);
1974                    break;
1975                }
1976            }
1977        }
1978    }
1979
1980    qemu_vfree(buf);
1981    s->co[index] = NULL;
1982    s->running_coroutines--;
1983    if (!s->running_coroutines && s->ret == -EINPROGRESS) {
1984        /* the convert job finished successfully */
1985        s->ret = 0;
1986    }
1987}
1988
1989static int convert_do_copy(ImgConvertState *s)
1990{
1991    int ret, i, n;
1992    int64_t sector_num = 0;
1993
1994    /* Check whether we have zero initialisation or can get it efficiently */
1995    if (!s->has_zero_init && s->target_is_new && s->min_sparse &&
1996        !s->target_has_backing) {
1997        s->has_zero_init = bdrv_has_zero_init(blk_bs(s->target));
1998    }
1999
2000    if (!s->has_zero_init && !s->target_has_backing &&
2001        bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)))
2002    {
2003        ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK);
2004        if (ret == 0) {
2005            s->has_zero_init = true;
2006        }
2007    }
2008
2009    /* Allocate buffer for copied data. For compressed images, only one cluster
2010     * can be copied at a time. */
2011    if (s->compressed) {
2012        if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
2013            error_report("invalid cluster size");
2014            return -EINVAL;
2015        }
2016        s->buf_sectors = s->cluster_sectors;
2017    }
2018
2019    while (sector_num < s->total_sectors) {
2020        n = convert_iteration_sectors(s, sector_num);
2021        if (n < 0) {
2022            return n;
2023        }
2024        if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
2025        {
2026            s->allocated_sectors += n;
2027        }
2028        sector_num += n;
2029    }
2030
2031    /* Do the copy */
2032    s->sector_next_status = 0;
2033    s->ret = -EINPROGRESS;
2034
2035    qemu_co_mutex_init(&s->lock);
2036    for (i = 0; i < s->num_coroutines; i++) {
2037        s->co[i] = qemu_coroutine_create(convert_co_do_copy, s);
2038        s->wait_sector_num[i] = -1;
2039        qemu_coroutine_enter(s->co[i]);
2040    }
2041
2042    while (s->running_coroutines) {
2043        main_loop_wait(false);
2044    }
2045
2046    if (s->compressed && !s->ret) {
2047        /* signal EOF to align */
2048        ret = blk_pwrite_compressed(s->target, 0, NULL, 0);
2049        if (ret < 0) {
2050            return ret;
2051        }
2052    }
2053
2054    return s->ret;
2055}
2056
/*
 * Upper limit, in 512-byte sectors, for the convert copy buffer (32768
 * sectors = 16 MiB).  img_convert() also uses it to bound the value accepted
 * for the -S option.
 */
#define MAX_BUF_SECTORS 32768
2058
2059static int img_convert(int argc, char **argv)
2060{
2061    int c, bs_i, flags, src_flags = 0;
2062    const char *fmt = NULL, *out_fmt = NULL, *cache = "unsafe",
2063               *src_cache = BDRV_DEFAULT_CACHE, *out_baseimg = NULL,
2064               *out_filename, *out_baseimg_param, *snapshot_name = NULL;
2065    BlockDriver *drv = NULL, *proto_drv = NULL;
2066    BlockDriverInfo bdi;
2067    BlockDriverState *out_bs;
2068    QemuOpts *opts = NULL, *sn_opts = NULL;
2069    QemuOptsList *create_opts = NULL;
2070    QDict *open_opts = NULL;
2071    char *options = NULL;
2072    Error *local_err = NULL;
2073    bool writethrough, src_writethrough, image_opts = false,
2074         skip_create = false, progress = false, tgt_image_opts = false;
2075    int64_t ret = -EINVAL;
2076    bool force_share = false;
2077    bool explict_min_sparse = false;
2078
2079    ImgConvertState s = (ImgConvertState) {
2080        /* Need at least 4k of zeros for sparse detection */
2081        .min_sparse         = 8,
2082        .copy_range         = false,
2083        .buf_sectors        = IO_BUF_SIZE / BDRV_SECTOR_SIZE,
2084        .wr_in_order        = true,
2085        .num_coroutines     = 8,
2086    };
2087
2088    for(;;) {
2089        static const struct option long_options[] = {
2090            {"help", no_argument, 0, 'h'},
2091            {"object", required_argument, 0, OPTION_OBJECT},
2092            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2093            {"force-share", no_argument, 0, 'U'},
2094            {"target-image-opts", no_argument, 0, OPTION_TARGET_IMAGE_OPTS},
2095            {"salvage", no_argument, 0, OPTION_SALVAGE},
2096            {"target-is-zero", no_argument, 0, OPTION_TARGET_IS_ZERO},
2097            {0, 0, 0, 0}
2098        };
2099        c = getopt_long(argc, argv, ":hf:O:B:Cco:l:S:pt:T:qnm:WU",
2100                        long_options, NULL);
2101        if (c == -1) {
2102            break;
2103        }
2104        switch(c) {
2105        case ':':
2106            missing_argument(argv[optind - 1]);
2107            break;
2108        case '?':
2109            unrecognized_option(argv[optind - 1]);
2110            break;
2111        case 'h':
2112            help();
2113            break;
2114        case 'f':
2115            fmt = optarg;
2116            break;
2117        case 'O':
2118            out_fmt = optarg;
2119            break;
2120        case 'B':
2121            out_baseimg = optarg;
2122            break;
2123        case 'C':
2124            s.copy_range = true;
2125            break;
2126        case 'c':
2127            s.compressed = true;
2128            break;
2129        case 'o':
2130            if (!is_valid_option_list(optarg)) {
2131                error_report("Invalid option list: %s", optarg);
2132                goto fail_getopt;
2133            }
2134            if (!options) {
2135                options = g_strdup(optarg);
2136            } else {
2137                char *old_options = options;
2138                options = g_strdup_printf("%s,%s", options, optarg);
2139                g_free(old_options);
2140            }
2141            break;
2142        case 'l':
2143            if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
2144                sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
2145                                                  optarg, false);
2146                if (!sn_opts) {
2147                    error_report("Failed in parsing snapshot param '%s'",
2148                                 optarg);
2149                    goto fail_getopt;
2150                }
2151            } else {
2152                snapshot_name = optarg;
2153            }
2154            break;
2155        case 'S':
2156        {
2157            int64_t sval;
2158
2159            sval = cvtnum(optarg);
2160            if (sval < 0 || !QEMU_IS_ALIGNED(sval, BDRV_SECTOR_SIZE) ||
2161                sval / BDRV_SECTOR_SIZE > MAX_BUF_SECTORS) {
2162                error_report("Invalid buffer size for sparse output specified. "
2163                    "Valid sizes are multiples of %llu up to %llu. Select "
2164                    "0 to disable sparse detection (fully allocates output).",
2165                    BDRV_SECTOR_SIZE, MAX_BUF_SECTORS * BDRV_SECTOR_SIZE);
2166                goto fail_getopt;
2167            }
2168
2169            s.min_sparse = sval / BDRV_SECTOR_SIZE;
2170            explict_min_sparse = true;
2171            break;
2172        }
2173        case 'p':
2174            progress = true;
2175            break;
2176        case 't':
2177            cache = optarg;
2178            break;
2179        case 'T':
2180            src_cache = optarg;
2181            break;
2182        case 'q':
2183            s.quiet = true;
2184            break;
2185        case 'n':
2186            skip_create = true;
2187            break;
2188        case 'm':
2189            if (qemu_strtol(optarg, NULL, 0, &s.num_coroutines) ||
2190                s.num_coroutines < 1 || s.num_coroutines > MAX_COROUTINES) {
2191                error_report("Invalid number of coroutines. Allowed number of"
2192                             " coroutines is between 1 and %d", MAX_COROUTINES);
2193                goto fail_getopt;
2194            }
2195            break;
2196        case 'W':
2197            s.wr_in_order = false;
2198            break;
2199        case 'U':
2200            force_share = true;
2201            break;
2202        case OPTION_OBJECT: {
2203            QemuOpts *object_opts;
2204            object_opts = qemu_opts_parse_noisily(&qemu_object_opts,
2205                                                  optarg, true);
2206            if (!object_opts) {
2207                goto fail_getopt;
2208            }
2209            break;
2210        }
2211        case OPTION_IMAGE_OPTS:
2212            image_opts = true;
2213            break;
2214        case OPTION_SALVAGE:
2215            s.salvage = true;
2216            break;
2217        case OPTION_TARGET_IMAGE_OPTS:
2218            tgt_image_opts = true;
2219            break;
2220        case OPTION_TARGET_IS_ZERO:
2221            /*
2222             * The user asserting that the target is blank has the
2223             * same effect as the target driver supporting zero
2224             * initialisation.
2225             */
2226            s.has_zero_init = true;
2227            break;
2228        }
2229    }
2230
2231    if (!out_fmt && !tgt_image_opts) {
2232        out_fmt = "raw";
2233    }
2234
2235    if (qemu_opts_foreach(&qemu_object_opts,
2236                          user_creatable_add_opts_foreach,
2237                          qemu_img_object_print_help, &error_fatal)) {
2238        goto fail_getopt;
2239    }
2240
2241    if (s.compressed && s.copy_range) {
2242        error_report("Cannot enable copy offloading when -c is used");
2243        goto fail_getopt;
2244    }
2245
2246    if (explict_min_sparse && s.copy_range) {
2247        error_report("Cannot enable copy offloading when -S is used");
2248        goto fail_getopt;
2249    }
2250
2251    if (s.copy_range && s.salvage) {
2252        error_report("Cannot use copy offloading in salvaging mode");
2253        goto fail_getopt;
2254    }
2255
2256    if (tgt_image_opts && !skip_create) {
2257        error_report("--target-image-opts requires use of -n flag");
2258        goto fail_getopt;
2259    }
2260
2261    if (skip_create && options) {
2262        warn_report("-o has no effect when skipping image creation");
2263        warn_report("This will become an error in future QEMU versions.");
2264    }
2265
2266    if (s.has_zero_init && !skip_create) {
2267        error_report("--target-is-zero requires use of -n flag");
2268        goto fail_getopt;
2269    }
2270
2271    s.src_num = argc - optind - 1;
2272    out_filename = s.src_num >= 1 ? argv[argc - 1] : NULL;
2273
2274    if (options && has_help_option(options)) {
2275        if (out_fmt) {
2276            ret = print_block_option_help(out_filename, out_fmt);
2277            goto fail_getopt;
2278        } else {
2279            error_report("Option help requires a format be specified");
2280            goto fail_getopt;
2281        }
2282    }
2283
2284    if (s.src_num < 1) {
2285        error_report("Must specify image file name");
2286        goto fail_getopt;
2287    }
2288
2289
2290    /* ret is still -EINVAL until here */
2291    ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
2292    if (ret < 0) {
2293        error_report("Invalid source cache option: %s", src_cache);
2294        goto fail_getopt;
2295    }
2296
2297    /* Initialize before goto out */
2298    if (s.quiet) {
2299        progress = false;
2300    }
2301    qemu_progress_init(progress, 1.0);
2302    qemu_progress_print(0, 100);
2303
2304    s.src = g_new0(BlockBackend *, s.src_num);
2305    s.src_sectors = g_new(int64_t, s.src_num);
2306
2307    for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2308        s.src[bs_i] = img_open(image_opts, argv[optind + bs_i],
2309                               fmt, src_flags, src_writethrough, s.quiet,
2310                               force_share);
2311        if (!s.src[bs_i]) {
2312            ret = -1;
2313            goto out;
2314        }
2315        s.src_sectors[bs_i] = blk_nb_sectors(s.src[bs_i]);
2316        if (s.src_sectors[bs_i] < 0) {
2317            error_report("Could not get size of %s: %s",
2318                         argv[optind + bs_i], strerror(-s.src_sectors[bs_i]));
2319            ret = -1;
2320            goto out;
2321        }
2322        s.total_sectors += s.src_sectors[bs_i];
2323    }
2324
2325    if (sn_opts) {
2326        bdrv_snapshot_load_tmp(blk_bs(s.src[0]),
2327                               qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
2328                               qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
2329                               &local_err);
2330    } else if (snapshot_name != NULL) {
2331        if (s.src_num > 1) {
2332            error_report("No support for concatenating multiple snapshot");
2333            ret = -1;
2334            goto out;
2335        }
2336
2337        bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(s.src[0]), snapshot_name,
2338                                             &local_err);
2339    }
2340    if (local_err) {
2341        error_reportf_err(local_err, "Failed to load snapshot: ");
2342        ret = -1;
2343        goto out;
2344    }
2345
2346    if (!skip_create) {
2347        /* Find driver and parse its options */
2348        drv = bdrv_find_format(out_fmt);
2349        if (!drv) {
2350            error_report("Unknown file format '%s'", out_fmt);
2351            ret = -1;
2352            goto out;
2353        }
2354
2355        proto_drv = bdrv_find_protocol(out_filename, true, &local_err);
2356        if (!proto_drv) {
2357            error_report_err(local_err);
2358            ret = -1;
2359            goto out;
2360        }
2361
2362        if (!drv->create_opts) {
2363            error_report("Format driver '%s' does not support image creation",
2364                         drv->format_name);
2365            ret = -1;
2366            goto out;
2367        }
2368
2369        if (!proto_drv->create_opts) {
2370            error_report("Protocol driver '%s' does not support image creation",
2371                         proto_drv->format_name);
2372            ret = -1;
2373            goto out;
2374        }
2375
2376        create_opts = qemu_opts_append(create_opts, drv->create_opts);
2377        create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
2378
2379        opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
2380        if (options) {
2381            qemu_opts_do_parse(opts, options, NULL, &local_err);
2382            if (local_err) {
2383                error_report_err(local_err);
2384                ret = -1;
2385                goto out;
2386            }
2387        }
2388
2389        qemu_opt_set_number(opts, BLOCK_OPT_SIZE, s.total_sectors * 512,
2390                            &error_abort);
2391        ret = add_old_style_options(out_fmt, opts, out_baseimg, NULL);
2392        if (ret < 0) {
2393            goto out;
2394        }
2395    }
2396
2397    /* Get backing file name if -o backing_file was used */
2398    out_baseimg_param = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
2399    if (out_baseimg_param) {
2400        out_baseimg = out_baseimg_param;
2401    }
2402    s.target_has_backing = (bool) out_baseimg;
2403
2404    if (s.has_zero_init && s.target_has_backing) {
2405        error_report("Cannot use --target-is-zero when the destination "
2406                     "image has a backing file");
2407        goto out;
2408    }
2409
2410    if (s.src_num > 1 && out_baseimg) {
2411        error_report("Having a backing file for the target makes no sense when "
2412                     "concatenating multiple input images");
2413        ret = -1;
2414        goto out;
2415    }
2416
2417    /* Check if compression is supported */
2418    if (s.compressed) {
2419        bool encryption =
2420            qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, false);
2421        const char *encryptfmt =
2422            qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT);
2423        const char *preallocation =
2424            qemu_opt_get(opts, BLOCK_OPT_PREALLOC);
2425
2426        if (drv && !block_driver_can_compress(drv)) {
2427            error_report("Compression not supported for this file format");
2428            ret = -1;
2429            goto out;
2430        }
2431
2432        if (encryption || encryptfmt) {
2433            error_report("Compression and encryption not supported at "
2434                         "the same time");
2435            ret = -1;
2436            goto out;
2437        }
2438
2439        if (preallocation
2440            && strcmp(preallocation, "off"))
2441        {
2442            error_report("Compression and preallocation not supported at "
2443                         "the same time");
2444            ret = -1;
2445            goto out;
2446        }
2447    }
2448
2449    /*
2450     * The later open call will need any decryption secrets, and
2451     * bdrv_create() will purge "opts", so extract them now before
2452     * they are lost.
2453     */
2454    if (!skip_create) {
2455        open_opts = qdict_new();
2456        qemu_opt_foreach(opts, img_add_key_secrets, open_opts, &error_abort);
2457    }
2458
2459    if (!skip_create) {
2460        /* Create the new image */
2461        ret = bdrv_create(drv, out_filename, opts, &local_err);
2462        if (ret < 0) {
2463            error_reportf_err(local_err, "%s: error while converting %s: ",
2464                              out_filename, out_fmt);
2465            goto out;
2466        }
2467    }
2468
2469    s.target_is_new = !skip_create;
2470
2471    flags = s.min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR;
2472    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
2473    if (ret < 0) {
2474        error_report("Invalid cache option: %s", cache);
2475        goto out;
2476    }
2477
2478    if (skip_create) {
2479        s.target = img_open(tgt_image_opts, out_filename, out_fmt,
2480                            flags, writethrough, s.quiet, false);
2481    } else {
2482        /* TODO ultimately we should allow --target-image-opts
2483         * to be used even when -n is not given.
2484         * That has to wait for bdrv_create to be improved
2485         * to allow filenames in option syntax
2486         */
2487        s.target = img_open_file(out_filename, open_opts, out_fmt,
2488                                 flags, writethrough, s.quiet, false);
2489        open_opts = NULL; /* blk_new_open will have freed it */
2490    }
2491    if (!s.target) {
2492        ret = -1;
2493        goto out;
2494    }
2495    out_bs = blk_bs(s.target);
2496
2497    if (s.compressed && !block_driver_can_compress(out_bs->drv)) {
2498        error_report("Compression not supported for this file format");
2499        ret = -1;
2500        goto out;
2501    }
2502
2503    /* increase bufsectors from the default 4096 (2M) if opt_transfer
2504     * or discard_alignment of the out_bs is greater. Limit to
2505     * MAX_BUF_SECTORS as maximum which is currently 32768 (16MB). */
2506    s.buf_sectors = MIN(MAX_BUF_SECTORS,
2507                        MAX(s.buf_sectors,
2508                            MAX(out_bs->bl.opt_transfer >> BDRV_SECTOR_BITS,
2509                                out_bs->bl.pdiscard_alignment >>
2510                                BDRV_SECTOR_BITS)));
2511
2512    /* try to align the write requests to the destination to avoid unnecessary
2513     * RMW cycles. */
2514    s.alignment = MAX(pow2floor(s.min_sparse),
2515                      DIV_ROUND_UP(out_bs->bl.request_alignment,
2516                                   BDRV_SECTOR_SIZE));
2517    assert(is_power_of_2(s.alignment));
2518
2519    if (skip_create) {
2520        int64_t output_sectors = blk_nb_sectors(s.target);
2521        if (output_sectors < 0) {
2522            error_report("unable to get output image length: %s",
2523                         strerror(-output_sectors));
2524            ret = -1;
2525            goto out;
2526        } else if (output_sectors < s.total_sectors) {
2527            error_report("output file is smaller than input file");
2528            ret = -1;
2529            goto out;
2530        }
2531    }
2532
2533    if (s.target_has_backing && s.target_is_new) {
2534        /* Errors are treated as "backing length unknown" (which means
2535         * s.target_backing_sectors has to be negative, which it will
2536         * be automatically).  The backing file length is used only
2537         * for optimizations, so such a case is not fatal. */
2538        s.target_backing_sectors = bdrv_nb_sectors(out_bs->backing->bs);
2539    } else {
2540        s.target_backing_sectors = -1;
2541    }
2542
2543    ret = bdrv_get_info(out_bs, &bdi);
2544    if (ret < 0) {
2545        if (s.compressed) {
2546            error_report("could not get block driver info");
2547            goto out;
2548        }
2549    } else {
2550        s.compressed = s.compressed || bdi.needs_compressed_writes;
2551        s.cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE;
2552        s.unallocated_blocks_are_zero = bdi.unallocated_blocks_are_zero;
2553    }
2554
2555    ret = convert_do_copy(&s);
2556out:
2557    if (!ret) {
2558        qemu_progress_print(100, 0);
2559    }
2560    qemu_progress_end();
2561    qemu_opts_del(opts);
2562    qemu_opts_free(create_opts);
2563    qemu_opts_del(sn_opts);
2564    qobject_unref(open_opts);
2565    blk_unref(s.target);
2566    if (s.src) {
2567        for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2568            blk_unref(s.src[bs_i]);
2569        }
2570        g_free(s.src);
2571    }
2572    g_free(s.src_sectors);
2573fail_getopt:
2574    g_free(options);
2575
2576    return !!ret;
2577}
2578
2579
2580static void dump_snapshots(BlockDriverState *bs)
2581{
2582    QEMUSnapshotInfo *sn_tab, *sn;
2583    int nb_sns, i;
2584
2585    nb_sns = bdrv_snapshot_list(bs, &sn_tab);
2586    if (nb_sns <= 0)
2587        return;
2588    printf("Snapshot list:\n");
2589    bdrv_snapshot_dump(NULL);
2590    printf("\n");
2591    for(i = 0; i < nb_sns; i++) {
2592        sn = &sn_tab[i];
2593        bdrv_snapshot_dump(sn);
2594        printf("\n");
2595    }
2596    g_free(sn_tab);
2597}
2598
2599static void dump_json_image_info_list(ImageInfoList *list)
2600{
2601    QString *str;
2602    QObject *obj;
2603    Visitor *v = qobject_output_visitor_new(&obj);
2604
2605    visit_type_ImageInfoList(v, NULL, &list, &error_abort);
2606    visit_complete(v, &obj);
2607    str = qobject_to_json_pretty(obj);
2608    assert(str != NULL);
2609    printf("%s\n", qstring_get_str(str));
2610    qobject_unref(obj);
2611    visit_free(v);
2612    qobject_unref(str);
2613}
2614
2615static void dump_json_image_info(ImageInfo *info)
2616{
2617    QString *str;
2618    QObject *obj;
2619    Visitor *v = qobject_output_visitor_new(&obj);
2620
2621    visit_type_ImageInfo(v, NULL, &info, &error_abort);
2622    visit_complete(v, &obj);
2623    str = qobject_to_json_pretty(obj);
2624    assert(str != NULL);
2625    printf("%s\n", qstring_get_str(str));
2626    qobject_unref(obj);
2627    visit_free(v);
2628    qobject_unref(str);
2629}
2630
2631static void dump_human_image_info_list(ImageInfoList *list)
2632{
2633    ImageInfoList *elem;
2634    bool delim = false;
2635
2636    for (elem = list; elem; elem = elem->next) {
2637        if (delim) {
2638            printf("\n");
2639        }
2640        delim = true;
2641
2642        bdrv_image_info_dump(elem->value);
2643    }
2644}
2645
2646static gboolean str_equal_func(gconstpointer a, gconstpointer b)
2647{
2648    return strcmp(a, b) == 0;
2649}
2650
2651/**
2652 * Open an image file chain and return an ImageInfoList
2653 *
2654 * @filename: topmost image filename
2655 * @fmt: topmost image format (may be NULL to autodetect)
2656 * @chain: true  - enumerate entire backing file chain
2657 *         false - only topmost image file
2658 *
2659 * Returns a list of ImageInfo objects or NULL if there was an error opening an
2660 * image file.  If there was an error a message will have been printed to
2661 * stderr.
2662 */
2663static ImageInfoList *collect_image_info_list(bool image_opts,
2664                                              const char *filename,
2665                                              const char *fmt,
2666                                              bool chain, bool force_share)
2667{
2668    ImageInfoList *head = NULL;
2669    ImageInfoList **last = &head;
2670    GHashTable *filenames;
2671    Error *err = NULL;
2672
2673    filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
2674
2675    while (filename) {
2676        BlockBackend *blk;
2677        BlockDriverState *bs;
2678        ImageInfo *info;
2679        ImageInfoList *elem;
2680
2681        if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
2682            error_report("Backing file '%s' creates an infinite loop.",
2683                         filename);
2684            goto err;
2685        }
2686        g_hash_table_insert(filenames, (gpointer)filename, NULL);
2687
2688        blk = img_open(image_opts, filename, fmt,
2689                       BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false,
2690                       force_share);
2691        if (!blk) {
2692            goto err;
2693        }
2694        bs = blk_bs(blk);
2695
2696        bdrv_query_image_info(bs, &info, &err);
2697        if (err) {
2698            error_report_err(err);
2699            blk_unref(blk);
2700            goto err;
2701        }
2702
2703        elem = g_new0(ImageInfoList, 1);
2704        elem->value = info;
2705        *last = elem;
2706        last = &elem->next;
2707
2708        blk_unref(blk);
2709
2710        /* Clear parameters that only apply to the topmost image */
2711        filename = fmt = NULL;
2712        image_opts = false;
2713
2714        if (chain) {
2715            if (info->has_full_backing_filename) {
2716                filename = info->full_backing_filename;
2717            } else if (info->has_backing_filename) {
2718                error_report("Could not determine absolute backing filename,"
2719                             " but backing filename '%s' present",
2720                             info->backing_filename);
2721                goto err;
2722            }
2723            if (info->has_backing_filename_format) {
2724                fmt = info->backing_filename_format;
2725            }
2726        }
2727    }
2728    g_hash_table_destroy(filenames);
2729    return head;
2730
2731err:
2732    qapi_free_ImageInfoList(head);
2733    g_hash_table_destroy(filenames);
2734    return NULL;
2735}
2736
2737static int img_info(int argc, char **argv)
2738{
2739    int c;
2740    OutputFormat output_format = OFORMAT_HUMAN;
2741    bool chain = false;
2742    const char *filename, *fmt, *output;
2743    ImageInfoList *list;
2744    bool image_opts = false;
2745    bool force_share = false;
2746
2747    fmt = NULL;
2748    output = NULL;
2749    for(;;) {
2750        int option_index = 0;
2751        static const struct option long_options[] = {
2752            {"help", no_argument, 0, 'h'},
2753            {"format", required_argument, 0, 'f'},
2754            {"output", required_argument, 0, OPTION_OUTPUT},
2755            {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN},
2756            {"object", required_argument, 0, OPTION_OBJECT},
2757            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2758            {"force-share", no_argument, 0, 'U'},
2759            {0, 0, 0, 0}
2760        };
2761        c = getopt_long(argc, argv, ":f:hU",
2762                        long_options, &option_index);
2763        if (c == -1) {
2764            break;
2765        }
2766        switch(c) {
2767        case ':':
2768            missing_argument(argv[optind - 1]);
2769            break;
2770        case '?':
2771            unrecognized_option(argv[optind - 1]);
2772            break;
2773        case 'h':
2774            help();
2775            break;
2776        case 'f':
2777            fmt = optarg;
2778            break;
2779        case 'U':
2780            force_share = true;
2781            break;
2782        case OPTION_OUTPUT:
2783            output = optarg;
2784            break;
2785        case OPTION_BACKING_CHAIN:
2786            chain = true;
2787            break;
2788        case OPTION_OBJECT: {
2789            QemuOpts *opts;
2790            opts = qemu_opts_parse_noisily(&qemu_object_opts,
2791                                           optarg, true);
2792            if (!opts) {
2793                return 1;
2794            }
2795        }   break;
2796        case OPTION_IMAGE_OPTS:
2797            image_opts = true;
2798            break;
2799        }
2800    }
2801    if (optind != argc - 1) {
2802        error_exit("Expecting one image file name");
2803    }
2804    filename = argv[optind++];
2805
2806    if (output && !strcmp(output, "json")) {
2807        output_format = OFORMAT_JSON;
2808    } else if (output && !strcmp(output, "human")) {
2809        output_format = OFORMAT_HUMAN;
2810    } else if (output) {
2811        error_report("--output must be used with human or json as argument.");
2812        return 1;
2813    }
2814
2815    if (qemu_opts_foreach(&qemu_object_opts,
2816                          user_creatable_add_opts_foreach,
2817                          qemu_img_object_print_help, &error_fatal)) {
2818        return 1;
2819    }
2820
2821    list = collect_image_info_list(image_opts, filename, fmt, chain,
2822                                   force_share);
2823    if (!list) {
2824        return 1;
2825    }
2826
2827    switch (output_format) {
2828    case OFORMAT_HUMAN:
2829        dump_human_image_info_list(list);
2830        break;
2831    case OFORMAT_JSON:
2832        if (chain) {
2833            dump_json_image_info_list(list);
2834        } else {
2835            dump_json_image_info(list->value);
2836        }
2837        break;
2838    }
2839
2840    qapi_free_ImageInfoList(list);
2841    return 0;
2842}
2843
2844static int dump_map_entry(OutputFormat output_format, MapEntry *e,
2845                          MapEntry *next)
2846{
2847    switch (output_format) {
2848    case OFORMAT_HUMAN:
2849        if (e->data && !e->has_offset) {
2850            error_report("File contains external, encrypted or compressed clusters.");
2851            return -1;
2852        }
2853        if (e->data && !e->zero) {
2854            printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
2855                   e->start, e->length,
2856                   e->has_offset ? e->offset : 0,
2857                   e->has_filename ? e->filename : "");
2858        }
2859        /* This format ignores the distinction between 0, ZERO and ZERO|DATA.
2860         * Modify the flags here to allow more coalescing.
2861         */
2862        if (next && (!next->data || next->zero)) {
2863            next->data = false;
2864            next->zero = true;
2865        }
2866        break;
2867    case OFORMAT_JSON:
2868        printf("%s{ \"start\": %"PRId64", \"length\": %"PRId64","
2869               " \"depth\": %"PRId64", \"zero\": %s, \"data\": %s",
2870               (e->start == 0 ? "[" : ",\n"),
2871               e->start, e->length, e->depth,
2872               e->zero ? "true" : "false",
2873               e->data ? "true" : "false");
2874        if (e->has_offset) {
2875            printf(", \"offset\": %"PRId64"", e->offset);
2876        }
2877        putchar('}');
2878
2879        if (!next) {
2880            printf("]\n");
2881        }
2882        break;
2883    }
2884    return 0;
2885}
2886
2887static int get_block_status(BlockDriverState *bs, int64_t offset,
2888                            int64_t bytes, MapEntry *e)
2889{
2890    int ret;
2891    int depth;
2892    BlockDriverState *file;
2893    bool has_offset;
2894    int64_t map;
2895    char *filename = NULL;
2896
2897    /* As an optimization, we could cache the current range of unallocated
2898     * clusters in each file of the chain, and avoid querying the same
2899     * range repeatedly.
2900     */
2901
2902    depth = 0;
2903    for (;;) {
2904        ret = bdrv_block_status(bs, offset, bytes, &bytes, &map, &file);
2905        if (ret < 0) {
2906            return ret;
2907        }
2908        assert(bytes);
2909        if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
2910            break;
2911        }
2912        bs = backing_bs(bs);
2913        if (bs == NULL) {
2914            ret = 0;
2915            break;
2916        }
2917
2918        depth++;
2919    }
2920
2921    has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID);
2922
2923    if (file && has_offset) {
2924        bdrv_refresh_filename(file);
2925        filename = file->filename;
2926    }
2927
2928    *e = (MapEntry) {
2929        .start = offset,
2930        .length = bytes,
2931        .data = !!(ret & BDRV_BLOCK_DATA),
2932        .zero = !!(ret & BDRV_BLOCK_ZERO),
2933        .offset = map,
2934        .has_offset = has_offset,
2935        .depth = depth,
2936        .has_filename = filename,
2937        .filename = filename,
2938    };
2939
2940    return 0;
2941}
2942
2943static inline bool entry_mergeable(const MapEntry *curr, const MapEntry *next)
2944{
2945    if (curr->length == 0) {
2946        return false;
2947    }
2948    if (curr->zero != next->zero ||
2949        curr->data != next->data ||
2950        curr->depth != next->depth ||
2951        curr->has_filename != next->has_filename ||
2952        curr->has_offset != next->has_offset) {
2953        return false;
2954    }
2955    if (curr->has_filename && strcmp(curr->filename, next->filename)) {
2956        return false;
2957    }
2958    if (curr->has_offset && curr->offset + curr->length != next->offset) {
2959        return false;
2960    }
2961    return true;
2962}
2963
2964static int img_map(int argc, char **argv)
2965{
2966    int c;
2967    OutputFormat output_format = OFORMAT_HUMAN;
2968    BlockBackend *blk;
2969    BlockDriverState *bs;
2970    const char *filename, *fmt, *output;
2971    int64_t length;
2972    MapEntry curr = { .length = 0 }, next;
2973    int ret = 0;
2974    bool image_opts = false;
2975    bool force_share = false;
2976
2977    fmt = NULL;
2978    output = NULL;
2979    for (;;) {
2980        int option_index = 0;
2981        static const struct option long_options[] = {
2982            {"help", no_argument, 0, 'h'},
2983            {"format", required_argument, 0, 'f'},
2984            {"output", required_argument, 0, OPTION_OUTPUT},
2985            {"object", required_argument, 0, OPTION_OBJECT},
2986            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2987            {"force-share", no_argument, 0, 'U'},
2988            {0, 0, 0, 0}
2989        };
2990        c = getopt_long(argc, argv, ":f:hU",
2991                        long_options, &option_index);
2992        if (c == -1) {
2993            break;
2994        }
2995        switch (c) {
2996        case ':':
2997            missing_argument(argv[optind - 1]);
2998            break;
2999        case '?':
3000            unrecognized_option(argv[optind - 1]);
3001            break;
3002        case 'h':
3003            help();
3004            break;
3005        case 'f':
3006            fmt = optarg;
3007            break;
3008        case 'U':
3009            force_share = true;
3010            break;
3011        case OPTION_OUTPUT:
3012            output = optarg;
3013            break;
3014        case OPTION_OBJECT: {
3015            QemuOpts *opts;
3016            opts = qemu_opts_parse_noisily(&qemu_object_opts,
3017                                           optarg, true);
3018            if (!opts) {
3019                return 1;
3020            }
3021        }   break;
3022        case OPTION_IMAGE_OPTS:
3023            image_opts = true;
3024            break;
3025        }
3026    }
3027    if (optind != argc - 1) {
3028        error_exit("Expecting one image file name");
3029    }
3030    filename = argv[optind];
3031
3032    if (output && !strcmp(output, "json")) {
3033        output_format = OFORMAT_JSON;
3034    } else if (output && !strcmp(output, "human")) {
3035        output_format = OFORMAT_HUMAN;
3036    } else if (output) {
3037        error_report("--output must be used with human or json as argument.");
3038        return 1;
3039    }
3040
3041    if (qemu_opts_foreach(&qemu_object_opts,
3042                          user_creatable_add_opts_foreach,
3043                          qemu_img_object_print_help, &error_fatal)) {
3044        return 1;
3045    }
3046
3047    blk = img_open(image_opts, filename, fmt, 0, false, false, force_share);
3048    if (!blk) {
3049        return 1;
3050    }
3051    bs = blk_bs(blk);
3052
3053    if (output_format == OFORMAT_HUMAN) {
3054        printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
3055    }
3056
3057    length = blk_getlength(blk);
3058    while (curr.start + curr.length < length) {
3059        int64_t offset = curr.start + curr.length;
3060        int64_t n;
3061
3062        /* Probe up to 1 GiB at a time.  */
3063        n = MIN(1 * GiB, length - offset);
3064        ret = get_block_status(bs, offset, n, &next);
3065
3066        if (ret < 0) {
3067            error_report("Could not read file metadata: %s", strerror(-ret));
3068            goto out;
3069        }
3070
3071        if (entry_mergeable(&curr, &next)) {
3072            curr.length += next.length;
3073            continue;
3074        }
3075
3076        if (curr.length > 0) {
3077            ret = dump_map_entry(output_format, &curr, &next);
3078            if (ret < 0) {
3079                goto out;
3080            }
3081        }
3082        curr = next;
3083    }
3084
3085    ret = dump_map_entry(output_format, &curr, NULL);
3086
3087out:
3088    blk_unref(blk);
3089    return ret < 0;
3090}
3091
/*
 * Action codes used by img_snapshot(); they are selected with the
 * -l, -c, -a and -d command line switches respectively.
 */
#define SNAPSHOT_LIST   1
#define SNAPSHOT_CREATE 2
#define SNAPSHOT_APPLY  3
#define SNAPSHOT_DELETE 4
3096
3097static int img_snapshot(int argc, char **argv)
3098{
3099    BlockBackend *blk;
3100    BlockDriverState *bs;
3101    QEMUSnapshotInfo sn;
3102    char *filename, *snapshot_name = NULL;
3103    int c, ret = 0, bdrv_oflags;
3104    int action = 0;
3105    qemu_timeval tv;
3106    bool quiet = false;
3107    Error *err = NULL;
3108    bool image_opts = false;
3109    bool force_share = false;
3110
3111    bdrv_oflags = BDRV_O_RDWR;
3112    /* Parse commandline parameters */
3113    for(;;) {
3114        static const struct option long_options[] = {
3115            {"help", no_argument, 0, 'h'},
3116            {"object", required_argument, 0, OPTION_OBJECT},
3117            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3118            {"force-share", no_argument, 0, 'U'},
3119            {0, 0, 0, 0}
3120        };
3121        c = getopt_long(argc, argv, ":la:c:d:hqU",
3122                        long_options, NULL);
3123        if (c == -1) {
3124            break;
3125        }
3126        switch(c) {
3127        case ':':
3128            missing_argument(argv[optind - 1]);
3129            break;
3130        case '?':
3131            unrecognized_option(argv[optind - 1]);
3132            break;
3133        case 'h':
3134            help();
3135            return 0;
3136        case 'l':
3137            if (action) {
3138                error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3139                return 0;
3140            }
3141            action = SNAPSHOT_LIST;
3142            bdrv_oflags &= ~BDRV_O_RDWR; /* no need for RW */
3143            break;
3144        case 'a':
3145            if (action) {
3146                error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3147                return 0;
3148            }
3149            action = SNAPSHOT_APPLY;
3150            snapshot_name = optarg;
3151            break;
3152        case 'c':
3153            if (action) {
3154                error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3155                return 0;
3156            }
3157            action = SNAPSHOT_CREATE;
3158            snapshot_name = optarg;
3159            break;
3160        case 'd':
3161            if (action) {
3162                error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3163                return 0;
3164            }
3165            action = SNAPSHOT_DELETE;
3166            snapshot_name = optarg;
3167            break;
3168        case 'q':
3169            quiet = true;
3170            break;
3171        case 'U':
3172            force_share = true;
3173            break;
3174        case OPTION_OBJECT: {
3175            QemuOpts *opts;
3176            opts = qemu_opts_parse_noisily(&qemu_object_opts,
3177                                           optarg, true);
3178            if (!opts) {
3179                return 1;
3180            }
3181        }   break;
3182        case OPTION_IMAGE_OPTS:
3183            image_opts = true;
3184            break;
3185        }
3186    }
3187
3188    if (optind != argc - 1) {
3189        error_exit("Expecting one image file name");
3190    }
3191    filename = argv[optind++];
3192
3193    if (qemu_opts_foreach(&qemu_object_opts,
3194                          user_creatable_add_opts_foreach,
3195                          qemu_img_object_print_help, &error_fatal)) {
3196        return 1;
3197    }
3198
3199    /* Open the image */
3200    blk = img_open(image_opts, filename, NULL, bdrv_oflags, false, quiet,
3201                   force_share);
3202    if (!blk) {
3203        return 1;
3204    }
3205    bs = blk_bs(blk);
3206
3207    /* Perform the requested action */
3208    switch(action) {
3209    case SNAPSHOT_LIST:
3210        dump_snapshots(bs);
3211        break;
3212
3213    case SNAPSHOT_CREATE:
3214        memset(&sn, 0, sizeof(sn));
3215        pstrcpy(sn.name, sizeof(sn.name), snapshot_name);
3216
3217        qemu_gettimeofday(&tv);
3218        sn.date_sec = tv.tv_sec;
3219        sn.date_nsec = tv.tv_usec * 1000;
3220
3221        ret = bdrv_snapshot_create(bs, &sn);
3222        if (ret) {
3223            error_report("Could not create snapshot '%s': %d (%s)",
3224                snapshot_name, ret, strerror(-ret));
3225        }
3226        break;
3227
3228    case SNAPSHOT_APPLY:
3229        ret = bdrv_snapshot_goto(bs, snapshot_name, &err);
3230        if (ret) {
3231            error_reportf_err(err, "Could not apply snapshot '%s': ",
3232                              snapshot_name);
3233        }
3234        break;
3235
3236    case SNAPSHOT_DELETE:
3237        ret = bdrv_snapshot_find(bs, &sn, snapshot_name);
3238        if (ret < 0) {
3239            error_report("Could not delete snapshot '%s': snapshot not "
3240                         "found", snapshot_name);
3241            ret = 1;
3242        } else {
3243            ret = bdrv_snapshot_delete(bs, sn.id_str, sn.name, &err);
3244            if (ret < 0) {
3245                error_reportf_err(err, "Could not delete snapshot '%s': ",
3246                                  snapshot_name);
3247                ret = 1;
3248            }
3249        }
3250        break;
3251    }
3252
3253    /* Cleanup */
3254    blk_unref(blk);
3255    if (ret) {
3256        return 1;
3257    }
3258    return 0;
3259}
3260
3261static int img_rebase(int argc, char **argv)
3262{
3263    BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
3264    uint8_t *buf_old = NULL;
3265    uint8_t *buf_new = NULL;
3266    BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
3267    char *filename;
3268    const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
3269    int c, flags, src_flags, ret;
3270    bool writethrough, src_writethrough;
3271    int unsafe = 0;
3272    bool force_share = false;
3273    int progress = 0;
3274    bool quiet = false;
3275    Error *local_err = NULL;
3276    bool image_opts = false;
3277
3278    /* Parse commandline parameters */
3279    fmt = NULL;
3280    cache = BDRV_DEFAULT_CACHE;
3281    src_cache = BDRV_DEFAULT_CACHE;
3282    out_baseimg = NULL;
3283    out_basefmt = NULL;
3284    for(;;) {
3285        static const struct option long_options[] = {
3286            {"help", no_argument, 0, 'h'},
3287            {"object", required_argument, 0, OPTION_OBJECT},
3288            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3289            {"force-share", no_argument, 0, 'U'},
3290            {0, 0, 0, 0}
3291        };
3292        c = getopt_long(argc, argv, ":hf:F:b:upt:T:qU",
3293                        long_options, NULL);
3294        if (c == -1) {
3295            break;
3296        }
3297        switch(c) {
3298        case ':':
3299            missing_argument(argv[optind - 1]);
3300            break;
3301        case '?':
3302            unrecognized_option(argv[optind - 1]);
3303            break;
3304        case 'h':
3305            help();
3306            return 0;
3307        case 'f':
3308            fmt = optarg;
3309            break;
3310        case 'F':
3311            out_basefmt = optarg;
3312            break;
3313        case 'b':
3314            out_baseimg = optarg;
3315            break;
3316        case 'u':
3317            unsafe = 1;
3318            break;
3319        case 'p':
3320            progress = 1;
3321            break;
3322        case 't':
3323            cache = optarg;
3324            break;
3325        case 'T':
3326            src_cache = optarg;
3327            break;
3328        case 'q':
3329            quiet = true;
3330            break;
3331        case OPTION_OBJECT: {
3332            QemuOpts *opts;
3333            opts = qemu_opts_parse_noisily(&qemu_object_opts,
3334                                           optarg, true);
3335            if (!opts) {
3336                return 1;
3337            }
3338        }   break;
3339        case OPTION_IMAGE_OPTS:
3340            image_opts = true;
3341            break;
3342        case 'U':
3343            force_share = true;
3344            break;
3345        }
3346    }
3347
3348    if (quiet) {
3349        progress = 0;
3350    }
3351
3352    if (optind != argc - 1) {
3353        error_exit("Expecting one image file name");
3354    }
3355    if (!unsafe && !out_baseimg) {
3356        error_exit("Must specify backing file (-b) or use unsafe mode (-u)");
3357    }
3358    filename = argv[optind++];
3359
3360    if (qemu_opts_foreach(&qemu_object_opts,
3361                          user_creatable_add_opts_foreach,
3362                          qemu_img_object_print_help, &error_fatal)) {
3363        return 1;
3364    }
3365
3366    qemu_progress_init(progress, 2.0);
3367    qemu_progress_print(0, 100);
3368
3369    flags = BDRV_O_RDWR | (unsafe ? BDRV_O_NO_BACKING : 0);
3370    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
3371    if (ret < 0) {
3372        error_report("Invalid cache option: %s", cache);
3373        goto out;
3374    }
3375
3376    src_flags = 0;
3377    ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
3378    if (ret < 0) {
3379        error_report("Invalid source cache option: %s", src_cache);
3380        goto out;
3381    }
3382
3383    /* The source files are opened read-only, don't care about WCE */
3384    assert((src_flags & BDRV_O_RDWR) == 0);
3385    (void) src_writethrough;
3386
3387    /*
3388     * Open the images.
3389     *
3390     * Ignore the old backing file for unsafe rebase in case we want to correct
3391     * the reference to a renamed or moved backing file.
3392     */
3393    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
3394                   false);
3395    if (!blk) {
3396        ret = -1;
3397        goto out;
3398    }
3399    bs = blk_bs(blk);
3400
3401    if (out_basefmt != NULL) {
3402        if (bdrv_find_format(out_basefmt) == NULL) {
3403            error_report("Invalid format name: '%s'", out_basefmt);
3404            ret = -1;
3405            goto out;
3406        }
3407    }
3408
3409    /* For safe rebasing we need to compare old and new backing file */
3410    if (!unsafe) {
3411        QDict *options = NULL;
3412        BlockDriverState *base_bs = backing_bs(bs);
3413
3414        if (base_bs) {
3415            blk_old_backing = blk_new(qemu_get_aio_context(),
3416                                      BLK_PERM_CONSISTENT_READ,
3417                                      BLK_PERM_ALL);
3418            ret = blk_insert_bs(blk_old_backing, base_bs,
3419                                &local_err);
3420            if (ret < 0) {
3421                error_reportf_err(local_err,
3422                                  "Could not reuse old backing file '%s': ",
3423                                  base_bs->filename);
3424                goto out;
3425            }
3426        } else {
3427            blk_old_backing = NULL;
3428        }
3429
3430        if (out_baseimg[0]) {
3431            const char *overlay_filename;
3432            char *out_real_path;
3433
3434            options = qdict_new();
3435            if (out_basefmt) {
3436                qdict_put_str(options, "driver", out_basefmt);
3437            }
3438            if (force_share) {
3439                qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
3440            }
3441
3442            bdrv_refresh_filename(bs);
3443            overlay_filename = bs->exact_filename[0] ? bs->exact_filename
3444                                                     : bs->filename;
3445            out_real_path =
3446                bdrv_get_full_backing_filename_from_filename(overlay_filename,
3447                                                             out_baseimg,
3448                                                             &local_err);
3449            if (local_err) {
3450                qobject_unref(options);
3451                error_reportf_err(local_err,
3452                                  "Could not resolve backing filename: ");
3453                ret = -1;
3454                goto out;
3455            }
3456
3457            /*
3458             * Find out whether we rebase an image on top of a previous image
3459             * in its chain.
3460             */
3461            prefix_chain_bs = bdrv_find_backing_image(bs, out_real_path);
3462            if (prefix_chain_bs) {
3463                qobject_unref(options);
3464                g_free(out_real_path);
3465
3466                blk_new_backing = blk_new(qemu_get_aio_context(),
3467                                          BLK_PERM_CONSISTENT_READ,
3468                                          BLK_PERM_ALL);
3469                ret = blk_insert_bs(blk_new_backing, prefix_chain_bs,
3470                                    &local_err);
3471                if (ret < 0) {
3472                    error_reportf_err(local_err,
3473                                      "Could not reuse backing file '%s': ",
3474                                      out_baseimg);
3475                    goto out;
3476                }
3477            } else {
3478                blk_new_backing = blk_new_open(out_real_path, NULL,
3479                                               options, src_flags, &local_err);
3480                g_free(out_real_path);
3481                if (!blk_new_backing) {
3482                    error_reportf_err(local_err,
3483                                      "Could not open new backing file '%s': ",
3484                                      out_baseimg);
3485                    ret = -1;
3486                    goto out;
3487                }
3488            }
3489        }
3490    }
3491
3492    /*
3493     * Check each unallocated cluster in the COW file. If it is unallocated,
3494     * accesses go to the backing file. We must therefore compare this cluster
3495     * in the old and new backing file, and if they differ we need to copy it
3496     * from the old backing file into the COW file.
3497     *
3498     * If qemu-img crashes during this step, no harm is done. The content of
3499     * the image is the same as the original one at any time.
3500     */
3501    if (!unsafe) {
3502        int64_t size;
3503        int64_t old_backing_size = 0;
3504        int64_t new_backing_size = 0;
3505        uint64_t offset;
3506        int64_t n;
3507        float local_progress = 0;
3508
3509        buf_old = blk_blockalign(blk, IO_BUF_SIZE);
3510        buf_new = blk_blockalign(blk, IO_BUF_SIZE);
3511
3512        size = blk_getlength(blk);
3513        if (size < 0) {
3514            error_report("Could not get size of '%s': %s",
3515                         filename, strerror(-size));
3516            ret = -1;
3517            goto out;
3518        }
3519        if (blk_old_backing) {
3520            old_backing_size = blk_getlength(blk_old_backing);
3521            if (old_backing_size < 0) {
3522                char backing_name[PATH_MAX];
3523
3524                bdrv_get_backing_filename(bs, backing_name,
3525                                          sizeof(backing_name));
3526                error_report("Could not get size of '%s': %s",
3527                             backing_name, strerror(-old_backing_size));
3528                ret = -1;
3529                goto out;
3530            }
3531        }
3532        if (blk_new_backing) {
3533            new_backing_size = blk_getlength(blk_new_backing);
3534            if (new_backing_size < 0) {
3535                error_report("Could not get size of '%s': %s",
3536                             out_baseimg, strerror(-new_backing_size));
3537                ret = -1;
3538                goto out;
3539            }
3540        }
3541
3542        if (size != 0) {
3543            local_progress = (float)100 / (size / MIN(size, IO_BUF_SIZE));
3544        }
3545
3546        for (offset = 0; offset < size; offset += n) {
3547            bool buf_old_is_zero = false;
3548
3549            /* How many bytes can we handle with the next read? */
3550            n = MIN(IO_BUF_SIZE, size - offset);
3551
3552            /* If the cluster is allocated, we don't need to take action */
3553            ret = bdrv_is_allocated(bs, offset, n, &n);
3554            if (ret < 0) {
3555                error_report("error while reading image metadata: %s",
3556                             strerror(-ret));
3557                goto out;
3558            }
3559            if (ret) {
3560                continue;
3561            }
3562
3563            if (prefix_chain_bs) {
3564                /*
3565                 * If cluster wasn't changed since prefix_chain, we don't need
3566                 * to take action
3567                 */
3568                ret = bdrv_is_allocated_above(backing_bs(bs), prefix_chain_bs,
3569                                              false, offset, n, &n);
3570                if (ret < 0) {
3571                    error_report("error while reading image metadata: %s",
3572                                 strerror(-ret));
3573                    goto out;
3574                }
3575                if (!ret) {
3576                    continue;
3577                }
3578            }
3579
3580            /*
3581             * Read old and new backing file and take into consideration that
3582             * backing files may be smaller than the COW image.
3583             */
3584            if (offset >= old_backing_size) {
3585                memset(buf_old, 0, n);
3586                buf_old_is_zero = true;
3587            } else {
3588                if (offset + n > old_backing_size) {
3589                    n = old_backing_size - offset;
3590                }
3591
3592                ret = blk_pread(blk_old_backing, offset, buf_old, n);
3593                if (ret < 0) {
3594                    error_report("error while reading from old backing file");
3595                    goto out;
3596                }
3597            }
3598
3599            if (offset >= new_backing_size || !blk_new_backing) {
3600                memset(buf_new, 0, n);
3601            } else {
3602                if (offset + n > new_backing_size) {
3603                    n = new_backing_size - offset;
3604                }
3605
3606                ret = blk_pread(blk_new_backing, offset, buf_new, n);
3607                if (ret < 0) {
3608                    error_report("error while reading from new backing file");
3609                    goto out;
3610                }
3611            }
3612
3613            /* If they differ, we need to write to the COW file */
3614            uint64_t written = 0;
3615
3616            while (written < n) {
3617                int64_t pnum;
3618
3619                if (compare_buffers(buf_old + written, buf_new + written,
3620                                    n - written, &pnum))
3621                {
3622                    if (buf_old_is_zero) {
3623                        ret = blk_pwrite_zeroes(blk, offset + written, pnum, 0);
3624                    } else {
3625                        ret = blk_pwrite(blk, offset + written,
3626                                         buf_old + written, pnum, 0);
3627                    }
3628                    if (ret < 0) {
3629                        error_report("Error while writing to COW image: %s",
3630                            strerror(-ret));
3631                        goto out;
3632                    }
3633                }
3634
3635                written += pnum;
3636            }
3637            qemu_progress_print(local_progress, 100);
3638        }
3639    }
3640
3641    /*
3642     * Change the backing file. All clusters that are different from the old
3643     * backing file are overwritten in the COW file now, so the visible content
3644     * doesn't change when we switch the backing file.
3645     */
3646    if (out_baseimg && *out_baseimg) {
3647        ret = bdrv_change_backing_file(bs, out_baseimg, out_basefmt);
3648    } else {
3649        ret = bdrv_change_backing_file(bs, NULL, NULL);
3650    }
3651
3652    if (ret == -ENOSPC) {
3653        error_report("Could not change the backing file to '%s': No "
3654                     "space left in the file header", out_baseimg);
3655    } else if (ret < 0) {
3656        error_report("Could not change the backing file to '%s': %s",
3657            out_baseimg, strerror(-ret));
3658    }
3659
3660    qemu_progress_print(100, 0);
3661    /*
3662     * TODO At this point it is possible to check if any clusters that are
3663     * allocated in the COW file are the same in the backing file. If so, they
3664     * could be dropped from the COW file. Don't do this before switching the
3665     * backing file, in case of a crash this would lead to corruption.
3666     */
3667out:
3668    qemu_progress_end();
3669    /* Cleanup */
3670    if (!unsafe) {
3671        blk_unref(blk_old_backing);
3672        blk_unref(blk_new_backing);
3673    }
3674    qemu_vfree(buf_old);
3675    qemu_vfree(buf_new);
3676
3677    blk_unref(blk);
3678    if (ret) {
3679        return 1;
3680    }
3681    return 0;
3682}
3683
3684static int img_resize(int argc, char **argv)
3685{
3686    Error *err = NULL;
3687    int c, ret, relative;
3688    const char *filename, *fmt, *size;
3689    int64_t n, total_size, current_size;
3690    bool quiet = false;
3691    BlockBackend *blk = NULL;
3692    PreallocMode prealloc = PREALLOC_MODE_OFF;
3693    QemuOpts *param;
3694
3695    static QemuOptsList resize_options = {
3696        .name = "resize_options",
3697        .head = QTAILQ_HEAD_INITIALIZER(resize_options.head),
3698        .desc = {
3699            {
3700                .name = BLOCK_OPT_SIZE,
3701                .type = QEMU_OPT_SIZE,
3702                .help = "Virtual disk size"
3703            }, {
3704                /* end of list */
3705            }
3706        },
3707    };
3708    bool image_opts = false;
3709    bool shrink = false;
3710
3711    /* Remove size from argv manually so that negative numbers are not treated
3712     * as options by getopt. */
3713    if (argc < 3) {
3714        error_exit("Not enough arguments");
3715        return 1;
3716    }
3717
3718    size = argv[--argc];
3719
3720    /* Parse getopt arguments */
3721    fmt = NULL;
3722    for(;;) {
3723        static const struct option long_options[] = {
3724            {"help", no_argument, 0, 'h'},
3725            {"object", required_argument, 0, OPTION_OBJECT},
3726            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3727            {"preallocation", required_argument, 0, OPTION_PREALLOCATION},
3728            {"shrink", no_argument, 0, OPTION_SHRINK},
3729            {0, 0, 0, 0}
3730        };
3731        c = getopt_long(argc, argv, ":f:hq",
3732                        long_options, NULL);
3733        if (c == -1) {
3734            break;
3735        }
3736        switch(c) {
3737        case ':':
3738            missing_argument(argv[optind - 1]);
3739            break;
3740        case '?':
3741            unrecognized_option(argv[optind - 1]);
3742            break;
3743        case 'h':
3744            help();
3745            break;
3746        case 'f':
3747            fmt = optarg;
3748            break;
3749        case 'q':
3750            quiet = true;
3751            break;
3752        case OPTION_OBJECT: {
3753            QemuOpts *opts;
3754            opts = qemu_opts_parse_noisily(&qemu_object_opts,
3755                                           optarg, true);
3756            if (!opts) {
3757                return 1;
3758            }
3759        }   break;
3760        case OPTION_IMAGE_OPTS:
3761            image_opts = true;
3762            break;
3763        case OPTION_PREALLOCATION:
3764            prealloc = qapi_enum_parse(&PreallocMode_lookup, optarg,
3765                                       PREALLOC_MODE__MAX, NULL);
3766            if (prealloc == PREALLOC_MODE__MAX) {
3767                error_report("Invalid preallocation mode '%s'", optarg);
3768                return 1;
3769            }
3770            break;
3771        case OPTION_SHRINK:
3772            shrink = true;
3773            break;
3774        }
3775    }
3776    if (optind != argc - 1) {
3777        error_exit("Expecting image file name and size");
3778    }
3779    filename = argv[optind++];
3780
3781    if (qemu_opts_foreach(&qemu_object_opts,
3782                          user_creatable_add_opts_foreach,
3783                          qemu_img_object_print_help, &error_fatal)) {
3784        return 1;
3785    }
3786
3787    /* Choose grow, shrink, or absolute resize mode */
3788    switch (size[0]) {
3789    case '+':
3790        relative = 1;
3791        size++;
3792        break;
3793    case '-':
3794        relative = -1;
3795        size++;
3796        break;
3797    default:
3798        relative = 0;
3799        break;
3800    }
3801
3802    /* Parse size */
3803    param = qemu_opts_create(&resize_options, NULL, 0, &error_abort);
3804    qemu_opt_set(param, BLOCK_OPT_SIZE, size, &err);
3805    if (err) {
3806        error_report_err(err);
3807        ret = -1;
3808        qemu_opts_del(param);
3809        goto out;
3810    }
3811    n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
3812    qemu_opts_del(param);
3813
3814    blk = img_open(image_opts, filename, fmt,
3815                   BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet,
3816                   false);
3817    if (!blk) {
3818        ret = -1;
3819        goto out;
3820    }
3821
3822    current_size = blk_getlength(blk);
3823    if (current_size < 0) {
3824        error_report("Failed to inquire current image length: %s",
3825                     strerror(-current_size));
3826        ret = -1;
3827        goto out;
3828    }
3829
3830    if (relative) {
3831        total_size = current_size + n * relative;
3832    } else {
3833        total_size = n;
3834    }
3835    if (total_size <= 0) {
3836        error_report("New image size must be positive");
3837        ret = -1;
3838        goto out;
3839    }
3840
3841    if (total_size <= current_size && prealloc != PREALLOC_MODE_OFF) {
3842        error_report("Preallocation can only be used for growing images");
3843        ret = -1;
3844        goto out;
3845    }
3846
3847    if (total_size < current_size && !shrink) {
3848        warn_report("Shrinking an image will delete all data beyond the "
3849                    "shrunken image's end. Before performing such an "
3850                    "operation, make sure there is no important data there.");
3851
3852        if (g_strcmp0(bdrv_get_format_name(blk_bs(blk)), "raw") != 0) {
3853            error_report(
3854              "Use the --shrink option to perform a shrink operation.");
3855            ret = -1;
3856            goto out;
3857        } else {
3858            warn_report("Using the --shrink option will suppress this message. "
3859                        "Note that future versions of qemu-img may refuse to "
3860                        "shrink images without this option.");
3861        }
3862    }
3863
3864    /*
3865     * The user expects the image to have the desired size after
3866     * resizing, so pass @exact=true.  It is of no use to report
3867     * success when the image has not actually been resized.
3868     */
3869    ret = blk_truncate(blk, total_size, true, prealloc, &err);
3870    if (!ret) {
3871        qprintf(quiet, "Image resized.\n");
3872    } else {
3873        error_report_err(err);
3874    }
3875out:
3876    blk_unref(blk);
3877    if (ret) {
3878        return 1;
3879    }
3880    return 0;
3881}
3882
3883static void amend_status_cb(BlockDriverState *bs,
3884                            int64_t offset, int64_t total_work_size,
3885                            void *opaque)
3886{
3887    qemu_progress_print(100.f * offset / total_work_size, 0);
3888}
3889
3890static int print_amend_option_help(const char *format)
3891{
3892    BlockDriver *drv;
3893
3894    /* Find driver and parse its options */
3895    drv = bdrv_find_format(format);
3896    if (!drv) {
3897        error_report("Unknown file format '%s'", format);
3898        return 1;
3899    }
3900
3901    if (!drv->bdrv_amend_options) {
3902        error_report("Format driver '%s' does not support option amendment",
3903                     format);
3904        return 1;
3905    }
3906
3907    /* Every driver supporting amendment must have create_opts */
3908    assert(drv->create_opts);
3909
3910    printf("Creation options for '%s':\n", format);
3911    qemu_opts_print_help(drv->create_opts, false);
3912    printf("\nNote that not all of these options may be amendable.\n");
3913    return 0;
3914}
3915
3916static int img_amend(int argc, char **argv)
3917{
3918    Error *err = NULL;
3919    int c, ret = 0;
3920    char *options = NULL;
3921    QemuOptsList *create_opts = NULL;
3922    QemuOpts *opts = NULL;
3923    const char *fmt = NULL, *filename, *cache;
3924    int flags;
3925    bool writethrough;
3926    bool quiet = false, progress = false;
3927    BlockBackend *blk = NULL;
3928    BlockDriverState *bs = NULL;
3929    bool image_opts = false;
3930
3931    cache = BDRV_DEFAULT_CACHE;
3932    for (;;) {
3933        static const struct option long_options[] = {
3934            {"help", no_argument, 0, 'h'},
3935            {"object", required_argument, 0, OPTION_OBJECT},
3936            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3937            {0, 0, 0, 0}
3938        };
3939        c = getopt_long(argc, argv, ":ho:f:t:pq",
3940                        long_options, NULL);
3941        if (c == -1) {
3942            break;
3943        }
3944
3945        switch (c) {
3946        case ':':
3947            missing_argument(argv[optind - 1]);
3948            break;
3949        case '?':
3950            unrecognized_option(argv[optind - 1]);
3951            break;
3952        case 'h':
3953            help();
3954            break;
3955        case 'o':
3956            if (!is_valid_option_list(optarg)) {
3957                error_report("Invalid option list: %s", optarg);
3958                ret = -1;
3959                goto out_no_progress;
3960            }
3961            if (!options) {
3962                options = g_strdup(optarg);
3963            } else {
3964                char *old_options = options;
3965                options = g_strdup_printf("%s,%s", options, optarg);
3966                g_free(old_options);
3967            }
3968            break;
3969        case 'f':
3970            fmt = optarg;
3971            break;
3972        case 't':
3973            cache = optarg;
3974            break;
3975        case 'p':
3976            progress = true;
3977            break;
3978        case 'q':
3979            quiet = true;
3980            break;
3981        case OPTION_OBJECT:
3982            opts = qemu_opts_parse_noisily(&qemu_object_opts,
3983                                           optarg, true);
3984            if (!opts) {
3985                ret = -1;
3986                goto out_no_progress;
3987            }
3988            break;
3989        case OPTION_IMAGE_OPTS:
3990            image_opts = true;
3991            break;
3992        }
3993    }
3994
3995    if (!options) {
3996        error_exit("Must specify options (-o)");
3997    }
3998
3999    if (qemu_opts_foreach(&qemu_object_opts,
4000                          user_creatable_add_opts_foreach,
4001                          qemu_img_object_print_help, &error_fatal)) {
4002        ret = -1;
4003        goto out_no_progress;
4004    }
4005
4006    if (quiet) {
4007        progress = false;
4008    }
4009    qemu_progress_init(progress, 1.0);
4010
4011    filename = (optind == argc - 1) ? argv[argc - 1] : NULL;
4012    if (fmt && has_help_option(options)) {
4013        /* If a format is explicitly specified (and possibly no filename is
4014         * given), print option help here */
4015        ret = print_amend_option_help(fmt);
4016        goto out;
4017    }
4018
4019    if (optind != argc - 1) {
4020        error_report("Expecting one image file name");
4021        ret = -1;
4022        goto out;
4023    }
4024
4025    flags = BDRV_O_RDWR;
4026    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
4027    if (ret < 0) {
4028        error_report("Invalid cache option: %s", cache);
4029        goto out;
4030    }
4031
4032    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4033                   false);
4034    if (!blk) {
4035        ret = -1;
4036        goto out;
4037    }
4038    bs = blk_bs(blk);
4039
4040    fmt = bs->drv->format_name;
4041
4042    if (has_help_option(options)) {
4043        /* If the format was auto-detected, print option help here */
4044        ret = print_amend_option_help(fmt);
4045        goto out;
4046    }
4047
4048    if (!bs->drv->bdrv_amend_options) {
4049        error_report("Format driver '%s' does not support option amendment",
4050                     fmt);
4051        ret = -1;
4052        goto out;
4053    }
4054
4055    /* Every driver supporting amendment must have create_opts */
4056    assert(bs->drv->create_opts);
4057
4058    create_opts = qemu_opts_append(create_opts, bs->drv->create_opts);
4059    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4060    qemu_opts_do_parse(opts, options, NULL, &err);
4061    if (err) {
4062        error_report_err(err);
4063        ret = -1;
4064        goto out;
4065    }
4066
4067    /* In case the driver does not call amend_status_cb() */
4068    qemu_progress_print(0.f, 0);
4069    ret = bdrv_amend_options(bs, opts, &amend_status_cb, NULL, &err);
4070    qemu_progress_print(100.f, 0);
4071    if (ret < 0) {
4072        error_report_err(err);
4073        goto out;
4074    }
4075
4076out:
4077    qemu_progress_end();
4078
4079out_no_progress:
4080    blk_unref(blk);
4081    qemu_opts_del(opts);
4082    qemu_opts_free(create_opts);
4083    g_free(options);
4084
4085    if (ret) {
4086        return 1;
4087    }
4088    return 0;
4089}
4090
/*
 * State shared between img_bench() and its completion callback bench_cb().
 * The first group is the benchmark configuration filled in by img_bench();
 * the second group is the running state updated by bench_cb().
 */
typedef struct BenchData {
    BlockBackend *blk;      /* backend all requests are submitted to */
    uint64_t image_size;    /* image length; request offsets wrap around it */
    bool write;             /* true: issue writes, false: issue reads */
    int bufsize;            /* size in bytes of one request buffer */
    int step;               /* added to the offset after each submission */
    int nrreq;              /* maximum number of requests in flight */
    int n;                  /* requests that have not completed yet */
    int flush_interval;     /* flush every this many requests; 0 = never */
    bool drain_on_flush;    /* wait for in-flight requests before flushing */
    uint8_t *buf;           /* nrreq * bufsize bytes of request data */
    QEMUIOVector *qiov;     /* nrreq vectors, one bufsize slice of buf each */

    int in_flight;          /* requests submitted but not yet completed */
    bool in_flush;          /* a flush with drained queue is outstanding */
    uint64_t offset;        /* offset used for the next submission */
} BenchData;
4108
/*
 * Completion callback for flushes that bench_cb() issues without draining
 * the request queue first.  A failed flush terminates the process; a
 * successful one needs no further action.  @opaque is unused.
 */
static void bench_undrained_flush_cb(void *opaque, int ret)
{
    if (ret >= 0) {
        return;
    }

    error_report("Failed flush request: %s", strerror(-ret));
    exit(EXIT_FAILURE);
}
4116
4117static void bench_cb(void *opaque, int ret)
4118{
4119    BenchData *b = opaque;
4120    BlockAIOCB *acb;
4121
4122    if (ret < 0) {
4123        error_report("Failed request: %s", strerror(-ret));
4124        exit(EXIT_FAILURE);
4125    }
4126
4127    if (b->in_flush) {
4128        /* Just finished a flush with drained queue: Start next requests */
4129        assert(b->in_flight == 0);
4130        b->in_flush = false;
4131    } else if (b->in_flight > 0) {
4132        int remaining = b->n - b->in_flight;
4133
4134        b->n--;
4135        b->in_flight--;
4136
4137        /* Time for flush? Drain queue if requested, then flush */
4138        if (b->flush_interval && remaining % b->flush_interval == 0) {
4139            if (!b->in_flight || !b->drain_on_flush) {
4140                BlockCompletionFunc *cb;
4141
4142                if (b->drain_on_flush) {
4143                    b->in_flush = true;
4144                    cb = bench_cb;
4145                } else {
4146                    cb = bench_undrained_flush_cb;
4147                }
4148
4149                acb = blk_aio_flush(b->blk, cb, b);
4150                if (!acb) {
4151                    error_report("Failed to issue flush request");
4152                    exit(EXIT_FAILURE);
4153                }
4154            }
4155            if (b->drain_on_flush) {
4156                return;
4157            }
4158        }
4159    }
4160
4161    while (b->n > b->in_flight && b->in_flight < b->nrreq) {
4162        int64_t offset = b->offset;
4163        /* blk_aio_* might look for completed I/Os and kick bench_cb
4164         * again, so make sure this operation is counted by in_flight
4165         * and b->offset is ready for the next submission.
4166         */
4167        b->in_flight++;
4168        b->offset += b->step;
4169        b->offset %= b->image_size;
4170        if (b->write) {
4171            acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
4172        } else {
4173            acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b);
4174        }
4175        if (!acb) {
4176            error_report("Failed to issue request");
4177            exit(EXIT_FAILURE);
4178        }
4179    }
4180}
4181
4182static int img_bench(int argc, char **argv)
4183{
4184    int c, ret = 0;
4185    const char *fmt = NULL, *filename;
4186    bool quiet = false;
4187    bool image_opts = false;
4188    bool is_write = false;
4189    int count = 75000;
4190    int depth = 64;
4191    int64_t offset = 0;
4192    size_t bufsize = 4096;
4193    int pattern = 0;
4194    size_t step = 0;
4195    int flush_interval = 0;
4196    bool drain_on_flush = true;
4197    int64_t image_size;
4198    BlockBackend *blk = NULL;
4199    BenchData data = {};
4200    int flags = 0;
4201    bool writethrough = false;
4202    struct timeval t1, t2;
4203    int i;
4204    bool force_share = false;
4205    size_t buf_size;
4206
4207    for (;;) {
4208        static const struct option long_options[] = {
4209            {"help", no_argument, 0, 'h'},
4210            {"flush-interval", required_argument, 0, OPTION_FLUSH_INTERVAL},
4211            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4212            {"pattern", required_argument, 0, OPTION_PATTERN},
4213            {"no-drain", no_argument, 0, OPTION_NO_DRAIN},
4214            {"force-share", no_argument, 0, 'U'},
4215            {0, 0, 0, 0}
4216        };
4217        c = getopt_long(argc, argv, ":hc:d:f:ni:o:qs:S:t:wU", long_options,
4218                        NULL);
4219        if (c == -1) {
4220            break;
4221        }
4222
4223        switch (c) {
4224        case ':':
4225            missing_argument(argv[optind - 1]);
4226            break;
4227        case '?':
4228            unrecognized_option(argv[optind - 1]);
4229            break;
4230        case 'h':
4231            help();
4232            break;
4233        case 'c':
4234        {
4235            unsigned long res;
4236
4237            if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4238                error_report("Invalid request count specified");
4239                return 1;
4240            }
4241            count = res;
4242            break;
4243        }
4244        case 'd':
4245        {
4246            unsigned long res;
4247
4248            if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4249                error_report("Invalid queue depth specified");
4250                return 1;
4251            }
4252            depth = res;
4253            break;
4254        }
4255        case 'f':
4256            fmt = optarg;
4257            break;
4258        case 'n':
4259            flags |= BDRV_O_NATIVE_AIO;
4260            break;
4261        case 'i':
4262            ret = bdrv_parse_aio(optarg, &flags);
4263            if (ret < 0) {
4264                error_report("Invalid aio option: %s", optarg);
4265                ret = -1;
4266                goto out;
4267            }
4268            break;
4269        case 'o':
4270        {
4271            offset = cvtnum(optarg);
4272            if (offset < 0) {
4273                error_report("Invalid offset specified");
4274                return 1;
4275            }
4276            break;
4277        }
4278            break;
4279        case 'q':
4280            quiet = true;
4281            break;
4282        case 's':
4283        {
4284            int64_t sval;
4285
4286            sval = cvtnum(optarg);
4287            if (sval < 0 || sval > INT_MAX) {
4288                error_report("Invalid buffer size specified");
4289                return 1;
4290            }
4291
4292            bufsize = sval;
4293            break;
4294        }
4295        case 'S':
4296        {
4297            int64_t sval;
4298
4299            sval = cvtnum(optarg);
4300            if (sval < 0 || sval > INT_MAX) {
4301                error_report("Invalid step size specified");
4302                return 1;
4303            }
4304
4305            step = sval;
4306            break;
4307        }
4308        case 't':
4309            ret = bdrv_parse_cache_mode(optarg, &flags, &writethrough);
4310            if (ret < 0) {
4311                error_report("Invalid cache mode");
4312                ret = -1;
4313                goto out;
4314            }
4315            break;
4316        case 'w':
4317            flags |= BDRV_O_RDWR;
4318            is_write = true;
4319            break;
4320        case 'U':
4321            force_share = true;
4322            break;
4323        case OPTION_PATTERN:
4324        {
4325            unsigned long res;
4326
4327            if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > 0xff) {
4328                error_report("Invalid pattern byte specified");
4329                return 1;
4330            }
4331            pattern = res;
4332            break;
4333        }
4334        case OPTION_FLUSH_INTERVAL:
4335        {
4336            unsigned long res;
4337
4338            if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4339                error_report("Invalid flush interval specified");
4340                return 1;
4341            }
4342            flush_interval = res;
4343            break;
4344        }
4345        case OPTION_NO_DRAIN:
4346            drain_on_flush = false;
4347            break;
4348        case OPTION_IMAGE_OPTS:
4349            image_opts = true;
4350            break;
4351        }
4352    }
4353
4354    if (optind != argc - 1) {
4355        error_exit("Expecting one image file name");
4356    }
4357    filename = argv[argc - 1];
4358
4359    if (!is_write && flush_interval) {
4360        error_report("--flush-interval is only available in write tests");
4361        ret = -1;
4362        goto out;
4363    }
4364    if (flush_interval && flush_interval < depth) {
4365        error_report("Flush interval can't be smaller than depth");
4366        ret = -1;
4367        goto out;
4368    }
4369
4370    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4371                   force_share);
4372    if (!blk) {
4373        ret = -1;
4374        goto out;
4375    }
4376
4377    image_size = blk_getlength(blk);
4378    if (image_size < 0) {
4379        ret = image_size;
4380        goto out;
4381    }
4382
4383    data = (BenchData) {
4384        .blk            = blk,
4385        .image_size     = image_size,
4386        .bufsize        = bufsize,
4387        .step           = step ?: bufsize,
4388        .nrreq          = depth,
4389        .n              = count,
4390        .offset         = offset,
4391        .write          = is_write,
4392        .flush_interval = flush_interval,
4393        .drain_on_flush = drain_on_flush,
4394    };
4395    printf("Sending %d %s requests, %d bytes each, %d in parallel "
4396           "(starting at offset %" PRId64 ", step size %d)\n",
4397           data.n, data.write ? "write" : "read", data.bufsize, data.nrreq,
4398           data.offset, data.step);
4399    if (flush_interval) {
4400        printf("Sending flush every %d requests\n", flush_interval);
4401    }
4402
4403    buf_size = data.nrreq * data.bufsize;
4404    data.buf = blk_blockalign(blk, buf_size);
4405    memset(data.buf, pattern, data.nrreq * data.bufsize);
4406
4407    blk_register_buf(blk, data.buf, buf_size);
4408
4409    data.qiov = g_new(QEMUIOVector, data.nrreq);
4410    for (i = 0; i < data.nrreq; i++) {
4411        qemu_iovec_init(&data.qiov[i], 1);
4412        qemu_iovec_add(&data.qiov[i],
4413                       data.buf + i * data.bufsize, data.bufsize);
4414    }
4415
4416    gettimeofday(&t1, NULL);
4417    bench_cb(&data, 0);
4418
4419    while (data.n > 0) {
4420        main_loop_wait(false);
4421    }
4422    gettimeofday(&t2, NULL);
4423
4424    printf("Run completed in %3.3f seconds.\n",
4425           (t2.tv_sec - t1.tv_sec)
4426           + ((double)(t2.tv_usec - t1.tv_usec) / 1000000));
4427
4428out:
4429    if (data.buf) {
4430        blk_unregister_buf(blk, data.buf);
4431    }
4432    qemu_vfree(data.buf);
4433    blk_unref(blk);
4434
4435    if (ret) {
4436        return 1;
4437    }
4438    return 0;
4439}
4440
/*
 * Bits recorded in DdInfo.flags, one per dd operand that was given on the
 * command line.
 */
#define C_BS      (1 << 0)
#define C_COUNT   (1 << 1)
#define C_IF      (1 << 2)
#define C_OF      (1 << 3)
#define C_SKIP    (1 << 4)
4446
/* Settings of a dd invocation that belong to neither input nor output */
struct DdInfo {
    unsigned int flags; /* C_* bits of the operands that were specified */
    int64_t count;      /* value of the count= operand, in input blocks */
};
4451
/* Describes one side (input or output) of a dd operation */
struct DdIo {
    int bsz;    /* Block size */
    char *filename; /* copy of the if=/of= operand value */
    uint8_t *buf;   /* data buffer; img_dd() allocates bsz bytes for input */
    int64_t offset; /* value of the skip= operand, counted in blocks */
};
4458
/* One entry of the table of operands ("name=value") understood by dd */
struct DdOpts {
    const char *name;   /* operand name, i.e. the text before '=' */
    /* Handler called with the text after '='; returns non-zero on error */
    int (*f)(const char *, struct DdIo *, struct DdIo *, struct DdInfo *);
    unsigned int flag;  /* C_* bit set in DdInfo.flags once handled */
};
4464
4465static int img_dd_bs(const char *arg,
4466                     struct DdIo *in, struct DdIo *out,
4467                     struct DdInfo *dd)
4468{
4469    int64_t res;
4470
4471    res = cvtnum(arg);
4472
4473    if (res <= 0 || res > INT_MAX) {
4474        error_report("invalid number: '%s'", arg);
4475        return 1;
4476    }
4477    in->bsz = out->bsz = res;
4478
4479    return 0;
4480}
4481
4482static int img_dd_count(const char *arg,
4483                        struct DdIo *in, struct DdIo *out,
4484                        struct DdInfo *dd)
4485{
4486    dd->count = cvtnum(arg);
4487
4488    if (dd->count < 0) {
4489        error_report("invalid number: '%s'", arg);
4490        return 1;
4491    }
4492
4493    return 0;
4494}
4495
4496static int img_dd_if(const char *arg,
4497                     struct DdIo *in, struct DdIo *out,
4498                     struct DdInfo *dd)
4499{
4500    in->filename = g_strdup(arg);
4501
4502    return 0;
4503}
4504
4505static int img_dd_of(const char *arg,
4506                     struct DdIo *in, struct DdIo *out,
4507                     struct DdInfo *dd)
4508{
4509    out->filename = g_strdup(arg);
4510
4511    return 0;
4512}
4513
4514static int img_dd_skip(const char *arg,
4515                       struct DdIo *in, struct DdIo *out,
4516                       struct DdInfo *dd)
4517{
4518    in->offset = cvtnum(arg);
4519
4520    if (in->offset < 0) {
4521        error_report("invalid number: '%s'", arg);
4522        return 1;
4523    }
4524
4525    return 0;
4526}
4527
4528static int img_dd(int argc, char **argv)
4529{
4530    int ret = 0;
4531    char *arg = NULL;
4532    char *tmp;
4533    BlockDriver *drv = NULL, *proto_drv = NULL;
4534    BlockBackend *blk1 = NULL, *blk2 = NULL;
4535    QemuOpts *opts = NULL;
4536    QemuOptsList *create_opts = NULL;
4537    Error *local_err = NULL;
4538    bool image_opts = false;
4539    int c, i;
4540    const char *out_fmt = "raw";
4541    const char *fmt = NULL;
4542    int64_t size = 0;
4543    int64_t block_count = 0, out_pos, in_pos;
4544    bool force_share = false;
4545    struct DdInfo dd = {
4546        .flags = 0,
4547        .count = 0,
4548    };
4549    struct DdIo in = {
4550        .bsz = 512, /* Block size is by default 512 bytes */
4551        .filename = NULL,
4552        .buf = NULL,
4553        .offset = 0
4554    };
4555    struct DdIo out = {
4556        .bsz = 512,
4557        .filename = NULL,
4558        .buf = NULL,
4559        .offset = 0
4560    };
4561
4562    const struct DdOpts options[] = {
4563        { "bs", img_dd_bs, C_BS },
4564        { "count", img_dd_count, C_COUNT },
4565        { "if", img_dd_if, C_IF },
4566        { "of", img_dd_of, C_OF },
4567        { "skip", img_dd_skip, C_SKIP },
4568        { NULL, NULL, 0 }
4569    };
4570    const struct option long_options[] = {
4571        { "help", no_argument, 0, 'h'},
4572        { "object", required_argument, 0, OPTION_OBJECT},
4573        { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4574        { "force-share", no_argument, 0, 'U'},
4575        { 0, 0, 0, 0 }
4576    };
4577
4578    while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) {
4579        if (c == EOF) {
4580            break;
4581        }
4582        switch (c) {
4583        case 'O':
4584            out_fmt = optarg;
4585            break;
4586        case 'f':
4587            fmt = optarg;
4588            break;
4589        case ':':
4590            missing_argument(argv[optind - 1]);
4591            break;
4592        case '?':
4593            unrecognized_option(argv[optind - 1]);
4594            break;
4595        case 'h':
4596            help();
4597            break;
4598        case 'U':
4599            force_share = true;
4600            break;
4601        case OPTION_OBJECT:
4602            if (!qemu_opts_parse_noisily(&qemu_object_opts, optarg, true)) {
4603                ret = -1;
4604                goto out;
4605            }
4606            break;
4607        case OPTION_IMAGE_OPTS:
4608            image_opts = true;
4609            break;
4610        }
4611    }
4612
4613    for (i = optind; i < argc; i++) {
4614        int j;
4615        arg = g_strdup(argv[i]);
4616
4617        tmp = strchr(arg, '=');
4618        if (tmp == NULL) {
4619            error_report("unrecognized operand %s", arg);
4620            ret = -1;
4621            goto out;
4622        }
4623
4624        *tmp++ = '\0';
4625
4626        for (j = 0; options[j].name != NULL; j++) {
4627            if (!strcmp(arg, options[j].name)) {
4628                break;
4629            }
4630        }
4631        if (options[j].name == NULL) {
4632            error_report("unrecognized operand %s", arg);
4633            ret = -1;
4634            goto out;
4635        }
4636
4637        if (options[j].f(tmp, &in, &out, &dd) != 0) {
4638            ret = -1;
4639            goto out;
4640        }
4641        dd.flags |= options[j].flag;
4642        g_free(arg);
4643        arg = NULL;
4644    }
4645
4646    if (!(dd.flags & C_IF && dd.flags & C_OF)) {
4647        error_report("Must specify both input and output files");
4648        ret = -1;
4649        goto out;
4650    }
4651
4652    if (qemu_opts_foreach(&qemu_object_opts,
4653                          user_creatable_add_opts_foreach,
4654                          qemu_img_object_print_help, &error_fatal)) {
4655        ret = -1;
4656        goto out;
4657    }
4658
4659    blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
4660                    force_share);
4661
4662    if (!blk1) {
4663        ret = -1;
4664        goto out;
4665    }
4666
4667    drv = bdrv_find_format(out_fmt);
4668    if (!drv) {
4669        error_report("Unknown file format");
4670        ret = -1;
4671        goto out;
4672    }
4673    proto_drv = bdrv_find_protocol(out.filename, true, &local_err);
4674
4675    if (!proto_drv) {
4676        error_report_err(local_err);
4677        ret = -1;
4678        goto out;
4679    }
4680    if (!drv->create_opts) {
4681        error_report("Format driver '%s' does not support image creation",
4682                     drv->format_name);
4683        ret = -1;
4684        goto out;
4685    }
4686    if (!proto_drv->create_opts) {
4687        error_report("Protocol driver '%s' does not support image creation",
4688                     proto_drv->format_name);
4689        ret = -1;
4690        goto out;
4691    }
4692    create_opts = qemu_opts_append(create_opts, drv->create_opts);
4693    create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
4694
4695    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4696
4697    size = blk_getlength(blk1);
4698    if (size < 0) {
4699        error_report("Failed to get size for '%s'", in.filename);
4700        ret = -1;
4701        goto out;
4702    }
4703
4704    if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
4705        dd.count * in.bsz < size) {
4706        size = dd.count * in.bsz;
4707    }
4708
4709    /* Overflow means the specified offset is beyond input image's size */
4710    if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
4711                              size < in.bsz * in.offset)) {
4712        qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort);
4713    } else {
4714        qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
4715                            size - in.bsz * in.offset, &error_abort);
4716    }
4717
4718    ret = bdrv_create(drv, out.filename, opts, &local_err);
4719    if (ret < 0) {
4720        error_reportf_err(local_err,
4721                          "%s: error while creating output image: ",
4722                          out.filename);
4723        ret = -1;
4724        goto out;
4725    }
4726
4727    /* TODO, we can't honour --image-opts for the target,
4728     * since it needs to be given in a format compatible
4729     * with the bdrv_create() call above which does not
4730     * support image-opts style.
4731     */
4732    blk2 = img_open_file(out.filename, NULL, out_fmt, BDRV_O_RDWR,
4733                         false, false, false);
4734
4735    if (!blk2) {
4736        ret = -1;
4737        goto out;
4738    }
4739
4740    if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
4741                              size < in.offset * in.bsz)) {
4742        /* We give a warning if the skip option is bigger than the input
4743         * size and create an empty output disk image (i.e. like dd(1)).
4744         */
4745        error_report("%s: cannot skip to specified offset", in.filename);
4746        in_pos = size;
4747    } else {
4748        in_pos = in.offset * in.bsz;
4749    }
4750
4751    in.buf = g_new(uint8_t, in.bsz);
4752
4753    for (out_pos = 0; in_pos < size; block_count++) {
4754        int in_ret, out_ret;
4755
4756        if (in_pos + in.bsz > size) {
4757            in_ret = blk_pread(blk1, in_pos, in.buf, size - in_pos);
4758        } else {
4759            in_ret = blk_pread(blk1, in_pos, in.buf, in.bsz);
4760        }
4761        if (in_ret < 0) {
4762            error_report("error while reading from input image file: %s",
4763                         strerror(-in_ret));
4764            ret = -1;
4765            goto out;
4766        }
4767        in_pos += in_ret;
4768
4769        out_ret = blk_pwrite(blk2, out_pos, in.buf, in_ret, 0);
4770
4771        if (out_ret < 0) {
4772            error_report("error while writing to output image file: %s",
4773                         strerror(-out_ret));
4774            ret = -1;
4775            goto out;
4776        }
4777        out_pos += out_ret;
4778    }
4779
4780out:
4781    g_free(arg);
4782    qemu_opts_del(opts);
4783    qemu_opts_free(create_opts);
4784    blk_unref(blk1);
4785    blk_unref(blk2);
4786    g_free(in.filename);
4787    g_free(out.filename);
4788    g_free(in.buf);
4789    g_free(out.buf);
4790
4791    if (ret) {
4792        return 1;
4793    }
4794    return 0;
4795}
4796
4797static void dump_json_block_measure_info(BlockMeasureInfo *info)
4798{
4799    QString *str;
4800    QObject *obj;
4801    Visitor *v = qobject_output_visitor_new(&obj);
4802
4803    visit_type_BlockMeasureInfo(v, NULL, &info, &error_abort);
4804    visit_complete(v, &obj);
4805    str = qobject_to_json_pretty(obj);
4806    assert(str != NULL);
4807    printf("%s\n", qstring_get_str(str));
4808    qobject_unref(obj);
4809    visit_free(v);
4810    qobject_unref(str);
4811}
4812
4813static int img_measure(int argc, char **argv)
4814{
4815    static const struct option long_options[] = {
4816        {"help", no_argument, 0, 'h'},
4817        {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4818        {"object", required_argument, 0, OPTION_OBJECT},
4819        {"output", required_argument, 0, OPTION_OUTPUT},
4820        {"size", required_argument, 0, OPTION_SIZE},
4821        {"force-share", no_argument, 0, 'U'},
4822        {0, 0, 0, 0}
4823    };
4824    OutputFormat output_format = OFORMAT_HUMAN;
4825    BlockBackend *in_blk = NULL;
4826    BlockDriver *drv;
4827    const char *filename = NULL;
4828    const char *fmt = NULL;
4829    const char *out_fmt = "raw";
4830    char *options = NULL;
4831    char *snapshot_name = NULL;
4832    bool force_share = false;
4833    QemuOpts *opts = NULL;
4834    QemuOpts *object_opts = NULL;
4835    QemuOpts *sn_opts = NULL;
4836    QemuOptsList *create_opts = NULL;
4837    bool image_opts = false;
4838    uint64_t img_size = UINT64_MAX;
4839    BlockMeasureInfo *info = NULL;
4840    Error *local_err = NULL;
4841    int ret = 1;
4842    int c;
4843
4844    while ((c = getopt_long(argc, argv, "hf:O:o:l:U",
4845                            long_options, NULL)) != -1) {
4846        switch (c) {
4847        case '?':
4848        case 'h':
4849            help();
4850            break;
4851        case 'f':
4852            fmt = optarg;
4853            break;
4854        case 'O':
4855            out_fmt = optarg;
4856            break;
4857        case 'o':
4858            if (!is_valid_option_list(optarg)) {
4859                error_report("Invalid option list: %s", optarg);
4860                goto out;
4861            }
4862            if (!options) {
4863                options = g_strdup(optarg);
4864            } else {
4865                char *old_options = options;
4866                options = g_strdup_printf("%s,%s", options, optarg);
4867                g_free(old_options);
4868            }
4869            break;
4870        case 'l':
4871            if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
4872                sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
4873                                                  optarg, false);
4874                if (!sn_opts) {
4875                    error_report("Failed in parsing snapshot param '%s'",
4876                                 optarg);
4877                    goto out;
4878                }
4879            } else {
4880                snapshot_name = optarg;
4881            }
4882            break;
4883        case 'U':
4884            force_share = true;
4885            break;
4886        case OPTION_OBJECT:
4887            object_opts = qemu_opts_parse_noisily(&qemu_object_opts,
4888                                                  optarg, true);
4889            if (!object_opts) {
4890                goto out;
4891            }
4892            break;
4893        case OPTION_IMAGE_OPTS:
4894            image_opts = true;
4895            break;
4896        case OPTION_OUTPUT:
4897            if (!strcmp(optarg, "json")) {
4898                output_format = OFORMAT_JSON;
4899            } else if (!strcmp(optarg, "human")) {
4900                output_format = OFORMAT_HUMAN;
4901            } else {
4902                error_report("--output must be used with human or json "
4903                             "as argument.");
4904                goto out;
4905            }
4906            break;
4907        case OPTION_SIZE:
4908        {
4909            int64_t sval;
4910
4911            sval = cvtnum(optarg);
4912            if (sval < 0) {
4913                if (sval == -ERANGE) {
4914                    error_report("Image size must be less than 8 EiB!");
4915                } else {
4916                    error_report("Invalid image size specified! You may use "
4917                                 "k, M, G, T, P or E suffixes for ");
4918                    error_report("kilobytes, megabytes, gigabytes, terabytes, "
4919                                 "petabytes and exabytes.");
4920                }
4921                goto out;
4922            }
4923            img_size = (uint64_t)sval;
4924        }
4925        break;
4926        }
4927    }
4928
4929    if (qemu_opts_foreach(&qemu_object_opts,
4930                          user_creatable_add_opts_foreach,
4931                          qemu_img_object_print_help, &error_fatal)) {
4932        goto out;
4933    }
4934
4935    if (argc - optind > 1) {
4936        error_report("At most one filename argument is allowed.");
4937        goto out;
4938    } else if (argc - optind == 1) {
4939        filename = argv[optind];
4940    }
4941
4942    if (!filename && (image_opts || fmt || snapshot_name || sn_opts)) {
4943        error_report("--image-opts, -f, and -l require a filename argument.");
4944        goto out;
4945    }
4946    if (filename && img_size != UINT64_MAX) {
4947        error_report("--size N cannot be used together with a filename.");
4948        goto out;
4949    }
4950    if (!filename && img_size == UINT64_MAX) {
4951        error_report("Either --size N or one filename must be specified.");
4952        goto out;
4953    }
4954
4955    if (filename) {
4956        in_blk = img_open(image_opts, filename, fmt, 0,
4957                          false, false, force_share);
4958        if (!in_blk) {
4959            goto out;
4960        }
4961
4962        if (sn_opts) {
4963            bdrv_snapshot_load_tmp(blk_bs(in_blk),
4964                    qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
4965                    qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
4966                    &local_err);
4967        } else if (snapshot_name != NULL) {
4968            bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(in_blk),
4969                    snapshot_name, &local_err);
4970        }
4971        if (local_err) {
4972            error_reportf_err(local_err, "Failed to load snapshot: ");
4973            goto out;
4974        }
4975    }
4976
4977    drv = bdrv_find_format(out_fmt);
4978    if (!drv) {
4979        error_report("Unknown file format '%s'", out_fmt);
4980        goto out;
4981    }
4982    if (!drv->create_opts) {
4983        error_report("Format driver '%s' does not support image creation",
4984                     drv->format_name);
4985        goto out;
4986    }
4987
4988    create_opts = qemu_opts_append(create_opts, drv->create_opts);
4989    create_opts = qemu_opts_append(create_opts, bdrv_file.create_opts);
4990    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4991    if (options) {
4992        qemu_opts_do_parse(opts, options, NULL, &local_err);
4993        if (local_err) {
4994            error_report_err(local_err);
4995            error_report("Invalid options for file format '%s'", out_fmt);
4996            goto out;
4997        }
4998    }
4999    if (img_size != UINT64_MAX) {
5000        qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
5001    }
5002
5003    info = bdrv_measure(drv, opts, in_blk ? blk_bs(in_blk) : NULL, &local_err);
5004    if (local_err) {
5005        error_report_err(local_err);
5006        goto out;
5007    }
5008
5009    if (output_format == OFORMAT_HUMAN) {
5010        printf("required size: %" PRIu64 "\n", info->required);
5011        printf("fully allocated size: %" PRIu64 "\n", info->fully_allocated);
5012    } else {
5013        dump_json_block_measure_info(info);
5014    }
5015
5016    ret = 0;
5017
5018out:
5019    qapi_free_BlockMeasureInfo(info);
5020    qemu_opts_del(object_opts);
5021    qemu_opts_del(opts);
5022    qemu_opts_del(sn_opts);
5023    qemu_opts_free(create_opts);
5024    g_free(options);
5025    blk_unref(in_blk);
5026    return ret;
5027}
5028
5029static const img_cmd_t img_cmds[] = {
5030#define DEF(option, callback, arg_string)        \
5031    { option, callback },
5032#include "qemu-img-cmds.h"
5033#undef DEF
5034    { NULL, NULL, },
5035};
5036
5037int main(int argc, char **argv)
5038{
5039    const img_cmd_t *cmd;
5040    const char *cmdname;
5041    Error *local_error = NULL;
5042    char *trace_file = NULL;
5043    int c;
5044    static const struct option long_options[] = {
5045        {"help", no_argument, 0, 'h'},
5046        {"version", no_argument, 0, 'V'},
5047        {"trace", required_argument, NULL, 'T'},
5048        {0, 0, 0, 0}
5049    };
5050
5051#ifdef CONFIG_POSIX
5052    signal(SIGPIPE, SIG_IGN);
5053#endif
5054
5055    error_init(argv[0]);
5056    module_call_init(MODULE_INIT_TRACE);
5057    qemu_init_exec_dir(argv[0]);
5058
5059    if (qemu_init_main_loop(&local_error)) {
5060        error_report_err(local_error);
5061        exit(EXIT_FAILURE);
5062    }
5063
5064    qcrypto_init(&error_fatal);
5065
5066    module_call_init(MODULE_INIT_QOM);
5067    bdrv_init();
5068    if (argc < 2) {
5069        error_exit("Not enough arguments");
5070    }
5071
5072    qemu_add_opts(&qemu_object_opts);
5073    qemu_add_opts(&qemu_source_opts);
5074    qemu_add_opts(&qemu_trace_opts);
5075
5076    while ((c = getopt_long(argc, argv, "+:hVT:", long_options, NULL)) != -1) {
5077        switch (c) {
5078        case ':':
5079            missing_argument(argv[optind - 1]);
5080            return 0;
5081        case '?':
5082            unrecognized_option(argv[optind - 1]);
5083            return 0;
5084        case 'h':
5085            help();
5086            return 0;
5087        case 'V':
5088            printf(QEMU_IMG_VERSION);
5089            return 0;
5090        case 'T':
5091            g_free(trace_file);
5092            trace_file = trace_opt_parse(optarg);
5093            break;
5094        }
5095    }
5096
5097    cmdname = argv[optind];
5098
5099    /* reset getopt_long scanning */
5100    argc -= optind;
5101    if (argc < 1) {
5102        return 0;
5103    }
5104    argv += optind;
5105    qemu_reset_optind();
5106
5107    if (!trace_init_backends()) {
5108        exit(1);
5109    }
5110    trace_init_file(trace_file);
5111    qemu_set_log(LOG_TRACE);
5112
5113    /* find the command */
5114    for (cmd = img_cmds; cmd->name != NULL; cmd++) {
5115        if (!strcmp(cmdname, cmd->name)) {
5116            return cmd->handler(argc, argv);
5117        }
5118    }
5119
5120    /* not found */
5121    error_exit("Command not found: %s", cmdname);
5122}
5123