qemu/qemu-img.c
<<
>>
Prefs
   1/*
   2 * QEMU disk image utility
   3 *
   4 * Copyright (c) 2003-2008 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24
  25#include "qemu/osdep.h"
  26#include <getopt.h>
  27
  28#include "qemu/help-texts.h"
  29#include "qemu/qemu-progress.h"
  30#include "qemu-version.h"
  31#include "qapi/error.h"
  32#include "qapi/qapi-commands-block-core.h"
  33#include "qapi/qapi-visit-block-core.h"
  34#include "qapi/qobject-output-visitor.h"
  35#include "qapi/qmp/qjson.h"
  36#include "qapi/qmp/qdict.h"
  37#include "qemu/cutils.h"
  38#include "qemu/config-file.h"
  39#include "qemu/option.h"
  40#include "qemu/error-report.h"
  41#include "qemu/log.h"
  42#include "qemu/main-loop.h"
  43#include "qemu/module.h"
  44#include "qemu/sockets.h"
  45#include "qemu/units.h"
  46#include "qemu/memalign.h"
  47#include "qom/object_interfaces.h"
  48#include "sysemu/block-backend.h"
  49#include "block/block_int.h"
  50#include "block/blockjob.h"
  51#include "block/dirty-bitmap.h"
  52#include "block/qapi.h"
  53#include "crypto/init.h"
  54#include "trace/control.h"
  55#include "qemu/throttle.h"
  56#include "block/throttle-groups.h"
  57
/*
 * Banner string: the tool name followed by QEMU_FULL_VERSION, a newline,
 * QEMU_COPYRIGHT and a trailing newline. Built by string-literal
 * concatenation, so it can be pasted in front of other literals.
 */
#define QEMU_IMG_VERSION "qemu-img version " QEMU_FULL_VERSION \
                         "\n" QEMU_COPYRIGHT "\n"
  60
/* Associates a command name with the function that implements it. */
typedef struct img_cmd_t {
    /* Command name. */
    const char *name;
    /* Implementation; receives an argc/argv pair and returns an int status. */
    int (*handler)(int argc, char **argv);
} img_cmd_t;
  65
/*
 * Identifiers used as the 'val' member of struct option entries for options
 * that have no single-character equivalent. Numbering starts at 256, which
 * is outside the range of single-character option codes.
 */
enum {
    OPTION_OUTPUT = 256,
    OPTION_BACKING_CHAIN = 257,
    OPTION_OBJECT = 258,
    OPTION_IMAGE_OPTS = 259,
    OPTION_PATTERN = 260,
    OPTION_FLUSH_INTERVAL = 261,
    OPTION_NO_DRAIN = 262,
    OPTION_TARGET_IMAGE_OPTS = 263,
    OPTION_SIZE = 264,
    OPTION_PREALLOCATION = 265,
    OPTION_SHRINK = 266,
    OPTION_SALVAGE = 267,
    OPTION_TARGET_IS_ZERO = 268,
    OPTION_ADD = 269,
    OPTION_REMOVE = 270,
    OPTION_CLEAR = 271,
    OPTION_ENABLE = 272,
    OPTION_DISABLE = 273,
    OPTION_MERGE = 274,
    OPTION_BITMAPS = 275,
    OPTION_FORCE = 276,
    OPTION_SKIP_BROKEN = 277,
};
  90
/* Output style selected with '--output' (json or human). */
typedef enum OutputFormat {
    OFORMAT_JSON,
    OFORMAT_HUMAN,
} OutputFormat;
  95
/*
 * Default cache mode string. Defaults to cache=writeback as data integrity
 * is not important for qemu-img.
 */
#define BDRV_DEFAULT_CACHE "writeback"
  98
  99static void format_print(void *opaque, const char *name)
 100{
 101    printf(" %s", name);
 102}
 103
 104static G_NORETURN G_GNUC_PRINTF(1, 2)
 105void error_exit(const char *fmt, ...)
 106{
 107    va_list ap;
 108
 109    va_start(ap, fmt);
 110    error_vreport(fmt, ap);
 111    va_end(ap);
 112
 113    error_printf("Try 'qemu-img --help' for more information\n");
 114    exit(EXIT_FAILURE);
 115}
 116
 117static G_NORETURN
 118void missing_argument(const char *option)
 119{
 120    error_exit("missing argument for option '%s'", option);
 121}
 122
 123static G_NORETURN
 124void unrecognized_option(const char *option)
 125{
 126    error_exit("unrecognized option '%s'", option);
 127}
 128
/*
 * Print the full usage text followed by the list of supported formats, then
 * exit with EXIT_SUCCESS. Never returns.
 *
 * The per-command syntax lines are generated by expanding the DEF() entries
 * of "qemu-img-cmds.h" into string literals.
 *
 * Please keep in synch with docs/tools/qemu-img.rst
 */
static G_NORETURN
void help(void)
{
    const char *help_msg =
           QEMU_IMG_VERSION
           "usage: qemu-img [standard options] command [command options]\n"
           "QEMU disk image utility\n"
           "\n"
           "    '-h', '--help'       display this help and exit\n"
           "    '-V', '--version'    output version information and exit\n"
           "    '-T', '--trace'      [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
           "                         specify tracing options\n"
           "\n"
           "Command syntax:\n"
/* Each DEF() entry contributes one indented line holding its arg_string. */
#define DEF(option, callback, arg_string)        \
           "  " arg_string "\n"
#include "qemu-img-cmds.h"
#undef DEF
           "\n"
           "Command parameters:\n"
           "  'filename' is a disk image filename\n"
           "  'objectdef' is a QEMU user creatable object definition. See the qemu(1)\n"
           "    manual page for a description of the object properties. The most common\n"
           "    object type is a 'secret', which is used to supply passwords and/or\n"
           "    encryption keys.\n"
           "  'fmt' is the disk image format. It is guessed automatically in most cases\n"
           "  'cache' is the cache mode used to write the output disk image, the valid\n"
           "    options are: 'none', 'writeback' (default, except for convert), 'writethrough',\n"
           "    'directsync' and 'unsafe' (default for convert)\n"
           "  'src_cache' is the cache mode used to read input disk images, the valid\n"
           "    options are the same as for the 'cache' option\n"
           "  'size' is the disk image size in bytes. Optional suffixes\n"
           "    'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M),\n"
           "    'T' (terabyte, 1024G), 'P' (petabyte, 1024T) and 'E' (exabyte, 1024P)  are\n"
           "    supported. 'b' is ignored.\n"
           "  'output_filename' is the destination disk image filename\n"
           "  'output_fmt' is the destination format\n"
           "  'options' is a comma separated list of format specific options in a\n"
           "    name=value format. Use -o help for an overview of the options supported by\n"
           "    the used format\n"
           "  'snapshot_param' is param used for internal snapshot, format\n"
           "    is 'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
           "    '[ID_OR_NAME]'\n"
           "  '-c' indicates that target image must be compressed (qcow format only)\n"
           "  '-u' allows unsafe backing chains. For rebasing, it is assumed that old and\n"
           "       new backing file match exactly. The image doesn't need a working\n"
           "       backing file before rebasing in this case (useful for renaming the\n"
           "       backing file). For image creation, allow creating without attempting\n"
           "       to open the backing file.\n"
           "  '-h' with or without a command shows this help and lists the supported formats\n"
           "  '-p' show progress of command (only certain commands)\n"
           "  '-q' use Quiet mode - do not print any output (except errors)\n"
           "  '-S' indicates the consecutive number of bytes (defaults to 4k) that must\n"
           "       contain only zeros for qemu-img to create a sparse image during\n"
           "       conversion. If the number of bytes is 0, the source will not be scanned for\n"
           "       unallocated or zero sectors, and the destination image will always be\n"
           "       fully allocated\n"
           "  '--output' takes the format in which the output must be done (human or json)\n"
           "  '-n' skips the target volume creation (useful if the volume is created\n"
           "       prior to running qemu-img)\n"
           "\n"
           "Parameters to bitmap subcommand:\n"
           "  'bitmap' is the name of the bitmap to manipulate, through one or more\n"
           "       actions from '--add', '--remove', '--clear', '--enable', '--disable',\n"
           "       or '--merge source'\n"
           "  '-g granularity' sets the granularity for '--add' actions\n"
           "  '-b source' and '-F src_fmt' tell '--merge' actions to find the source\n"
           "       bitmaps from an alternative file\n"
           "\n"
           "Parameters to check subcommand:\n"
           "  '-r' tries to repair any inconsistencies that are found during the check.\n"
           "       '-r leaks' repairs only cluster leaks, whereas '-r all' fixes all\n"
           "       kinds of errors, with a higher risk of choosing the wrong fix or\n"
           "       hiding corruption that has already occurred.\n"
           "\n"
           "Parameters to convert subcommand:\n"
           "  '--bitmaps' copies all top-level persistent bitmaps to destination\n"
           "  '-m' specifies how many coroutines work in parallel during the convert\n"
           "       process (defaults to 8)\n"
           "  '-W' allow to write to the target out of order rather than sequential\n"
           "\n"
           "Parameters to snapshot subcommand:\n"
           "  'snapshot' is the name of the snapshot to create, apply or delete\n"
           "  '-a' applies a snapshot (revert disk to saved state)\n"
           "  '-c' creates a snapshot\n"
           "  '-d' deletes a snapshot\n"
           "  '-l' lists all snapshots in the given image\n"
           "\n"
           "Parameters to compare subcommand:\n"
           "  '-f' first image format\n"
           "  '-F' second image format\n"
           "  '-s' run in Strict mode - fail on different image size or sector allocation\n"
           "\n"
           "Parameters to dd subcommand:\n"
           "  'bs=BYTES' read and write up to BYTES bytes at a time "
           "(default: 512)\n"
           "  'count=N' copy only N input blocks\n"
           "  'if=FILE' read from FILE\n"
           "  'of=FILE' write to FILE\n"
           "  'skip=N' skip N bs-sized blocks at the start of input\n";

    /* Usage text first, then one " <name>" per format via format_print(). */
    printf("%s\nSupported formats:", help_msg);
    bdrv_iterate_format(format_print, NULL, false);
    printf("\n\n" QEMU_HELP_BOTTOM "\n");
    exit(EXIT_SUCCESS);
}
 236
 237/*
 238 * Is @optarg safe for accumulate_options()?
 239 * It is when multiple of them can be joined together separated by ','.
 240 * To make that work, @optarg must not start with ',' (or else a
 241 * separating ',' preceding it gets escaped), and it must not end with
 242 * an odd number of ',' (or else a separating ',' following it gets
 243 * escaped), or be empty (or else a separating ',' preceding it can
 244 * escape a separating ',' following it).
 245 * 
 246 */
 247static bool is_valid_option_list(const char *optarg)
 248{
 249    size_t len = strlen(optarg);
 250    size_t i;
 251
 252    if (!optarg[0] || optarg[0] == ',') {
 253        return false;
 254    }
 255
 256    for (i = len; i > 0 && optarg[i - 1] == ','; i--) {
 257    }
 258    if ((len - i) % 2) {
 259        return false;
 260    }
 261
 262    return true;
 263}
 264
 265static int accumulate_options(char **options, char *optarg)
 266{
 267    char *new_options;
 268
 269    if (!is_valid_option_list(optarg)) {
 270        error_report("Invalid option list: %s", optarg);
 271        return -1;
 272    }
 273
 274    if (!*options) {
 275        *options = g_strdup(optarg);
 276    } else {
 277        new_options = g_strdup_printf("%s,%s", *options, optarg);
 278        g_free(*options);
 279        *options = new_options;
 280    }
 281    return 0;
 282}
 283
/*
 * Option list named "source"; img_open() looks it up by that name to parse
 * an --image-opts string. A bare value with no key is taken as "file".
 * The descriptor table is empty apart from its terminator.
 */
static QemuOptsList qemu_source_opts = {
    .name = "source",
    .implied_opt_name = "file",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_source_opts.head),
    .desc = {
        { }
    },
};
 292
 293static int G_GNUC_PRINTF(2, 3) qprintf(bool quiet, const char *fmt, ...)
 294{
 295    int ret = 0;
 296    if (!quiet) {
 297        va_list args;
 298        va_start(args, fmt);
 299        ret = vprintf(fmt, args);
 300        va_end(args);
 301    }
 302    return ret;
 303}
 304
 305
 306static int print_block_option_help(const char *filename, const char *fmt)
 307{
 308    BlockDriver *drv, *proto_drv;
 309    QemuOptsList *create_opts = NULL;
 310    Error *local_err = NULL;
 311
 312    /* Find driver and parse its options */
 313    drv = bdrv_find_format(fmt);
 314    if (!drv) {
 315        error_report("Unknown file format '%s'", fmt);
 316        return 1;
 317    }
 318
 319    if (!drv->create_opts) {
 320        error_report("Format driver '%s' does not support image creation", fmt);
 321        return 1;
 322    }
 323
 324    create_opts = qemu_opts_append(create_opts, drv->create_opts);
 325    if (filename) {
 326        proto_drv = bdrv_find_protocol(filename, true, &local_err);
 327        if (!proto_drv) {
 328            error_report_err(local_err);
 329            qemu_opts_free(create_opts);
 330            return 1;
 331        }
 332        if (!proto_drv->create_opts) {
 333            error_report("Protocol driver '%s' does not support image creation",
 334                         proto_drv->format_name);
 335            qemu_opts_free(create_opts);
 336            return 1;
 337        }
 338        create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
 339    }
 340
 341    if (filename) {
 342        printf("Supported options:\n");
 343    } else {
 344        printf("Supported %s options:\n", fmt);
 345    }
 346    qemu_opts_print_help(create_opts, false);
 347    qemu_opts_free(create_opts);
 348
 349    if (!filename) {
 350        printf("\n"
 351               "The protocol level may support further options.\n"
 352               "Specify the target filename to include those options.\n");
 353    }
 354
 355    return 0;
 356}
 357
 358
 359static BlockBackend *img_open_opts(const char *optstr,
 360                                   QemuOpts *opts, int flags, bool writethrough,
 361                                   bool quiet, bool force_share)
 362{
 363    QDict *options;
 364    Error *local_err = NULL;
 365    BlockBackend *blk;
 366    options = qemu_opts_to_qdict(opts, NULL);
 367    if (force_share) {
 368        if (qdict_haskey(options, BDRV_OPT_FORCE_SHARE)
 369            && strcmp(qdict_get_str(options, BDRV_OPT_FORCE_SHARE), "on")) {
 370            error_report("--force-share/-U conflicts with image options");
 371            qobject_unref(options);
 372            return NULL;
 373        }
 374        qdict_put_str(options, BDRV_OPT_FORCE_SHARE, "on");
 375    }
 376    blk = blk_new_open(NULL, NULL, options, flags, &local_err);
 377    if (!blk) {
 378        error_reportf_err(local_err, "Could not open '%s': ", optstr);
 379        return NULL;
 380    }
 381    blk_set_enable_write_cache(blk, !writethrough);
 382
 383    return blk;
 384}
 385
 386static BlockBackend *img_open_file(const char *filename,
 387                                   QDict *options,
 388                                   const char *fmt, int flags,
 389                                   bool writethrough, bool quiet,
 390                                   bool force_share)
 391{
 392    BlockBackend *blk;
 393    Error *local_err = NULL;
 394
 395    if (!options) {
 396        options = qdict_new();
 397    }
 398    if (fmt) {
 399        qdict_put_str(options, "driver", fmt);
 400    }
 401
 402    if (force_share) {
 403        qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
 404    }
 405    blk = blk_new_open(filename, NULL, options, flags, &local_err);
 406    if (!blk) {
 407        error_reportf_err(local_err, "Could not open '%s': ", filename);
 408        return NULL;
 409    }
 410    blk_set_enable_write_cache(blk, !writethrough);
 411
 412    return blk;
 413}
 414
 415
 416static int img_add_key_secrets(void *opaque,
 417                               const char *name, const char *value,
 418                               Error **errp)
 419{
 420    QDict *options = opaque;
 421
 422    if (g_str_has_suffix(name, "key-secret")) {
 423        qdict_put_str(options, name, value);
 424    }
 425
 426    return 0;
 427}
 428
 429
 430static BlockBackend *img_open(bool image_opts,
 431                              const char *filename,
 432                              const char *fmt, int flags, bool writethrough,
 433                              bool quiet, bool force_share)
 434{
 435    BlockBackend *blk;
 436    if (image_opts) {
 437        QemuOpts *opts;
 438        if (fmt) {
 439            error_report("--image-opts and --format are mutually exclusive");
 440            return NULL;
 441        }
 442        opts = qemu_opts_parse_noisily(qemu_find_opts("source"),
 443                                       filename, true);
 444        if (!opts) {
 445            return NULL;
 446        }
 447        blk = img_open_opts(filename, opts, flags, writethrough, quiet,
 448                            force_share);
 449    } else {
 450        blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet,
 451                            force_share);
 452    }
 453
 454    if (blk) {
 455        blk_set_force_allow_inactivate(blk);
 456    }
 457
 458    return blk;
 459}
 460
 461
 462static int add_old_style_options(const char *fmt, QemuOpts *opts,
 463                                 const char *base_filename,
 464                                 const char *base_fmt)
 465{
 466    if (base_filename) {
 467        if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename,
 468                          NULL)) {
 469            error_report("Backing file not supported for file format '%s'",
 470                         fmt);
 471            return -1;
 472        }
 473    }
 474    if (base_fmt) {
 475        if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, NULL)) {
 476            error_report("Backing file format not supported for file "
 477                         "format '%s'", fmt);
 478            return -1;
 479        }
 480    }
 481    return 0;
 482}
 483
 484static int64_t cvtnum_full(const char *name, const char *value, int64_t min,
 485                           int64_t max)
 486{
 487    int err;
 488    uint64_t res;
 489
 490    err = qemu_strtosz(value, NULL, &res);
 491    if (err < 0 && err != -ERANGE) {
 492        error_report("Invalid %s specified. You may use "
 493                     "k, M, G, T, P or E suffixes for", name);
 494        error_report("kilobytes, megabytes, gigabytes, terabytes, "
 495                     "petabytes and exabytes.");
 496        return err;
 497    }
 498    if (err == -ERANGE || res > max || res < min) {
 499        error_report("Invalid %s specified. Must be between %" PRId64
 500                     " and %" PRId64 ".", name, min, max);
 501        return -ERANGE;
 502    }
 503    return res;
 504}
 505
 506static int64_t cvtnum(const char *name, const char *value)
 507{
 508    return cvtnum_full(name, value, 0, INT64_MAX);
 509}
 510
/*
 * Handler for image creation: parses the command's options, then calls
 * bdrv_img_create() for the named file.
 *
 * Positional arguments are the image filename and an optional size.
 * Returns 0 on success and 1 on failure; usage errors terminate the
 * process through error_exit(). If the accumulated -o options contain a
 * help request, the supported options are printed instead and the result of
 * print_block_option_help() is returned.
 */
static int img_create(int argc, char **argv)
{
    int c;
    /* Stays (uint64_t)-1 when no size argument is given on the command line */
    uint64_t img_size = -1;
    const char *fmt = "raw";
    const char *base_fmt = NULL;
    const char *filename;
    const char *base_filename = NULL;
    char *options = NULL;
    Error *local_err = NULL;
    bool quiet = false;
    int flags = 0;

    for(;;) {
        static const struct option long_options[] = {
            {"help", no_argument, 0, 'h'},
            {"object", required_argument, 0, OPTION_OBJECT},
            {0, 0, 0, 0}
        };
        /* The leading ':' makes getopt_long() return ':' for a missing argument */
        c = getopt_long(argc, argv, ":F:b:f:ho:qu",
                        long_options, NULL);
        if (c == -1) {
            break;
        }
        switch(c) {
        case ':':
            missing_argument(argv[optind - 1]);
            break;
        case '?':
            unrecognized_option(argv[optind - 1]);
            break;
        case 'h':
            help();
            break;
        case 'F':
            base_fmt = optarg;
            break;
        case 'b':
            base_filename = optarg;
            break;
        case 'f':
            fmt = optarg;
            break;
        case 'o':
            /* -o may be repeated; the values are joined with ',' */
            if (accumulate_options(&options, optarg) < 0) {
                goto fail;
            }
            break;
        case 'q':
            quiet = true;
            break;
        case 'u':
            flags |= BDRV_O_NO_BACKING;
            break;
        case OPTION_OBJECT:
            user_creatable_process_cmdline(optarg);
            break;
        }
    }

    /* Get the filename */
    filename = (optind < argc) ? argv[optind] : NULL;
    /* A help request is served before the filename is required */
    if (options && has_help_option(options)) {
        g_free(options);
        return print_block_option_help(filename, fmt);
    }

    if (optind >= argc) {
        error_exit("Expecting image file name");
    }
    optind++;

    /* Get image size, if specified */
    if (optind < argc) {
        int64_t sval;

        sval = cvtnum("image size", argv[optind++]);
        if (sval < 0) {
            goto fail;
        }
        img_size = (uint64_t)sval;
    }
    /* Anything left over after filename and size is a usage error */
    if (optind != argc) {
        error_exit("Unexpected argument: %s", argv[optind]);
    }

    bdrv_img_create(filename, fmt, base_filename, base_fmt,
                    options, img_size, flags, quiet, &local_err);
    if (local_err) {
        error_reportf_err(local_err, "%s: ", filename);
        goto fail;
    }

    g_free(options);
    return 0;

fail:
    g_free(options);
    return 1;
}
 611
 612static void dump_json_image_check(ImageCheck *check, bool quiet)
 613{
 614    GString *str;
 615    QObject *obj;
 616    Visitor *v = qobject_output_visitor_new(&obj);
 617
 618    visit_type_ImageCheck(v, NULL, &check, &error_abort);
 619    visit_complete(v, &obj);
 620    str = qobject_to_json_pretty(obj, true);
 621    assert(str != NULL);
 622    qprintf(quiet, "%s\n", str->str);
 623    qobject_unref(obj);
 624    visit_free(v);
 625    g_string_free(str, true);
 626}
 627
 628static void dump_human_image_check(ImageCheck *check, bool quiet)
 629{
 630    if (!(check->corruptions || check->leaks || check->check_errors)) {
 631        qprintf(quiet, "No errors were found on the image.\n");
 632    } else {
 633        if (check->corruptions) {
 634            qprintf(quiet, "\n%" PRId64 " errors were found on the image.\n"
 635                    "Data may be corrupted, or further writes to the image "
 636                    "may corrupt it.\n",
 637                    check->corruptions);
 638        }
 639
 640        if (check->leaks) {
 641            qprintf(quiet,
 642                    "\n%" PRId64 " leaked clusters were found on the image.\n"
 643                    "This means waste of disk space, but no harm to data.\n",
 644                    check->leaks);
 645        }
 646
 647        if (check->check_errors) {
 648            qprintf(quiet,
 649                    "\n%" PRId64
 650                    " internal errors have occurred during the check.\n",
 651                    check->check_errors);
 652        }
 653    }
 654
 655    if (check->total_clusters != 0 && check->allocated_clusters != 0) {
 656        qprintf(quiet, "%" PRId64 "/%" PRId64 " = %0.2f%% allocated, "
 657                "%0.2f%% fragmented, %0.2f%% compressed clusters\n",
 658                check->allocated_clusters, check->total_clusters,
 659                check->allocated_clusters * 100.0 / check->total_clusters,
 660                check->fragmented_clusters * 100.0 / check->allocated_clusters,
 661                check->compressed_clusters * 100.0 /
 662                check->allocated_clusters);
 663    }
 664
 665    if (check->image_end_offset) {
 666        qprintf(quiet,
 667                "Image end offset: %" PRId64 "\n", check->image_end_offset);
 668    }
 669}
 670
 671static int collect_image_check(BlockDriverState *bs,
 672                   ImageCheck *check,
 673                   const char *filename,
 674                   const char *fmt,
 675                   int fix)
 676{
 677    int ret;
 678    BdrvCheckResult result;
 679
 680    ret = bdrv_check(bs, &result, fix);
 681    if (ret < 0) {
 682        return ret;
 683    }
 684
 685    check->filename                 = g_strdup(filename);
 686    check->format                   = g_strdup(bdrv_get_format_name(bs));
 687    check->check_errors             = result.check_errors;
 688    check->corruptions              = result.corruptions;
 689    check->has_corruptions          = result.corruptions != 0;
 690    check->leaks                    = result.leaks;
 691    check->has_leaks                = result.leaks != 0;
 692    check->corruptions_fixed        = result.corruptions_fixed;
 693    check->has_corruptions_fixed    = result.corruptions_fixed != 0;
 694    check->leaks_fixed              = result.leaks_fixed;
 695    check->has_leaks_fixed          = result.leaks_fixed != 0;
 696    check->image_end_offset         = result.image_end_offset;
 697    check->has_image_end_offset     = result.image_end_offset != 0;
 698    check->total_clusters           = result.bfi.total_clusters;
 699    check->has_total_clusters       = result.bfi.total_clusters != 0;
 700    check->allocated_clusters       = result.bfi.allocated_clusters;
 701    check->has_allocated_clusters   = result.bfi.allocated_clusters != 0;
 702    check->fragmented_clusters      = result.bfi.fragmented_clusters;
 703    check->has_fragmented_clusters  = result.bfi.fragmented_clusters != 0;
 704    check->compressed_clusters      = result.bfi.compressed_clusters;
 705    check->has_compressed_clusters  = result.bfi.compressed_clusters != 0;
 706
 707    return 0;
 708}
 709
 710/*
 711 * Checks an image for consistency. Exit codes:
 712 *
 713 *  0 - Check completed, image is good
 714 *  1 - Check not completed because of internal errors
 715 *  2 - Check completed, image is corrupted
 716 *  3 - Check completed, image has leaked clusters, but is good otherwise
 717 * 63 - Checks are not supported by the image format
 718 */
 719static int img_check(int argc, char **argv)
 720{
 721    int c, ret;
 722    OutputFormat output_format = OFORMAT_HUMAN;
 723    const char *filename, *fmt, *output, *cache;
 724    BlockBackend *blk;
 725    BlockDriverState *bs;
 726    int fix = 0;
 727    int flags = BDRV_O_CHECK;
 728    bool writethrough;
 729    ImageCheck *check;
 730    bool quiet = false;
 731    bool image_opts = false;
 732    bool force_share = false;
 733
 734    fmt = NULL;
 735    output = NULL;
 736    cache = BDRV_DEFAULT_CACHE;
 737
 738    for(;;) {
 739        int option_index = 0;
 740        static const struct option long_options[] = {
 741            {"help", no_argument, 0, 'h'},
 742            {"format", required_argument, 0, 'f'},
 743            {"repair", required_argument, 0, 'r'},
 744            {"output", required_argument, 0, OPTION_OUTPUT},
 745            {"object", required_argument, 0, OPTION_OBJECT},
 746            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
 747            {"force-share", no_argument, 0, 'U'},
 748            {0, 0, 0, 0}
 749        };
 750        c = getopt_long(argc, argv, ":hf:r:T:qU",
 751                        long_options, &option_index);
 752        if (c == -1) {
 753            break;
 754        }
 755        switch(c) {
 756        case ':':
 757            missing_argument(argv[optind - 1]);
 758            break;
 759        case '?':
 760            unrecognized_option(argv[optind - 1]);
 761            break;
 762        case 'h':
 763            help();
 764            break;
 765        case 'f':
 766            fmt = optarg;
 767            break;
 768        case 'r':
 769            flags |= BDRV_O_RDWR;
 770
 771            if (!strcmp(optarg, "leaks")) {
 772                fix = BDRV_FIX_LEAKS;
 773            } else if (!strcmp(optarg, "all")) {
 774                fix = BDRV_FIX_LEAKS | BDRV_FIX_ERRORS;
 775            } else {
 776                error_exit("Unknown option value for -r "
 777                           "(expecting 'leaks' or 'all'): %s", optarg);
 778            }
 779            break;
 780        case OPTION_OUTPUT:
 781            output = optarg;
 782            break;
 783        case 'T':
 784            cache = optarg;
 785            break;
 786        case 'q':
 787            quiet = true;
 788            break;
 789        case 'U':
 790            force_share = true;
 791            break;
 792        case OPTION_OBJECT:
 793            user_creatable_process_cmdline(optarg);
 794            break;
 795        case OPTION_IMAGE_OPTS:
 796            image_opts = true;
 797            break;
 798        }
 799    }
 800    if (optind != argc - 1) {
 801        error_exit("Expecting one image file name");
 802    }
 803    filename = argv[optind++];
 804
 805    if (output && !strcmp(output, "json")) {
 806        output_format = OFORMAT_JSON;
 807    } else if (output && !strcmp(output, "human")) {
 808        output_format = OFORMAT_HUMAN;
 809    } else if (output) {
 810        error_report("--output must be used with human or json as argument.");
 811        return 1;
 812    }
 813
 814    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
 815    if (ret < 0) {
 816        error_report("Invalid source cache option: %s", cache);
 817        return 1;
 818    }
 819
 820    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
 821                   force_share);
 822    if (!blk) {
 823        return 1;
 824    }
 825    bs = blk_bs(blk);
 826
 827    check = g_new0(ImageCheck, 1);
 828    ret = collect_image_check(bs, check, filename, fmt, fix);
 829
 830    if (ret == -ENOTSUP) {
 831        error_report("This image format does not support checks");
 832        ret = 63;
 833        goto fail;
 834    }
 835
 836    if (check->corruptions_fixed || check->leaks_fixed) {
 837        int corruptions_fixed, leaks_fixed;
 838        bool has_leaks_fixed, has_corruptions_fixed;
 839
 840        leaks_fixed         = check->leaks_fixed;
 841        has_leaks_fixed     = check->has_leaks_fixed;
 842        corruptions_fixed   = check->corruptions_fixed;
 843        has_corruptions_fixed = check->has_corruptions_fixed;
 844
 845        if (output_format == OFORMAT_HUMAN) {
 846            qprintf(quiet,
 847                    "The following inconsistencies were found and repaired:\n\n"
 848                    "    %" PRId64 " leaked clusters\n"
 849                    "    %" PRId64 " corruptions\n\n"
 850                    "Double checking the fixed image now...\n",
 851                    check->leaks_fixed,
 852                    check->corruptions_fixed);
 853        }
 854
 855        qapi_free_ImageCheck(check);
 856        check = g_new0(ImageCheck, 1);
 857        ret = collect_image_check(bs, check, filename, fmt, 0);
 858
 859        check->leaks_fixed          = leaks_fixed;
 860        check->has_leaks_fixed      = has_leaks_fixed;
 861        check->corruptions_fixed    = corruptions_fixed;
 862        check->has_corruptions_fixed = has_corruptions_fixed;
 863    }
 864
 865    if (!ret) {
 866        switch (output_format) {
 867        case OFORMAT_HUMAN:
 868            dump_human_image_check(check, quiet);
 869            break;
 870        case OFORMAT_JSON:
 871            dump_json_image_check(check, quiet);
 872            break;
 873        }
 874    }
 875
 876    if (ret || check->check_errors) {
 877        if (ret) {
 878            error_report("Check failed: %s", strerror(-ret));
 879        } else {
 880            error_report("Check failed");
 881        }
 882        ret = 1;
 883        goto fail;
 884    }
 885
 886    if (check->corruptions) {
 887        ret = 2;
 888    } else if (check->leaks) {
 889        ret = 3;
 890    } else {
 891        ret = 0;
 892    }
 893
 894fail:
 895    qapi_free_ImageCheck(check);
 896    blk_unref(blk);
 897    return ret;
 898}
 899
/*
 * Opaque data handed to common_block_job_cb() when a block job is started
 * from qemu-img.
 */
typedef struct CommonBlockJobCBInfo {
    BlockDriverState *bs;   /* img_commit() stores the image being committed */
    Error **errp;           /* common_block_job_cb() reports job failure here */
} CommonBlockJobCBInfo;
 904
 905static void common_block_job_cb(void *opaque, int ret)
 906{
 907    CommonBlockJobCBInfo *cbi = opaque;
 908
 909    if (ret < 0) {
 910        error_setg_errno(cbi->errp, -ret, "Block job failed");
 911    }
 912}
 913
 914static void run_block_job(BlockJob *job, Error **errp)
 915{
 916    uint64_t progress_current, progress_total;
 917    AioContext *aio_context = block_job_get_aio_context(job);
 918    int ret = 0;
 919
 920    job_lock();
 921    job_ref_locked(&job->job);
 922    do {
 923        float progress = 0.0f;
 924        job_unlock();
 925        aio_poll(aio_context, true);
 926
 927        progress_get_snapshot(&job->job.progress, &progress_current,
 928                              &progress_total);
 929        if (progress_total) {
 930            progress = (float)progress_current / progress_total * 100.f;
 931        }
 932        qemu_progress_print(progress, 0);
 933        job_lock();
 934    } while (!job_is_ready_locked(&job->job) &&
 935             !job_is_completed_locked(&job->job));
 936
 937    if (!job_is_completed_locked(&job->job)) {
 938        ret = job_complete_sync_locked(&job->job, errp);
 939    } else {
 940        ret = job->job.ret;
 941    }
 942    job_unref_locked(&job->job);
 943    job_unlock();
 944
 945    /* publish completion progress only when success */
 946    if (!ret) {
 947        qemu_progress_print(100.f, 0);
 948    }
 949}
 950
/*
 * 'qemu-img commit': commits the contents of an image into an image of its
 * backing chain by running an active commit block job to completion.
 *
 * Options handled below: -f (image format), -t (cache mode), -b (commit
 * target within the backing chain, implies -d), -d (do not empty the image
 * after the commit), -p (show progress), -q (quiet), -r (rate limit),
 * --object and --image-opts.  Exactly one image file name is expected.
 *
 * Returns 0 on success and 1 on failure.
 */
static int img_commit(int argc, char **argv)
{
    int c, ret, flags;
    const char *filename, *fmt, *cache, *base;
    BlockBackend *blk;
    BlockDriverState *bs, *base_bs;
    BlockJob *job;
    bool progress = false, quiet = false, drop = false;
    bool writethrough;
    Error *local_err = NULL;
    CommonBlockJobCBInfo cbi;
    bool image_opts = false;
    AioContext *aio_context;
    int64_t rate_limit = 0;

    fmt = NULL;
    cache = BDRV_DEFAULT_CACHE;
    base = NULL;
    for(;;) {
        static const struct option long_options[] = {
            {"help", no_argument, 0, 'h'},
            {"object", required_argument, 0, OPTION_OBJECT},
            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
            {0, 0, 0, 0}
        };
        /* The leading ':' makes getopt report a missing argument as ':' */
        c = getopt_long(argc, argv, ":f:ht:b:dpqr:",
                        long_options, NULL);
        if (c == -1) {
            break;
        }
        switch(c) {
        case ':':
            missing_argument(argv[optind - 1]);
            break;
        case '?':
            unrecognized_option(argv[optind - 1]);
            break;
        case 'h':
            help();
            break;
        case 'f':
            fmt = optarg;
            break;
        case 't':
            cache = optarg;
            break;
        case 'b':
            base = optarg;
            /* -b implies -d */
            drop = true;
            break;
        case 'd':
            drop = true;
            break;
        case 'p':
            progress = true;
            break;
        case 'q':
            quiet = true;
            break;
        case 'r':
            rate_limit = cvtnum("rate limit", optarg);
            if (rate_limit < 0) {
                return 1;
            }
            break;
        case OPTION_OBJECT:
            user_creatable_process_cmdline(optarg);
            break;
        case OPTION_IMAGE_OPTS:
            image_opts = true;
            break;
        }
    }

    /* Progress is not shown in Quiet mode */
    if (quiet) {
        progress = false;
    }

    if (optind != argc - 1) {
        error_exit("Expecting one image file name");
    }
    filename = argv[optind++];

    flags = BDRV_O_RDWR | BDRV_O_UNMAP;
    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
    if (ret < 0) {
        error_report("Invalid cache option: %s", cache);
        return 1;
    }

    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
                   false);
    if (!blk) {
        return 1;
    }
    bs = blk_bs(blk);

    /* From here on, every exit goes through 'done' to end the progress */
    qemu_progress_init(progress, 1.f);
    qemu_progress_print(0.f, 100);

    if (base) {
        base_bs = bdrv_find_backing_image(bs, base);
        if (!base_bs) {
            error_setg(&local_err,
                       "Did not find '%s' in the backing chain of '%s'",
                       base, filename);
            goto done;
        }
    } else {
        /* This is different from QMP, which by default uses the deepest file in
         * the backing chain (i.e., the very base); however, the traditional
         * behavior of qemu-img commit is using the immediate backing file. */
        base_bs = bdrv_backing_chain_next(bs);
        if (!base_bs) {
            error_setg(&local_err, "Image does not have a backing file");
            goto done;
        }
    }

    /* The job's completion callback reports failure through local_err */
    cbi = (CommonBlockJobCBInfo){
        .errp = &local_err,
        .bs   = bs,
    };

    aio_context = bdrv_get_aio_context(bs);
    aio_context_acquire(aio_context);
    commit_active_start("commit", bs, base_bs, JOB_DEFAULT, rate_limit,
                        BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb,
                        &cbi, false, &local_err);
    aio_context_release(aio_context);
    if (local_err) {
        goto done;
    }

    /* When the block job completes, the BlockBackend reference will point to
     * the old backing file. In order to avoid that the top image is already
     * deleted, so we can still empty it afterwards, increment the reference
     * counter here preemptively. */
    if (!drop) {
        bdrv_ref(bs);
    }

    /* Look the job up by the ID passed to commit_active_start() above */
    job = block_job_get("commit");
    assert(job);
    run_block_job(job, &local_err);
    if (local_err) {
        goto unref_backing;
    }

    if (!drop) {
        BlockBackend *old_backing_blk;

        old_backing_blk = blk_new_with_bs(bs, BLK_PERM_WRITE, BLK_PERM_ALL,
                                          &local_err);
        if (!old_backing_blk) {
            goto unref_backing;
        }
        ret = blk_make_empty(old_backing_blk, &local_err);
        blk_unref(old_backing_blk);
        if (ret == -ENOTSUP) {
            /* Not being able to empty the image is not treated as an error */
            error_free(local_err);
            local_err = NULL;
        } else if (ret < 0) {
            goto unref_backing;
        }
    }

unref_backing:
    /* Drop the extra reference taken before the job was run */
    if (!drop) {
        bdrv_unref(bs);
    }

done:
    qemu_progress_end();

    /*
     * Manually inactivate the image first because this way we can know whether
     * an error occurred. blk_unref() doesn't tell us about failures.
     */
    ret = bdrv_inactivate_all();
    if (ret < 0 && !local_err) {
        error_setg_errno(&local_err, -ret, "Error while closing the image");
    }
    blk_unref(blk);

    if (local_err) {
        error_report_err(local_err);
        return 1;
    }

    qprintf(quiet, "Image committed.\n");
    return 0;
}
1146
1147/*
1148 * Returns -1 if 'buf' contains only zeroes, otherwise the byte index
1149 * of the first sector boundary within buf where the sector contains a
1150 * non-zero byte.  This function is robust to a buffer that is not
1151 * sector-aligned.
1152 */
1153static int64_t find_nonzero(const uint8_t *buf, int64_t n)
1154{
1155    int64_t i;
1156    int64_t end = QEMU_ALIGN_DOWN(n, BDRV_SECTOR_SIZE);
1157
1158    for (i = 0; i < end; i += BDRV_SECTOR_SIZE) {
1159        if (!buffer_is_zero(buf + i, BDRV_SECTOR_SIZE)) {
1160            return i;
1161        }
1162    }
1163    if (i < n && !buffer_is_zero(buf + i, n - end)) {
1164        return i;
1165    }
1166    return -1;
1167}
1168
1169/*
1170 * Returns true iff the first sector pointed to by 'buf' contains at least
1171 * a non-NUL byte.
1172 *
1173 * 'pnum' is set to the number of sectors (including and immediately following
1174 * the first one) that are known to be in the same allocated/unallocated state.
1175 * The function will try to align the end offset to alignment boundaries so
1176 * that the request will at least end aligned and consecutive requests will
1177 * also start at an aligned offset.
1178 */
1179static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum,
1180                                int64_t sector_num, int alignment)
1181{
1182    bool is_zero;
1183    int i, tail;
1184
1185    if (n <= 0) {
1186        *pnum = 0;
1187        return 0;
1188    }
1189    is_zero = buffer_is_zero(buf, BDRV_SECTOR_SIZE);
1190    for(i = 1; i < n; i++) {
1191        buf += BDRV_SECTOR_SIZE;
1192        if (is_zero != buffer_is_zero(buf, BDRV_SECTOR_SIZE)) {
1193            break;
1194        }
1195    }
1196
1197    if (i == n) {
1198        /*
1199         * The whole buf is the same.
1200         * No reason to split it into chunks, so return now.
1201         */
1202        *pnum = i;
1203        return !is_zero;
1204    }
1205
1206    tail = (sector_num + i) & (alignment - 1);
1207    if (tail) {
1208        if (is_zero && i <= tail) {
1209            /*
1210             * For sure next sector after i is data, and it will rewrite this
1211             * tail anyway due to RMW. So, let's just write data now.
1212             */
1213            is_zero = false;
1214        }
1215        if (!is_zero) {
1216            /* If possible, align up end offset of allocated areas. */
1217            i += alignment - tail;
1218            i = MIN(i, n);
1219        } else {
1220            /*
1221             * For sure next sector after i is data, and it will rewrite this
1222             * tail anyway due to RMW. Better is avoid RMW and write zeroes up
1223             * to aligned bound.
1224             */
1225            i -= tail;
1226        }
1227    }
1228    *pnum = i;
1229    return !is_zero;
1230}
1231
1232/*
1233 * Like is_allocated_sectors, but if the buffer starts with a used sector,
1234 * up to 'min' consecutive sectors containing zeros are ignored. This avoids
1235 * breaking up write requests for only small sparse areas.
1236 */
1237static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
1238    int min, int64_t sector_num, int alignment)
1239{
1240    int ret;
1241    int num_checked, num_used;
1242
1243    if (n < min) {
1244        min = n;
1245    }
1246
1247    ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1248    if (!ret) {
1249        return ret;
1250    }
1251
1252    num_used = *pnum;
1253    buf += BDRV_SECTOR_SIZE * *pnum;
1254    n -= *pnum;
1255    sector_num += *pnum;
1256    num_checked = num_used;
1257
1258    while (n > 0) {
1259        ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1260
1261        buf += BDRV_SECTOR_SIZE * *pnum;
1262        n -= *pnum;
1263        sector_num += *pnum;
1264        num_checked += *pnum;
1265        if (ret) {
1266            num_used = num_checked;
1267        } else if (*pnum >= min) {
1268            break;
1269        }
1270    }
1271
1272    *pnum = num_used;
1273    return 1;
1274}
1275
1276/*
1277 * Compares two buffers sector by sector. Returns 0 if the first
1278 * sector of each buffer matches, non-zero otherwise.
1279 *
1280 * pnum is set to the sector-aligned size of the buffer prefix that
1281 * has the same matching status as the first sector.
1282 */
1283static int compare_buffers(const uint8_t *buf1, const uint8_t *buf2,
1284                           int64_t bytes, int64_t *pnum)
1285{
1286    bool res;
1287    int64_t i = MIN(bytes, BDRV_SECTOR_SIZE);
1288
1289    assert(bytes > 0);
1290
1291    res = !!memcmp(buf1, buf2, i);
1292    while (i < bytes) {
1293        int64_t len = MIN(bytes - i, BDRV_SECTOR_SIZE);
1294
1295        if (!!memcmp(buf1 + i, buf2 + i, len) != res) {
1296            break;
1297        }
1298        i += len;
1299    }
1300
1301    *pnum = i;
1302    return res;
1303}
1304
/*
 * Size of the data buffers allocated by img_compare(); also the upper bound
 * for the amount of data it reads per request.
 */
#define IO_BUF_SIZE (2 * MiB)
1306
1307/*
1308 * Check if passed sectors are empty (not allocated or contain only 0 bytes)
1309 *
1310 * Intended for use by 'qemu-img compare': Returns 0 in case sectors are
1311 * filled with 0, 1 if sectors contain non-zero data (this is a comparison
1312 * failure), and 4 on error (the exit status for read errors), after emitting
1313 * an error message.
1314 *
1315 * @param blk:  BlockBackend for the image
1316 * @param offset: Starting offset to check
1317 * @param bytes: Number of bytes to check
1318 * @param filename: Name of disk file we are checking (logging purpose)
1319 * @param buffer: Allocated buffer for storing read data
1320 * @param quiet: Flag for quiet mode
1321 */
1322static int check_empty_sectors(BlockBackend *blk, int64_t offset,
1323                               int64_t bytes, const char *filename,
1324                               uint8_t *buffer, bool quiet)
1325{
1326    int ret = 0;
1327    int64_t idx;
1328
1329    ret = blk_pread(blk, offset, bytes, buffer, 0);
1330    if (ret < 0) {
1331        error_report("Error while reading offset %" PRId64 " of %s: %s",
1332                     offset, filename, strerror(-ret));
1333        return 4;
1334    }
1335    idx = find_nonzero(buffer, bytes);
1336    if (idx >= 0) {
1337        qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1338                offset + idx);
1339        return 1;
1340    }
1341
1342    return 0;
1343}
1344
/*
 * Compares two images. Exit codes:
 *
 * 0 - Images are identical or the requested help was printed
 * 1 - Images differ
 * >1 - Error occurred
 *
 * The error codes used below are 2 (invalid cache option, an image could
 * not be opened, or --object could not be processed), 3 (querying the block
 * status failed) and 4 (an image size could not be determined or reading
 * failed).
 */
static int img_compare(int argc, char **argv)
{
    const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
    BlockBackend *blk1, *blk2;
    BlockDriverState *bs1, *bs2;
    int64_t total_size1, total_size2;
    uint8_t *buf1 = NULL, *buf2 = NULL;
    int64_t pnum1, pnum2;
    int allocated1, allocated2;
    int ret = 0; /* return value - 0 Ident, 1 Different, >1 Error */
    bool progress = false, quiet = false, strict = false;
    int flags;
    bool writethrough;
    int64_t total_size;
    int64_t offset = 0;
    int64_t chunk;
    int c;
    uint64_t progress_base;
    bool image_opts = false;
    bool force_share = false;

    cache = BDRV_DEFAULT_CACHE;
    for (;;) {
        static const struct option long_options[] = {
            {"help", no_argument, 0, 'h'},
            {"object", required_argument, 0, OPTION_OBJECT},
            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
            {"force-share", no_argument, 0, 'U'},
            {0, 0, 0, 0}
        };
        c = getopt_long(argc, argv, ":hf:F:T:pqsU",
                        long_options, NULL);
        if (c == -1) {
            break;
        }
        switch (c) {
        case ':':
            missing_argument(argv[optind - 1]);
            break;
        case '?':
            unrecognized_option(argv[optind - 1]);
            break;
        case 'h':
            help();
            break;
        case 'f':
            fmt1 = optarg;
            break;
        case 'F':
            fmt2 = optarg;
            break;
        case 'T':
            cache = optarg;
            break;
        case 'p':
            progress = true;
            break;
        case 'q':
            quiet = true;
            break;
        case 's':
            strict = true;
            break;
        case 'U':
            force_share = true;
            break;
        case OPTION_OBJECT:
            {
                Error *local_err = NULL;

                /* Handled inline so that a failure exits with status 2 */
                if (!user_creatable_add_from_str(optarg, &local_err)) {
                    if (local_err) {
                        error_report_err(local_err);
                        exit(2);
                    } else {
                        /* Help was printed */
                        exit(EXIT_SUCCESS);
                    }
                }
                break;
            }
        case OPTION_IMAGE_OPTS:
            image_opts = true;
            break;
        }
    }

    /* Progress is not shown in Quiet mode */
    if (quiet) {
        progress = false;
    }


    if (optind != argc - 2) {
        error_exit("Expecting two image file names");
    }
    filename1 = argv[optind++];
    filename2 = argv[optind++];

    /* Initialize before goto out */
    qemu_progress_init(progress, 2.0);

    flags = 0;
    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
    if (ret < 0) {
        error_report("Invalid source cache option: %s", cache);
        ret = 2;
        goto out3;
    }

    blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet,
                    force_share);
    if (!blk1) {
        ret = 2;
        goto out3;
    }

    blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet,
                    force_share);
    if (!blk2) {
        ret = 2;
        goto out2;
    }
    bs1 = blk_bs(blk1);
    bs2 = blk_bs(blk2);

    buf1 = blk_blockalign(blk1, IO_BUF_SIZE);
    buf2 = blk_blockalign(blk2, IO_BUF_SIZE);
    total_size1 = blk_getlength(blk1);
    if (total_size1 < 0) {
        error_report("Can't get size of %s: %s",
                     filename1, strerror(-total_size1));
        ret = 4;
        goto out;
    }
    total_size2 = blk_getlength(blk2);
    if (total_size2 < 0) {
        error_report("Can't get size of %s: %s",
                     filename2, strerror(-total_size2));
        ret = 4;
        goto out;
    }
    /* total_size: range present in both images; progress_base: larger size */
    total_size = MIN(total_size1, total_size2);
    progress_base = MAX(total_size1, total_size2);

    qemu_progress_print(0, 100);

    if (strict && total_size1 != total_size2) {
        ret = 1;
        qprintf(quiet, "Strict mode: Image size mismatch!\n");
        goto out;
    }

    /* Compare the range that exists in both images */
    while (offset < total_size) {
        int status1, status2;

        status1 = bdrv_block_status_above(bs1, NULL, offset,
                                          total_size1 - offset, &pnum1, NULL,
                                          NULL);
        if (status1 < 0) {
            ret = 3;
            error_report("Sector allocation test failed for %s", filename1);
            goto out;
        }
        allocated1 = status1 & BDRV_BLOCK_ALLOCATED;

        status2 = bdrv_block_status_above(bs2, NULL, offset,
                                          total_size2 - offset, &pnum2, NULL,
                                          NULL);
        if (status2 < 0) {
            ret = 3;
            error_report("Sector allocation test failed for %s", filename2);
            goto out;
        }
        allocated2 = status2 & BDRV_BLOCK_ALLOCATED;

        /* Only handle the part for which both status values are valid */
        assert(pnum1 && pnum2);
        chunk = MIN(pnum1, pnum2);

        if (strict) {
            if (status1 != status2) {
                ret = 1;
                qprintf(quiet, "Strict mode: Offset %" PRId64
                        " block status mismatch!\n", offset);
                goto out;
            }
        }
        if ((status1 & BDRV_BLOCK_ZERO) && (status2 & BDRV_BLOCK_ZERO)) {
            /* nothing to do */
        } else if (allocated1 == allocated2) {
            /* Same allocation state: data only needs comparing if allocated */
            if (allocated1) {
                int64_t pnum;

                chunk = MIN(chunk, IO_BUF_SIZE);
                ret = blk_pread(blk1, offset, chunk, buf1, 0);
                if (ret < 0) {
                    error_report("Error while reading offset %" PRId64
                                 " of %s: %s",
                                 offset, filename1, strerror(-ret));
                    ret = 4;
                    goto out;
                }
                ret = blk_pread(blk2, offset, chunk, buf2, 0);
                if (ret < 0) {
                    error_report("Error while reading offset %" PRId64
                                 " of %s: %s",
                                 offset, filename2, strerror(-ret));
                    ret = 4;
                    goto out;
                }
                ret = compare_buffers(buf1, buf2, chunk, &pnum);
                if (ret || pnum != chunk) {
                    /*
                     * Either the first sector differs (mismatch at offset)
                     * or the first pnum bytes match and the mismatch starts
                     * right after them.
                     */
                    qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
                            offset + (ret ? 0 : pnum));
                    ret = 1;
                    goto out;
                }
            }
        } else {
            /*
             * Only one image is allocated here: check that the allocated
             * side contains zeroes. buf1 merely serves as read buffer.
             */
            chunk = MIN(chunk, IO_BUF_SIZE);
            if (allocated1) {
                ret = check_empty_sectors(blk1, offset, chunk,
                                          filename1, buf1, quiet);
            } else {
                ret = check_empty_sectors(blk2, offset, chunk,
                                          filename2, buf1, quiet);
            }
            if (ret) {
                goto out;
            }
        }
        offset += chunk;
        qemu_progress_print(((float) chunk / progress_base) * 100, 100);
    }

    /* Check that the remainder of the larger image contains only zeroes */
    if (total_size1 != total_size2) {
        BlockBackend *blk_over;
        const char *filename_over;

        qprintf(quiet, "Warning: Image size mismatch!\n");
        if (total_size1 > total_size2) {
            blk_over = blk1;
            filename_over = filename1;
        } else {
            blk_over = blk2;
            filename_over = filename2;
        }

        while (offset < progress_base) {
            ret = bdrv_block_status_above(blk_bs(blk_over), NULL, offset,
                                          progress_base - offset, &chunk,
                                          NULL, NULL);
            if (ret < 0) {
                ret = 3;
                error_report("Sector allocation test failed for %s",
                             filename_over);
                goto out;

            }
            /* Data has to be read only if it is not already known to be zero */
            if (ret & BDRV_BLOCK_ALLOCATED && !(ret & BDRV_BLOCK_ZERO)) {
                chunk = MIN(chunk, IO_BUF_SIZE);
                ret = check_empty_sectors(blk_over, offset, chunk,
                                          filename_over, buf1, quiet);
                if (ret) {
                    goto out;
                }
            }
            offset += chunk;
            qemu_progress_print(((float) chunk / progress_base) * 100, 100);
        }
    }

    qprintf(quiet, "Images are identical.\n");
    ret = 0;

    /* Cleanup labels release resources in reverse order of acquisition */
out:
    qemu_vfree(buf1);
    qemu_vfree(buf2);
    blk_unref(blk2);
out2:
    blk_unref(blk1);
out3:
    qemu_progress_end();
    return ret;
}
1637
1638/* Convenience wrapper around qmp_block_dirty_bitmap_merge */
1639static void do_dirty_bitmap_merge(const char *dst_node, const char *dst_name,
1640                                  const char *src_node, const char *src_name,
1641                                  Error **errp)
1642{
1643    BlockDirtyBitmapOrStr *merge_src;
1644    BlockDirtyBitmapOrStrList *list = NULL;
1645
1646    merge_src = g_new0(BlockDirtyBitmapOrStr, 1);
1647    merge_src->type = QTYPE_QDICT;
1648    merge_src->u.external.node = g_strdup(src_node);
1649    merge_src->u.external.name = g_strdup(src_name);
1650    QAPI_LIST_PREPEND(list, merge_src);
1651    qmp_block_dirty_bitmap_merge(dst_node, dst_name, list, errp);
1652    qapi_free_BlockDirtyBitmapOrStrList(list);
1653}
1654
/*
 * Classification of a source range, as chosen by
 * convert_iteration_sectors() from the block status of the source.
 */
enum ImgConvertBlockStatus {
    /* Block status reports data; also the fallback without target backing */
    BLK_DATA,
    /* Block status reports zeroes (within the target backing file's size) */
    BLK_ZERO,
    /*
     * Neither data nor zeroes while the target has a backing file, or zeroes
     * at or beyond the end of the target's backing file
     */
    BLK_BACKING_FILE,
};
1660
/* Number of slots in the per-coroutine arrays of ImgConvertState */
#define MAX_COROUTINES 16
/* NOTE(review): presumably the throttle group name for convert -- confirm */
#define CONVERT_THROTTLE_GROUP "img_convert"
1663
/*
 * State passed to the convert_*() helpers.  Sector numbers handed to these
 * helpers count across all sources as if they were concatenated (see
 * convert_select_part()).
 */
typedef struct ImgConvertState {
    BlockBackend **src;         /* source images, indexed by part number */
    int64_t *src_sectors;       /* length of each source, in sectors */
    int *src_alignment;         /* per-source alignment, in sectors */
    int src_num;                /* number of entries in the arrays above */
    int64_t total_sectors;      /* sector numbers being handled are below it */
    int64_t allocated_sectors;
    int64_t allocated_done;
    int64_t sector_num;
    int64_t wr_offs;
    enum ImgConvertBlockStatus status;  /* valid up to sector_next_status */
    int64_t sector_next_status; /* first sector not covered by 'status' */
    BlockBackend *target;
    bool has_zero_init;
    bool compressed;            /* iterations cover whole clusters if set */
    bool target_is_new;
    bool target_has_backing;
    int64_t target_backing_sectors; /* negative if unknown */
    bool wr_in_order;
    bool copy_range;
    bool salvage;               /* keep going after source I/O errors */
    bool quiet;                 /* suppress warnings emitted in salvage mode */
    int min_sparse;
    int alignment;
    size_t cluster_sectors;     /* cluster size in sectors, used if compressed */
    size_t buf_sectors;         /* upper bound for the size of a data request */
    long num_coroutines;
    int running_coroutines;
    Coroutine *co[MAX_COROUTINES];
    int64_t wait_sector_num[MAX_COROUTINES];
    CoMutex lock;
    int ret;
} ImgConvertState;
1697
1698static void convert_select_part(ImgConvertState *s, int64_t sector_num,
1699                                int *src_cur, int64_t *src_cur_offset)
1700{
1701    *src_cur = 0;
1702    *src_cur_offset = 0;
1703    while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) {
1704        *src_cur_offset += s->src_sectors[*src_cur];
1705        (*src_cur)++;
1706        assert(*src_cur < s->src_num);
1707    }
1708}
1709
/*
 * Determines how many sectors starting at @sector_num can be handled in one
 * iteration, all of them sharing a single status.
 *
 * If the cached status does not cover @sector_num (sector_next_status is not
 * beyond it), the block status of the source is queried and s->status and
 * s->sector_next_status are updated.
 *
 * Returns the number of sectors, or the negative error code returned by the
 * block status query if it failed and salvage mode is off.
 */
static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
{
    int64_t src_cur_offset;
    int ret, n, src_cur;
    bool post_backing_zero = false;

    convert_select_part(s, sector_num, &src_cur, &src_cur_offset);

    assert(s->total_sectors > sector_num);
    n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);

    if (s->target_backing_sectors >= 0) {
        if (sector_num >= s->target_backing_sectors) {
            post_backing_zero = true;
        } else if (sector_num + n > s->target_backing_sectors) {
            /* Split requests around target_backing_sectors (because
             * starting from there, zeros are handled differently) */
            n = s->target_backing_sectors - sector_num;
        }
    }

    /* Refresh the cached status if it does not extend past sector_num */
    if (s->sector_next_status <= sector_num) {
        uint64_t offset = (sector_num - src_cur_offset) * BDRV_SECTOR_SIZE;
        int64_t count;
        int tail;
        BlockDriverState *src_bs = blk_bs(s->src[src_cur]);
        BlockDriverState *base;

        if (s->target_has_backing) {
            base = bdrv_cow_bs(bdrv_skip_filters(src_bs));
        } else {
            base = NULL;
        }

        do {
            count = n * BDRV_SECTOR_SIZE;

            ret = bdrv_block_status_above(src_bs, base, offset, count, &count,
                                          NULL, NULL);

            if (ret < 0) {
                if (s->salvage) {
                    if (n == 1) {
                        if (!s->quiet) {
                            warn_report("error while reading block status at "
                                        "offset %" PRIu64 ": %s", offset,
                                        strerror(-ret));
                        }
                        /* Just try to read the data, then */
                        ret = BDRV_BLOCK_DATA;
                        count = BDRV_SECTOR_SIZE;
                    } else {
                        /* Retry on a shorter range */
                        n = DIV_ROUND_UP(n, 4);
                    }
                } else {
                    error_report("error while reading block status at offset "
                                 "%" PRIu64 ": %s", offset, strerror(-ret));
                    return ret;
                }
            }
        } while (ret < 0);

        /* count is in bytes; convert it back to sectors */
        n = DIV_ROUND_UP(count, BDRV_SECTOR_SIZE);

        /*
         * Avoid that s->sector_next_status becomes unaligned to the source
         * request alignment and/or cluster size to avoid unnecessary read
         * cycles.
         */
        tail = (sector_num - src_cur_offset + n) % s->src_alignment[src_cur];
        if (n > tail) {
            n -= tail;
        }

        if (ret & BDRV_BLOCK_ZERO) {
            s->status = post_backing_zero ? BLK_BACKING_FILE : BLK_ZERO;
        } else if (ret & BDRV_BLOCK_DATA) {
            s->status = BLK_DATA;
        } else {
            s->status = s->target_has_backing ? BLK_BACKING_FILE : BLK_DATA;
        }

        s->sector_next_status = sector_num + n;
    }

    /* Never go beyond the range for which s->status is known */
    n = MIN(n, s->sector_next_status - sector_num);
    if (s->status == BLK_DATA) {
        n = MIN(n, s->buf_sectors);
    }

    /* We need to write complete clusters for compressed images, so if an
     * unallocated area is shorter than that, we must consider the whole
     * cluster allocated. */
    if (s->compressed) {
        if (n < s->cluster_sectors) {
            n = MIN(s->cluster_sectors, s->total_sectors - sector_num);
            s->status = BLK_DATA;
        } else {
            n = QEMU_ALIGN_DOWN(n, s->cluster_sectors);
        }
    }

    return n;
}
1815
1816static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num,
1817                                        int nb_sectors, uint8_t *buf)
1818{
1819    uint64_t single_read_until = 0;
1820    int n, ret;
1821
1822    assert(nb_sectors <= s->buf_sectors);
1823    while (nb_sectors > 0) {
1824        BlockBackend *blk;
1825        int src_cur;
1826        int64_t bs_sectors, src_cur_offset;
1827        uint64_t offset;
1828
1829        /* In the case of compression with multiple source files, we can get a
1830         * nb_sectors that spreads into the next part. So we must be able to
1831         * read across multiple BDSes for one convert_read() call. */
1832        convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1833        blk = s->src[src_cur];
1834        bs_sectors = s->src_sectors[src_cur];
1835
1836        offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1837
1838        n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1839        if (single_read_until > offset) {
1840            n = 1;
1841        }
1842
1843        ret = blk_co_pread(blk, offset, n << BDRV_SECTOR_BITS, buf, 0);
1844        if (ret < 0) {
1845            if (s->salvage) {
1846                if (n > 1) {
1847                    single_read_until = offset + (n << BDRV_SECTOR_BITS);
1848                    continue;
1849                } else {
1850                    if (!s->quiet) {
1851                        warn_report("error while reading offset %" PRIu64
1852                                    ": %s", offset, strerror(-ret));
1853                    }
1854                    memset(buf, 0, BDRV_SECTOR_SIZE);
1855                }
1856            } else {
1857                return ret;
1858            }
1859        }
1860
1861        sector_num += n;
1862        nb_sectors -= n;
1863        buf += n * BDRV_SECTOR_SIZE;
1864    }
1865
1866    return 0;
1867}
1868
1869
1870static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
1871                                         int nb_sectors, uint8_t *buf,
1872                                         enum ImgConvertBlockStatus status)
1873{
1874    int ret;
1875
1876    while (nb_sectors > 0) {
1877        int n = nb_sectors;
1878        BdrvRequestFlags flags = s->compressed ? BDRV_REQ_WRITE_COMPRESSED : 0;
1879
1880        switch (status) {
1881        case BLK_BACKING_FILE:
1882            /* If we have a backing file, leave clusters unallocated that are
1883             * unallocated in the source image, so that the backing file is
1884             * visible at the respective offset. */
1885            assert(s->target_has_backing);
1886            break;
1887
1888        case BLK_DATA:
1889            /* If we're told to keep the target fully allocated (-S 0) or there
1890             * is real non-zero data, we must write it. Otherwise we can treat
1891             * it as zero sectors.
1892             * Compressed clusters need to be written as a whole, so in that
1893             * case we can only save the write if the buffer is completely
1894             * zeroed. */
1895            if (!s->min_sparse ||
1896                (!s->compressed &&
1897                 is_allocated_sectors_min(buf, n, &n, s->min_sparse,
1898                                          sector_num, s->alignment)) ||
1899                (s->compressed &&
1900                 !buffer_is_zero(buf, n * BDRV_SECTOR_SIZE)))
1901            {
1902                ret = blk_co_pwrite(s->target, sector_num << BDRV_SECTOR_BITS,
1903                                    n << BDRV_SECTOR_BITS, buf, flags);
1904                if (ret < 0) {
1905                    return ret;
1906                }
1907                break;
1908            }
1909            /* fall-through */
1910
1911        case BLK_ZERO:
1912            if (s->has_zero_init) {
1913                assert(!s->target_has_backing);
1914                break;
1915            }
1916            ret = blk_co_pwrite_zeroes(s->target,
1917                                       sector_num << BDRV_SECTOR_BITS,
1918                                       n << BDRV_SECTOR_BITS,
1919                                       BDRV_REQ_MAY_UNMAP);
1920            if (ret < 0) {
1921                return ret;
1922            }
1923            break;
1924        }
1925
1926        sector_num += n;
1927        nb_sectors -= n;
1928        buf += n * BDRV_SECTOR_SIZE;
1929    }
1930
1931    return 0;
1932}
1933
1934static int coroutine_fn convert_co_copy_range(ImgConvertState *s, int64_t sector_num,
1935                                              int nb_sectors)
1936{
1937    int n, ret;
1938
1939    while (nb_sectors > 0) {
1940        BlockBackend *blk;
1941        int src_cur;
1942        int64_t bs_sectors, src_cur_offset;
1943        int64_t offset;
1944
1945        convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1946        offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1947        blk = s->src[src_cur];
1948        bs_sectors = s->src_sectors[src_cur];
1949
1950        n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1951
1952        ret = blk_co_copy_range(blk, offset, s->target,
1953                                sector_num << BDRV_SECTOR_BITS,
1954                                n << BDRV_SECTOR_BITS, 0, 0);
1955        if (ret < 0) {
1956            return ret;
1957        }
1958
1959        sector_num += n;
1960        nb_sectors -= n;
1961    }
1962    return 0;
1963}
1964
1965static void coroutine_fn convert_co_do_copy(void *opaque)
1966{
1967    ImgConvertState *s = opaque;
1968    uint8_t *buf = NULL;
1969    int ret, i;
1970    int index = -1;
1971
1972    for (i = 0; i < s->num_coroutines; i++) {
1973        if (s->co[i] == qemu_coroutine_self()) {
1974            index = i;
1975            break;
1976        }
1977    }
1978    assert(index >= 0);
1979
1980    s->running_coroutines++;
1981    buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
1982
1983    while (1) {
1984        int n;
1985        int64_t sector_num;
1986        enum ImgConvertBlockStatus status;
1987        bool copy_range;
1988
1989        qemu_co_mutex_lock(&s->lock);
1990        if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
1991            qemu_co_mutex_unlock(&s->lock);
1992            break;
1993        }
1994        WITH_GRAPH_RDLOCK_GUARD() {
1995            n = convert_iteration_sectors(s, s->sector_num);
1996        }
1997        if (n < 0) {
1998            qemu_co_mutex_unlock(&s->lock);
1999            s->ret = n;
2000            break;
2001        }
2002        /* save current sector and allocation status to local variables */
2003        sector_num = s->sector_num;
2004        status = s->status;
2005        if (!s->min_sparse && s->status == BLK_ZERO) {
2006            n = MIN(n, s->buf_sectors);
2007        }
2008        /* increment global sector counter so that other coroutines can
2009         * already continue reading beyond this request */
2010        s->sector_num += n;
2011        qemu_co_mutex_unlock(&s->lock);
2012
2013        if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) {
2014            s->allocated_done += n;
2015            qemu_progress_print(100.0 * s->allocated_done /
2016                                        s->allocated_sectors, 0);
2017        }
2018
2019retry:
2020        copy_range = s->copy_range && s->status == BLK_DATA;
2021        if (status == BLK_DATA && !copy_range) {
2022            ret = convert_co_read(s, sector_num, n, buf);
2023            if (ret < 0) {
2024                error_report("error while reading at byte %lld: %s",
2025                             sector_num * BDRV_SECTOR_SIZE, strerror(-ret));
2026                s->ret = ret;
2027            }
2028        } else if (!s->min_sparse && status == BLK_ZERO) {
2029            status = BLK_DATA;
2030            memset(buf, 0x00, n * BDRV_SECTOR_SIZE);
2031        }
2032
2033        if (s->wr_in_order) {
2034            /* keep writes in order */
2035            while (s->wr_offs != sector_num && s->ret == -EINPROGRESS) {
2036                s->wait_sector_num[index] = sector_num;
2037                qemu_coroutine_yield();
2038            }
2039            s->wait_sector_num[index] = -1;
2040        }
2041
2042        if (s->ret == -EINPROGRESS) {
2043            if (copy_range) {
2044                WITH_GRAPH_RDLOCK_GUARD() {
2045                    ret = convert_co_copy_range(s, sector_num, n);
2046                }
2047                if (ret) {
2048                    s->copy_range = false;
2049                    goto retry;
2050                }
2051            } else {
2052                ret = convert_co_write(s, sector_num, n, buf, status);
2053            }
2054            if (ret < 0) {
2055                error_report("error while writing at byte %lld: %s",
2056                             sector_num * BDRV_SECTOR_SIZE, strerror(-ret));
2057                s->ret = ret;
2058            }
2059        }
2060
2061        if (s->wr_in_order) {
2062            /* reenter the coroutine that might have waited
2063             * for this write to complete */
2064            s->wr_offs = sector_num + n;
2065            for (i = 0; i < s->num_coroutines; i++) {
2066                if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) {
2067                    /*
2068                     * A -> B -> A cannot occur because A has
2069                     * s->wait_sector_num[i] == -1 during A -> B.  Therefore
2070                     * B will never enter A during this time window.
2071                     */
2072                    qemu_coroutine_enter(s->co[i]);
2073                    break;
2074                }
2075            }
2076        }
2077    }
2078
2079    qemu_vfree(buf);
2080    s->co[index] = NULL;
2081    s->running_coroutines--;
2082    if (!s->running_coroutines && s->ret == -EINPROGRESS) {
2083        /* the convert job finished successfully */
2084        s->ret = 0;
2085    }
2086}
2087
2088static int convert_do_copy(ImgConvertState *s)
2089{
2090    int ret, i, n;
2091    int64_t sector_num = 0;
2092
2093    /* Check whether we have zero initialisation or can get it efficiently */
2094    if (!s->has_zero_init && s->target_is_new && s->min_sparse &&
2095        !s->target_has_backing) {
2096        s->has_zero_init = bdrv_has_zero_init(blk_bs(s->target));
2097    }
2098
2099    /* Allocate buffer for copied data. For compressed images, only one cluster
2100     * can be copied at a time. */
2101    if (s->compressed) {
2102        if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
2103            error_report("invalid cluster size");
2104            return -EINVAL;
2105        }
2106        s->buf_sectors = s->cluster_sectors;
2107    }
2108
2109    while (sector_num < s->total_sectors) {
2110        n = convert_iteration_sectors(s, sector_num);
2111        if (n < 0) {
2112            return n;
2113        }
2114        if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
2115        {
2116            s->allocated_sectors += n;
2117        }
2118        sector_num += n;
2119    }
2120
2121    /* Do the copy */
2122    s->sector_next_status = 0;
2123    s->ret = -EINPROGRESS;
2124
2125    qemu_co_mutex_init(&s->lock);
2126    for (i = 0; i < s->num_coroutines; i++) {
2127        s->co[i] = qemu_coroutine_create(convert_co_do_copy, s);
2128        s->wait_sector_num[i] = -1;
2129        qemu_coroutine_enter(s->co[i]);
2130    }
2131
2132    while (s->running_coroutines) {
2133        main_loop_wait(false);
2134    }
2135
2136    if (s->compressed && !s->ret) {
2137        /* signal EOF to align */
2138        ret = blk_pwrite_compressed(s->target, 0, 0, NULL);
2139        if (ret < 0) {
2140            return ret;
2141        }
2142    }
2143
2144    return s->ret;
2145}
2146
2147/* Check that bitmaps can be copied, or output an error */
2148static int convert_check_bitmaps(BlockDriverState *src, bool skip_broken)
2149{
2150    BdrvDirtyBitmap *bm;
2151
2152    if (!bdrv_supports_persistent_dirty_bitmap(src)) {
2153        error_report("Source lacks bitmap support");
2154        return -1;
2155    }
2156    FOR_EACH_DIRTY_BITMAP(src, bm) {
2157        if (!bdrv_dirty_bitmap_get_persistence(bm)) {
2158            continue;
2159        }
2160        if (!skip_broken && bdrv_dirty_bitmap_inconsistent(bm)) {
2161            error_report("Cannot copy inconsistent bitmap '%s'",
2162                         bdrv_dirty_bitmap_name(bm));
2163            error_printf("Try --skip-broken-bitmaps, or "
2164                         "use 'qemu-img bitmap --remove' to delete it\n");
2165            return -1;
2166        }
2167    }
2168    return 0;
2169}
2170
2171static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst,
2172                                bool skip_broken)
2173{
2174    BdrvDirtyBitmap *bm;
2175    Error *err = NULL;
2176
2177    FOR_EACH_DIRTY_BITMAP(src, bm) {
2178        const char *name;
2179
2180        if (!bdrv_dirty_bitmap_get_persistence(bm)) {
2181            continue;
2182        }
2183        name = bdrv_dirty_bitmap_name(bm);
2184        if (skip_broken && bdrv_dirty_bitmap_inconsistent(bm)) {
2185            warn_report("Skipping inconsistent bitmap '%s'", name);
2186            continue;
2187        }
2188        qmp_block_dirty_bitmap_add(dst->node_name, name,
2189                                   true, bdrv_dirty_bitmap_granularity(bm),
2190                                   true, true,
2191                                   true, !bdrv_dirty_bitmap_enabled(bm),
2192                                   &err);
2193        if (err) {
2194            error_reportf_err(err, "Failed to create bitmap %s: ", name);
2195            return -1;
2196        }
2197
2198        do_dirty_bitmap_merge(dst->node_name, name, src->node_name, name,
2199                              &err);
2200        if (err) {
2201            error_reportf_err(err, "Failed to populate bitmap %s: ", name);
2202            qmp_block_dirty_bitmap_remove(dst->node_name, name, NULL);
2203            return -1;
2204        }
2205    }
2206
2207    return 0;
2208}
2209
2210#define MAX_BUF_SECTORS 32768
2211
2212static void set_rate_limit(BlockBackend *blk, int64_t rate_limit)
2213{
2214    ThrottleConfig cfg;
2215
2216    throttle_config_init(&cfg);
2217    cfg.buckets[THROTTLE_BPS_WRITE].avg = rate_limit;
2218
2219    blk_io_limits_enable(blk, CONVERT_THROTTLE_GROUP);
2220    blk_set_io_limits(blk, &cfg);
2221}
2222
2223static int img_convert(int argc, char **argv)
2224{
2225    int c, bs_i, flags, src_flags = BDRV_O_NO_SHARE;
2226    const char *fmt = NULL, *out_fmt = NULL, *cache = "unsafe",
2227               *src_cache = BDRV_DEFAULT_CACHE, *out_baseimg = NULL,
2228               *out_filename, *out_baseimg_param, *snapshot_name = NULL,
2229               *backing_fmt = NULL;
2230    BlockDriver *drv = NULL, *proto_drv = NULL;
2231    BlockDriverInfo bdi;
2232    BlockDriverState *out_bs;
2233    QemuOpts *opts = NULL, *sn_opts = NULL;
2234    QemuOptsList *create_opts = NULL;
2235    QDict *open_opts = NULL;
2236    char *options = NULL;
2237    Error *local_err = NULL;
2238    bool writethrough, src_writethrough, image_opts = false,
2239         skip_create = false, progress = false, tgt_image_opts = false;
2240    int64_t ret = -EINVAL;
2241    bool force_share = false;
2242    bool explict_min_sparse = false;
2243    bool bitmaps = false;
2244    bool skip_broken = false;
2245    int64_t rate_limit = 0;
2246
2247    ImgConvertState s = (ImgConvertState) {
2248        /* Need at least 4k of zeros for sparse detection */
2249        .min_sparse         = 8,
2250        .copy_range         = false,
2251        .buf_sectors        = IO_BUF_SIZE / BDRV_SECTOR_SIZE,
2252        .wr_in_order        = true,
2253        .num_coroutines     = 8,
2254    };
2255
2256    for(;;) {
2257        static const struct option long_options[] = {
2258            {"help", no_argument, 0, 'h'},
2259            {"object", required_argument, 0, OPTION_OBJECT},
2260            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2261            {"force-share", no_argument, 0, 'U'},
2262            {"target-image-opts", no_argument, 0, OPTION_TARGET_IMAGE_OPTS},
2263            {"salvage", no_argument, 0, OPTION_SALVAGE},
2264            {"target-is-zero", no_argument, 0, OPTION_TARGET_IS_ZERO},
2265            {"bitmaps", no_argument, 0, OPTION_BITMAPS},
2266            {"skip-broken-bitmaps", no_argument, 0, OPTION_SKIP_BROKEN},
2267            {0, 0, 0, 0}
2268        };
2269        c = getopt_long(argc, argv, ":hf:O:B:CcF:o:l:S:pt:T:qnm:WUr:",
2270                        long_options, NULL);
2271        if (c == -1) {
2272            break;
2273        }
2274        switch(c) {
2275        case ':':
2276            missing_argument(argv[optind - 1]);
2277            break;
2278        case '?':
2279            unrecognized_option(argv[optind - 1]);
2280            break;
2281        case 'h':
2282            help();
2283            break;
2284        case 'f':
2285            fmt = optarg;
2286            break;
2287        case 'O':
2288            out_fmt = optarg;
2289            break;
2290        case 'B':
2291            out_baseimg = optarg;
2292            break;
2293        case 'C':
2294            s.copy_range = true;
2295            break;
2296        case 'c':
2297            s.compressed = true;
2298            break;
2299        case 'F':
2300            backing_fmt = optarg;
2301            break;
2302        case 'o':
2303            if (accumulate_options(&options, optarg) < 0) {
2304                goto fail_getopt;
2305            }
2306            break;
2307        case 'l':
2308            if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
2309                sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
2310                                                  optarg, false);
2311                if (!sn_opts) {
2312                    error_report("Failed in parsing snapshot param '%s'",
2313                                 optarg);
2314                    goto fail_getopt;
2315                }
2316            } else {
2317                snapshot_name = optarg;
2318            }
2319            break;
2320        case 'S':
2321        {
2322            int64_t sval;
2323
2324            sval = cvtnum("buffer size for sparse output", optarg);
2325            if (sval < 0) {
2326                goto fail_getopt;
2327            } else if (!QEMU_IS_ALIGNED(sval, BDRV_SECTOR_SIZE) ||
2328                sval / BDRV_SECTOR_SIZE > MAX_BUF_SECTORS) {
2329                error_report("Invalid buffer size for sparse output specified. "
2330                    "Valid sizes are multiples of %llu up to %llu. Select "
2331                    "0 to disable sparse detection (fully allocates output).",
2332                    BDRV_SECTOR_SIZE, MAX_BUF_SECTORS * BDRV_SECTOR_SIZE);
2333                goto fail_getopt;
2334            }
2335
2336            s.min_sparse = sval / BDRV_SECTOR_SIZE;
2337            explict_min_sparse = true;
2338            break;
2339        }
2340        case 'p':
2341            progress = true;
2342            break;
2343        case 't':
2344            cache = optarg;
2345            break;
2346        case 'T':
2347            src_cache = optarg;
2348            break;
2349        case 'q':
2350            s.quiet = true;
2351            break;
2352        case 'n':
2353            skip_create = true;
2354            break;
2355        case 'm':
2356            if (qemu_strtol(optarg, NULL, 0, &s.num_coroutines) ||
2357                s.num_coroutines < 1 || s.num_coroutines > MAX_COROUTINES) {
2358                error_report("Invalid number of coroutines. Allowed number of"
2359                             " coroutines is between 1 and %d", MAX_COROUTINES);
2360                goto fail_getopt;
2361            }
2362            break;
2363        case 'W':
2364            s.wr_in_order = false;
2365            break;
2366        case 'U':
2367            force_share = true;
2368            break;
2369        case 'r':
2370            rate_limit = cvtnum("rate limit", optarg);
2371            if (rate_limit < 0) {
2372                goto fail_getopt;
2373            }
2374            break;
2375        case OPTION_OBJECT:
2376            user_creatable_process_cmdline(optarg);
2377            break;
2378        case OPTION_IMAGE_OPTS:
2379            image_opts = true;
2380            break;
2381        case OPTION_SALVAGE:
2382            s.salvage = true;
2383            break;
2384        case OPTION_TARGET_IMAGE_OPTS:
2385            tgt_image_opts = true;
2386            break;
2387        case OPTION_TARGET_IS_ZERO:
2388            /*
2389             * The user asserting that the target is blank has the
2390             * same effect as the target driver supporting zero
2391             * initialisation.
2392             */
2393            s.has_zero_init = true;
2394            break;
2395        case OPTION_BITMAPS:
2396            bitmaps = true;
2397            break;
2398        case OPTION_SKIP_BROKEN:
2399            skip_broken = true;
2400            break;
2401        }
2402    }
2403
2404    if (!out_fmt && !tgt_image_opts) {
2405        out_fmt = "raw";
2406    }
2407
2408    if (skip_broken && !bitmaps) {
2409        error_report("Use of --skip-broken-bitmaps requires --bitmaps");
2410        goto fail_getopt;
2411    }
2412
2413    if (s.compressed && s.copy_range) {
2414        error_report("Cannot enable copy offloading when -c is used");
2415        goto fail_getopt;
2416    }
2417
2418    if (explict_min_sparse && s.copy_range) {
2419        error_report("Cannot enable copy offloading when -S is used");
2420        goto fail_getopt;
2421    }
2422
2423    if (s.copy_range && s.salvage) {
2424        error_report("Cannot use copy offloading in salvaging mode");
2425        goto fail_getopt;
2426    }
2427
2428    if (tgt_image_opts && !skip_create) {
2429        error_report("--target-image-opts requires use of -n flag");
2430        goto fail_getopt;
2431    }
2432
2433    if (skip_create && options) {
2434        error_report("-o has no effect when skipping image creation");
2435        goto fail_getopt;
2436    }
2437
2438    if (s.has_zero_init && !skip_create) {
2439        error_report("--target-is-zero requires use of -n flag");
2440        goto fail_getopt;
2441    }
2442
2443    s.src_num = argc - optind - 1;
2444    out_filename = s.src_num >= 1 ? argv[argc - 1] : NULL;
2445
2446    if (options && has_help_option(options)) {
2447        if (out_fmt) {
2448            ret = print_block_option_help(out_filename, out_fmt);
2449            goto fail_getopt;
2450        } else {
2451            error_report("Option help requires a format be specified");
2452            goto fail_getopt;
2453        }
2454    }
2455
2456    if (s.src_num < 1) {
2457        error_report("Must specify image file name");
2458        goto fail_getopt;
2459    }
2460
2461    /* ret is still -EINVAL until here */
2462    ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
2463    if (ret < 0) {
2464        error_report("Invalid source cache option: %s", src_cache);
2465        goto fail_getopt;
2466    }
2467
2468    /* Initialize before goto out */
2469    if (s.quiet) {
2470        progress = false;
2471    }
2472    qemu_progress_init(progress, 1.0);
2473    qemu_progress_print(0, 100);
2474
2475    s.src = g_new0(BlockBackend *, s.src_num);
2476    s.src_sectors = g_new(int64_t, s.src_num);
2477    s.src_alignment = g_new(int, s.src_num);
2478
2479    for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2480        BlockDriverState *src_bs;
2481        s.src[bs_i] = img_open(image_opts, argv[optind + bs_i],
2482                               fmt, src_flags, src_writethrough, s.quiet,
2483                               force_share);
2484        if (!s.src[bs_i]) {
2485            ret = -1;
2486            goto out;
2487        }
2488        s.src_sectors[bs_i] = blk_nb_sectors(s.src[bs_i]);
2489        if (s.src_sectors[bs_i] < 0) {
2490            error_report("Could not get size of %s: %s",
2491                         argv[optind + bs_i], strerror(-s.src_sectors[bs_i]));
2492            ret = -1;
2493            goto out;
2494        }
2495        src_bs = blk_bs(s.src[bs_i]);
2496        s.src_alignment[bs_i] = DIV_ROUND_UP(src_bs->bl.request_alignment,
2497                                             BDRV_SECTOR_SIZE);
2498        if (!bdrv_get_info(src_bs, &bdi)) {
2499            s.src_alignment[bs_i] = MAX(s.src_alignment[bs_i],
2500                                        bdi.cluster_size / BDRV_SECTOR_SIZE);
2501        }
2502        s.total_sectors += s.src_sectors[bs_i];
2503    }
2504
2505    if (sn_opts) {
2506        bdrv_snapshot_load_tmp(blk_bs(s.src[0]),
2507                               qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
2508                               qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
2509                               &local_err);
2510    } else if (snapshot_name != NULL) {
2511        if (s.src_num > 1) {
2512            error_report("No support for concatenating multiple snapshot");
2513            ret = -1;
2514            goto out;
2515        }
2516
2517        bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(s.src[0]), snapshot_name,
2518                                             &local_err);
2519    }
2520    if (local_err) {
2521        error_reportf_err(local_err, "Failed to load snapshot: ");
2522        ret = -1;
2523        goto out;
2524    }
2525
2526    if (!skip_create) {
2527        /* Find driver and parse its options */
2528        drv = bdrv_find_format(out_fmt);
2529        if (!drv) {
2530            error_report("Unknown file format '%s'", out_fmt);
2531            ret = -1;
2532            goto out;
2533        }
2534
2535        proto_drv = bdrv_find_protocol(out_filename, true, &local_err);
2536        if (!proto_drv) {
2537            error_report_err(local_err);
2538            ret = -1;
2539            goto out;
2540        }
2541
2542        if (!drv->create_opts) {
2543            error_report("Format driver '%s' does not support image creation",
2544                         drv->format_name);
2545            ret = -1;
2546            goto out;
2547        }
2548
2549        if (!proto_drv->create_opts) {
2550            error_report("Protocol driver '%s' does not support image creation",
2551                         proto_drv->format_name);
2552            ret = -1;
2553            goto out;
2554        }
2555
2556        create_opts = qemu_opts_append(create_opts, drv->create_opts);
2557        create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
2558
2559        opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
2560        if (options) {
2561            if (!qemu_opts_do_parse(opts, options, NULL, &local_err)) {
2562                error_report_err(local_err);
2563                ret = -1;
2564                goto out;
2565            }
2566        }
2567
2568        qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
2569                            s.total_sectors * BDRV_SECTOR_SIZE, &error_abort);
2570        ret = add_old_style_options(out_fmt, opts, out_baseimg, backing_fmt);
2571        if (ret < 0) {
2572            goto out;
2573        }
2574    }
2575
2576    /* Get backing file name if -o backing_file was used */
2577    out_baseimg_param = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
2578    if (out_baseimg_param) {
2579        out_baseimg = out_baseimg_param;
2580    }
2581    s.target_has_backing = (bool) out_baseimg;
2582
2583    if (s.has_zero_init && s.target_has_backing) {
2584        error_report("Cannot use --target-is-zero when the destination "
2585                     "image has a backing file");
2586        goto out;
2587    }
2588
2589    if (s.src_num > 1 && out_baseimg) {
2590        error_report("Having a backing file for the target makes no sense when "
2591                     "concatenating multiple input images");
2592        ret = -1;
2593        goto out;
2594    }
2595
2596    if (out_baseimg_param) {
2597        if (!qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT)) {
2598            error_report("Use of backing file requires explicit "
2599                         "backing format");
2600            ret = -1;
2601            goto out;
2602        }
2603    }
2604
2605    /* Check if compression is supported */
2606    if (s.compressed) {
2607        bool encryption =
2608            qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, false);
2609        const char *encryptfmt =
2610            qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT);
2611        const char *preallocation =
2612            qemu_opt_get(opts, BLOCK_OPT_PREALLOC);
2613
2614        if (drv && !block_driver_can_compress(drv)) {
2615            error_report("Compression not supported for this file format");
2616            ret = -1;
2617            goto out;
2618        }
2619
2620        if (encryption || encryptfmt) {
2621            error_report("Compression and encryption not supported at "
2622                         "the same time");
2623            ret = -1;
2624            goto out;
2625        }
2626
2627        if (preallocation
2628            && strcmp(preallocation, "off"))
2629        {
2630            error_report("Compression and preallocation not supported at "
2631                         "the same time");
2632            ret = -1;
2633            goto out;
2634        }
2635    }
2636
2637    /* Determine if bitmaps need copying */
2638    if (bitmaps) {
2639        if (s.src_num > 1) {
2640            error_report("Copying bitmaps only possible with single source");
2641            ret = -1;
2642            goto out;
2643        }
2644        ret = convert_check_bitmaps(blk_bs(s.src[0]), skip_broken);
2645        if (ret < 0) {
2646            goto out;
2647        }
2648    }
2649
2650    /*
2651     * The later open call will need any decryption secrets, and
2652     * bdrv_create() will purge "opts", so extract them now before
2653     * they are lost.
2654     */
2655    if (!skip_create) {
2656        open_opts = qdict_new();
2657        qemu_opt_foreach(opts, img_add_key_secrets, open_opts, &error_abort);
2658
2659        /* Create the new image */
2660        ret = bdrv_create(drv, out_filename, opts, &local_err);
2661        if (ret < 0) {
2662            error_reportf_err(local_err, "%s: error while converting %s: ",
2663                              out_filename, out_fmt);
2664            goto out;
2665        }
2666    }
2667
2668    s.target_is_new = !skip_create;
2669
2670    flags = s.min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR;
2671    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
2672    if (ret < 0) {
2673        error_report("Invalid cache option: %s", cache);
2674        goto out;
2675    }
2676
2677    if (flags & BDRV_O_NOCACHE) {
2678        /*
2679         * If we open the target with O_DIRECT, it may be necessary to
2680         * extend its size to align to the physical sector size.
2681         */
2682        flags |= BDRV_O_RESIZE;
2683    }
2684
2685    if (skip_create) {
2686        s.target = img_open(tgt_image_opts, out_filename, out_fmt,
2687                            flags, writethrough, s.quiet, false);
2688    } else {
2689        /* TODO ultimately we should allow --target-image-opts
2690         * to be used even when -n is not given.
2691         * That has to wait for bdrv_create to be improved
2692         * to allow filenames in option syntax
2693         */
2694        s.target = img_open_file(out_filename, open_opts, out_fmt,
2695                                 flags, writethrough, s.quiet, false);
2696        open_opts = NULL; /* blk_new_open will have freed it */
2697    }
2698    if (!s.target) {
2699        ret = -1;
2700        goto out;
2701    }
2702    out_bs = blk_bs(s.target);
2703
2704    if (bitmaps && !bdrv_supports_persistent_dirty_bitmap(out_bs)) {
2705        error_report("Format driver '%s' does not support bitmaps",
2706                     out_bs->drv->format_name);
2707        ret = -1;
2708        goto out;
2709    }
2710
2711    if (s.compressed && !block_driver_can_compress(out_bs->drv)) {
2712        error_report("Compression not supported for this file format");
2713        ret = -1;
2714        goto out;
2715    }
2716
2717    /* increase bufsectors from the default 4096 (2M) if opt_transfer
2718     * or discard_alignment of the out_bs is greater. Limit to
2719     * MAX_BUF_SECTORS as maximum which is currently 32768 (16MB). */
2720    s.buf_sectors = MIN(MAX_BUF_SECTORS,
2721                        MAX(s.buf_sectors,
2722                            MAX(out_bs->bl.opt_transfer >> BDRV_SECTOR_BITS,
2723                                out_bs->bl.pdiscard_alignment >>
2724                                BDRV_SECTOR_BITS)));
2725
2726    /* try to align the write requests to the destination to avoid unnecessary
2727     * RMW cycles. */
2728    s.alignment = MAX(pow2floor(s.min_sparse),
2729                      DIV_ROUND_UP(out_bs->bl.request_alignment,
2730                                   BDRV_SECTOR_SIZE));
2731    assert(is_power_of_2(s.alignment));
2732
2733    if (skip_create) {
2734        int64_t output_sectors = blk_nb_sectors(s.target);
2735        if (output_sectors < 0) {
2736            error_report("unable to get output image length: %s",
2737                         strerror(-output_sectors));
2738            ret = -1;
2739            goto out;
2740        } else if (output_sectors < s.total_sectors) {
2741            error_report("output file is smaller than input file");
2742            ret = -1;
2743            goto out;
2744        }
2745    }
2746
2747    if (s.target_has_backing && s.target_is_new) {
2748        /* Errors are treated as "backing length unknown" (which means
2749         * s.target_backing_sectors has to be negative, which it will
2750         * be automatically).  The backing file length is used only
2751         * for optimizations, so such a case is not fatal. */
2752        s.target_backing_sectors =
2753            bdrv_nb_sectors(bdrv_backing_chain_next(out_bs));
2754    } else {
2755        s.target_backing_sectors = -1;
2756    }
2757
2758    ret = bdrv_get_info(out_bs, &bdi);
2759    if (ret < 0) {
2760        if (s.compressed) {
2761            error_report("could not get block driver info");
2762            goto out;
2763        }
2764    } else {
2765        s.compressed = s.compressed || bdi.needs_compressed_writes;
2766        s.cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE;
2767    }
2768
2769    if (rate_limit) {
2770        set_rate_limit(s.target, rate_limit);
2771    }
2772
2773    ret = convert_do_copy(&s);
2774
2775    /* Now copy the bitmaps */
2776    if (bitmaps && ret == 0) {
2777        ret = convert_copy_bitmaps(blk_bs(s.src[0]), out_bs, skip_broken);
2778    }
2779
2780out:
2781    if (!ret) {
2782        qemu_progress_print(100, 0);
2783    }
2784    qemu_progress_end();
2785    qemu_opts_del(opts);
2786    qemu_opts_free(create_opts);
2787    qobject_unref(open_opts);
2788    blk_unref(s.target);
2789    if (s.src) {
2790        for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2791            blk_unref(s.src[bs_i]);
2792        }
2793        g_free(s.src);
2794    }
2795    g_free(s.src_sectors);
2796    g_free(s.src_alignment);
2797fail_getopt:
2798    qemu_opts_del(sn_opts);
2799    g_free(options);
2800
2801    return !!ret;
2802}
2803
2804
2805static void dump_snapshots(BlockDriverState *bs)
2806{
2807    QEMUSnapshotInfo *sn_tab, *sn;
2808    int nb_sns, i;
2809
2810    nb_sns = bdrv_snapshot_list(bs, &sn_tab);
2811    if (nb_sns <= 0)
2812        return;
2813    printf("Snapshot list:\n");
2814    bdrv_snapshot_dump(NULL);
2815    printf("\n");
2816    for(i = 0; i < nb_sns; i++) {
2817        sn = &sn_tab[i];
2818        bdrv_snapshot_dump(sn);
2819        printf("\n");
2820    }
2821    g_free(sn_tab);
2822}
2823
2824static void dump_json_block_graph_info_list(BlockGraphInfoList *list)
2825{
2826    GString *str;
2827    QObject *obj;
2828    Visitor *v = qobject_output_visitor_new(&obj);
2829
2830    visit_type_BlockGraphInfoList(v, NULL, &list, &error_abort);
2831    visit_complete(v, &obj);
2832    str = qobject_to_json_pretty(obj, true);
2833    assert(str != NULL);
2834    printf("%s\n", str->str);
2835    qobject_unref(obj);
2836    visit_free(v);
2837    g_string_free(str, true);
2838}
2839
2840static void dump_json_block_graph_info(BlockGraphInfo *info)
2841{
2842    GString *str;
2843    QObject *obj;
2844    Visitor *v = qobject_output_visitor_new(&obj);
2845
2846    visit_type_BlockGraphInfo(v, NULL, &info, &error_abort);
2847    visit_complete(v, &obj);
2848    str = qobject_to_json_pretty(obj, true);
2849    assert(str != NULL);
2850    printf("%s\n", str->str);
2851    qobject_unref(obj);
2852    visit_free(v);
2853    g_string_free(str, true);
2854}
2855
2856static void dump_human_image_info(BlockGraphInfo *info, int indentation,
2857                                  const char *path)
2858{
2859    BlockChildInfoList *children_list;
2860
2861    bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation,
2862                        info->children == NULL);
2863
2864    for (children_list = info->children; children_list;
2865         children_list = children_list->next)
2866    {
2867        BlockChildInfo *child = children_list->value;
2868        g_autofree char *child_path = NULL;
2869
2870        printf("%*sChild node '%s%s':\n",
2871               indentation * 4, "", path, child->name);
2872        child_path = g_strdup_printf("%s%s/", path, child->name);
2873        dump_human_image_info(child->info, indentation + 1, child_path);
2874    }
2875}
2876
2877static void dump_human_image_info_list(BlockGraphInfoList *list)
2878{
2879    BlockGraphInfoList *elem;
2880    bool delim = false;
2881
2882    for (elem = list; elem; elem = elem->next) {
2883        if (delim) {
2884            printf("\n");
2885        }
2886        delim = true;
2887
2888        dump_human_image_info(elem->value, 0, "/");
2889    }
2890}
2891
2892static gboolean str_equal_func(gconstpointer a, gconstpointer b)
2893{
2894    return strcmp(a, b) == 0;
2895}
2896
2897/**
2898 * Open an image file chain and return an BlockGraphInfoList
2899 *
2900 * @filename: topmost image filename
2901 * @fmt: topmost image format (may be NULL to autodetect)
2902 * @chain: true  - enumerate entire backing file chain
2903 *         false - only topmost image file
2904 *
2905 * Returns a list of BlockNodeInfo objects or NULL if there was an error
2906 * opening an image file.  If there was an error a message will have been
2907 * printed to stderr.
2908 */
2909static BlockGraphInfoList *collect_image_info_list(bool image_opts,
2910                                                   const char *filename,
2911                                                   const char *fmt,
2912                                                   bool chain, bool force_share)
2913{
2914    BlockGraphInfoList *head = NULL;
2915    BlockGraphInfoList **tail = &head;
2916    GHashTable *filenames;
2917    Error *err = NULL;
2918
2919    filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
2920
2921    while (filename) {
2922        BlockBackend *blk;
2923        BlockDriverState *bs;
2924        BlockGraphInfo *info;
2925
2926        if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
2927            error_report("Backing file '%s' creates an infinite loop.",
2928                         filename);
2929            goto err;
2930        }
2931        g_hash_table_insert(filenames, (gpointer)filename, NULL);
2932
2933        blk = img_open(image_opts, filename, fmt,
2934                       BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false,
2935                       force_share);
2936        if (!blk) {
2937            goto err;
2938        }
2939        bs = blk_bs(blk);
2940
2941        /*
2942         * Note that the returned BlockGraphInfo object will not have
2943         * information about this image's backing node, because we have opened
2944         * it with BDRV_O_NO_BACKING.  Printing this object will therefore not
2945         * duplicate the backing chain information that we obtain by walking
2946         * the chain manually here.
2947         */
2948        bdrv_graph_rdlock_main_loop();
2949        bdrv_query_block_graph_info(bs, &info, &err);
2950        bdrv_graph_rdunlock_main_loop();
2951
2952        if (err) {
2953            error_report_err(err);
2954            blk_unref(blk);
2955            goto err;
2956        }
2957
2958        QAPI_LIST_APPEND(tail, info);
2959
2960        blk_unref(blk);
2961
2962        /* Clear parameters that only apply to the topmost image */
2963        filename = fmt = NULL;
2964        image_opts = false;
2965
2966        if (chain) {
2967            if (info->full_backing_filename) {
2968                filename = info->full_backing_filename;
2969            } else if (info->backing_filename) {
2970                error_report("Could not determine absolute backing filename,"
2971                             " but backing filename '%s' present",
2972                             info->backing_filename);
2973                goto err;
2974            }
2975            if (info->backing_filename_format) {
2976                fmt = info->backing_filename_format;
2977            }
2978        }
2979    }
2980    g_hash_table_destroy(filenames);
2981    return head;
2982
2983err:
2984    qapi_free_BlockGraphInfoList(head);
2985    g_hash_table_destroy(filenames);
2986    return NULL;
2987}
2988
2989static int img_info(int argc, char **argv)
2990{
2991    int c;
2992    OutputFormat output_format = OFORMAT_HUMAN;
2993    bool chain = false;
2994    const char *filename, *fmt, *output;
2995    BlockGraphInfoList *list;
2996    bool image_opts = false;
2997    bool force_share = false;
2998
2999    fmt = NULL;
3000    output = NULL;
3001    for(;;) {
3002        int option_index = 0;
3003        static const struct option long_options[] = {
3004            {"help", no_argument, 0, 'h'},
3005            {"format", required_argument, 0, 'f'},
3006            {"output", required_argument, 0, OPTION_OUTPUT},
3007            {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN},
3008            {"object", required_argument, 0, OPTION_OBJECT},
3009            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3010            {"force-share", no_argument, 0, 'U'},
3011            {0, 0, 0, 0}
3012        };
3013        c = getopt_long(argc, argv, ":f:hU",
3014                        long_options, &option_index);
3015        if (c == -1) {
3016            break;
3017        }
3018        switch(c) {
3019        case ':':
3020            missing_argument(argv[optind - 1]);
3021            break;
3022        case '?':
3023            unrecognized_option(argv[optind - 1]);
3024            break;
3025        case 'h':
3026            help();
3027            break;
3028        case 'f':
3029            fmt = optarg;
3030            break;
3031        case 'U':
3032            force_share = true;
3033            break;
3034        case OPTION_OUTPUT:
3035            output = optarg;
3036            break;
3037        case OPTION_BACKING_CHAIN:
3038            chain = true;
3039            break;
3040        case OPTION_OBJECT:
3041            user_creatable_process_cmdline(optarg);
3042            break;
3043        case OPTION_IMAGE_OPTS:
3044            image_opts = true;
3045            break;
3046        }
3047    }
3048    if (optind != argc - 1) {
3049        error_exit("Expecting one image file name");
3050    }
3051    filename = argv[optind++];
3052
3053    if (output && !strcmp(output, "json")) {
3054        output_format = OFORMAT_JSON;
3055    } else if (output && !strcmp(output, "human")) {
3056        output_format = OFORMAT_HUMAN;
3057    } else if (output) {
3058        error_report("--output must be used with human or json as argument.");
3059        return 1;
3060    }
3061
3062    list = collect_image_info_list(image_opts, filename, fmt, chain,
3063                                   force_share);
3064    if (!list) {
3065        return 1;
3066    }
3067
3068    switch (output_format) {
3069    case OFORMAT_HUMAN:
3070        dump_human_image_info_list(list);
3071        break;
3072    case OFORMAT_JSON:
3073        if (chain) {
3074            dump_json_block_graph_info_list(list);
3075        } else {
3076            dump_json_block_graph_info(list->value);
3077        }
3078        break;
3079    }
3080
3081    qapi_free_BlockGraphInfoList(list);
3082    return 0;
3083}
3084
3085static int dump_map_entry(OutputFormat output_format, MapEntry *e,
3086                          MapEntry *next)
3087{
3088    switch (output_format) {
3089    case OFORMAT_HUMAN:
3090        if (e->data && !e->has_offset) {
3091            error_report("File contains external, encrypted or compressed clusters.");
3092            return -1;
3093        }
3094        if (e->data && !e->zero) {
3095            printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
3096                   e->start, e->length,
3097                   e->has_offset ? e->offset : 0,
3098                   e->filename ?: "");
3099        }
3100        /* This format ignores the distinction between 0, ZERO and ZERO|DATA.
3101         * Modify the flags here to allow more coalescing.
3102         */
3103        if (next && (!next->data || next->zero)) {
3104            next->data = false;
3105            next->zero = true;
3106        }
3107        break;
3108    case OFORMAT_JSON:
3109        printf("{ \"start\": %"PRId64", \"length\": %"PRId64","
3110               " \"depth\": %"PRId64", \"present\": %s, \"zero\": %s,"
3111               " \"data\": %s", e->start, e->length, e->depth,
3112               e->present ? "true" : "false",
3113               e->zero ? "true" : "false",
3114               e->data ? "true" : "false");
3115        if (e->has_offset) {
3116            printf(", \"offset\": %"PRId64"", e->offset);
3117        }
3118        putchar('}');
3119
3120        if (next) {
3121            puts(",");
3122        }
3123        break;
3124    }
3125    return 0;
3126}
3127
3128static int get_block_status(BlockDriverState *bs, int64_t offset,
3129                            int64_t bytes, MapEntry *e)
3130{
3131    int ret;
3132    int depth;
3133    BlockDriverState *file;
3134    bool has_offset;
3135    int64_t map;
3136    char *filename = NULL;
3137
3138    /* As an optimization, we could cache the current range of unallocated
3139     * clusters in each file of the chain, and avoid querying the same
3140     * range repeatedly.
3141     */
3142
3143    depth = 0;
3144    for (;;) {
3145        bs = bdrv_skip_filters(bs);
3146        ret = bdrv_block_status(bs, offset, bytes, &bytes, &map, &file);
3147        if (ret < 0) {
3148            return ret;
3149        }
3150        assert(bytes);
3151        if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
3152            break;
3153        }
3154        bs = bdrv_cow_bs(bs);
3155        if (bs == NULL) {
3156            ret = 0;
3157            break;
3158        }
3159
3160        depth++;
3161    }
3162
3163    has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID);
3164
3165    if (file && has_offset) {
3166        bdrv_refresh_filename(file);
3167        filename = file->filename;
3168    }
3169
3170    *e = (MapEntry) {
3171        .start = offset,
3172        .length = bytes,
3173        .data = !!(ret & BDRV_BLOCK_DATA),
3174        .zero = !!(ret & BDRV_BLOCK_ZERO),
3175        .offset = map,
3176        .has_offset = has_offset,
3177        .depth = depth,
3178        .present = !!(ret & BDRV_BLOCK_ALLOCATED),
3179        .filename = filename,
3180    };
3181
3182    return 0;
3183}
3184
3185static inline bool entry_mergeable(const MapEntry *curr, const MapEntry *next)
3186{
3187    if (curr->length == 0) {
3188        return false;
3189    }
3190    if (curr->zero != next->zero ||
3191        curr->data != next->data ||
3192        curr->depth != next->depth ||
3193        curr->present != next->present ||
3194        !curr->filename != !next->filename ||
3195        curr->has_offset != next->has_offset) {
3196        return false;
3197    }
3198    if (curr->filename && strcmp(curr->filename, next->filename)) {
3199        return false;
3200    }
3201    if (curr->has_offset && curr->offset + curr->length != next->offset) {
3202        return false;
3203    }
3204    return true;
3205}
3206
3207static int img_map(int argc, char **argv)
3208{
3209    int c;
3210    OutputFormat output_format = OFORMAT_HUMAN;
3211    BlockBackend *blk;
3212    BlockDriverState *bs;
3213    const char *filename, *fmt, *output;
3214    int64_t length;
3215    MapEntry curr = { .length = 0 }, next;
3216    int ret = 0;
3217    bool image_opts = false;
3218    bool force_share = false;
3219    int64_t start_offset = 0;
3220    int64_t max_length = -1;
3221
3222    fmt = NULL;
3223    output = NULL;
3224    for (;;) {
3225        int option_index = 0;
3226        static const struct option long_options[] = {
3227            {"help", no_argument, 0, 'h'},
3228            {"format", required_argument, 0, 'f'},
3229            {"output", required_argument, 0, OPTION_OUTPUT},
3230            {"object", required_argument, 0, OPTION_OBJECT},
3231            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3232            {"force-share", no_argument, 0, 'U'},
3233            {"start-offset", required_argument, 0, 's'},
3234            {"max-length", required_argument, 0, 'l'},
3235            {0, 0, 0, 0}
3236        };
3237        c = getopt_long(argc, argv, ":f:s:l:hU",
3238                        long_options, &option_index);
3239        if (c == -1) {
3240            break;
3241        }
3242        switch (c) {
3243        case ':':
3244            missing_argument(argv[optind - 1]);
3245            break;
3246        case '?':
3247            unrecognized_option(argv[optind - 1]);
3248            break;
3249        case 'h':
3250            help();
3251            break;
3252        case 'f':
3253            fmt = optarg;
3254            break;
3255        case 'U':
3256            force_share = true;
3257            break;
3258        case OPTION_OUTPUT:
3259            output = optarg;
3260            break;
3261        case 's':
3262            start_offset = cvtnum("start offset", optarg);
3263            if (start_offset < 0) {
3264                return 1;
3265            }
3266            break;
3267        case 'l':
3268            max_length = cvtnum("max length", optarg);
3269            if (max_length < 0) {
3270                return 1;
3271            }
3272            break;
3273        case OPTION_OBJECT:
3274            user_creatable_process_cmdline(optarg);
3275            break;
3276        case OPTION_IMAGE_OPTS:
3277            image_opts = true;
3278            break;
3279        }
3280    }
3281    if (optind != argc - 1) {
3282        error_exit("Expecting one image file name");
3283    }
3284    filename = argv[optind];
3285
3286    if (output && !strcmp(output, "json")) {
3287        output_format = OFORMAT_JSON;
3288    } else if (output && !strcmp(output, "human")) {
3289        output_format = OFORMAT_HUMAN;
3290    } else if (output) {
3291        error_report("--output must be used with human or json as argument.");
3292        return 1;
3293    }
3294
3295    blk = img_open(image_opts, filename, fmt, 0, false, false, force_share);
3296    if (!blk) {
3297        return 1;
3298    }
3299    bs = blk_bs(blk);
3300
3301    if (output_format == OFORMAT_HUMAN) {
3302        printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
3303    } else if (output_format == OFORMAT_JSON) {
3304        putchar('[');
3305    }
3306
3307    length = blk_getlength(blk);
3308    if (length < 0) {
3309        error_report("Failed to get size for '%s'", filename);
3310        return 1;
3311    }
3312    if (max_length != -1) {
3313        length = MIN(start_offset + max_length, length);
3314    }
3315
3316    curr.start = start_offset;
3317    while (curr.start + curr.length < length) {
3318        int64_t offset = curr.start + curr.length;
3319        int64_t n = length - offset;
3320
3321        ret = get_block_status(bs, offset, n, &next);
3322        if (ret < 0) {
3323            error_report("Could not read file metadata: %s", strerror(-ret));
3324            goto out;
3325        }
3326
3327        if (entry_mergeable(&curr, &next)) {
3328            curr.length += next.length;
3329            continue;
3330        }
3331
3332        if (curr.length > 0) {
3333            ret = dump_map_entry(output_format, &curr, &next);
3334            if (ret < 0) {
3335                goto out;
3336            }
3337        }
3338        curr = next;
3339    }
3340
3341    ret = dump_map_entry(output_format, &curr, NULL);
3342    if (output_format == OFORMAT_JSON) {
3343        puts("]");
3344    }
3345
3346out:
3347    blk_unref(blk);
3348    return ret < 0;
3349}
3350
3351#define SNAPSHOT_LIST   1
3352#define SNAPSHOT_CREATE 2
3353#define SNAPSHOT_APPLY  3
3354#define SNAPSHOT_DELETE 4
3355
3356static int img_snapshot(int argc, char **argv)
3357{
3358    BlockBackend *blk;
3359    BlockDriverState *bs;
3360    QEMUSnapshotInfo sn;
3361    char *filename, *snapshot_name = NULL;
3362    int c, ret = 0, bdrv_oflags;
3363    int action = 0;
3364    bool quiet = false;
3365    Error *err = NULL;
3366    bool image_opts = false;
3367    bool force_share = false;
3368    int64_t rt;
3369
3370    bdrv_oflags = BDRV_O_RDWR;
3371    /* Parse commandline parameters */
3372    for(;;) {
3373        static const struct option long_options[] = {
3374            {"help", no_argument, 0, 'h'},
3375            {"object", required_argument, 0, OPTION_OBJECT},
3376            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3377            {"force-share", no_argument, 0, 'U'},
3378            {0, 0, 0, 0}
3379        };
3380        c = getopt_long(argc, argv, ":la:c:d:hqU",
3381                        long_options, NULL);
3382        if (c == -1) {
3383            break;
3384        }
3385        switch(c) {
3386        case ':':
3387            missing_argument(argv[optind - 1]);
3388            break;
3389        case '?':
3390            unrecognized_option(argv[optind - 1]);
3391            break;
3392        case 'h':
3393            help();
3394            return 0;
3395        case 'l':
3396            if (action) {
3397                error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3398                return 0;
3399            }
3400            action = SNAPSHOT_LIST;
3401            bdrv_oflags &= ~BDRV_O_RDWR; /* no need for RW */
3402            break;
3403        case 'a':
3404            if (action) {
3405                error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3406                return 0;
3407            }
3408            action = SNAPSHOT_APPLY;
3409            snapshot_name = optarg;
3410            break;
3411        case 'c':
3412            if (action) {
3413                error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3414                return 0;
3415            }
3416            action = SNAPSHOT_CREATE;
3417            snapshot_name = optarg;
3418            break;
3419        case 'd':
3420            if (action) {
3421                error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3422                return 0;
3423            }
3424            action = SNAPSHOT_DELETE;
3425            snapshot_name = optarg;
3426            break;
3427        case 'q':
3428            quiet = true;
3429            break;
3430        case 'U':
3431            force_share = true;
3432            break;
3433        case OPTION_OBJECT:
3434            user_creatable_process_cmdline(optarg);
3435            break;
3436        case OPTION_IMAGE_OPTS:
3437            image_opts = true;
3438            break;
3439        }
3440    }
3441
3442    if (optind != argc - 1) {
3443        error_exit("Expecting one image file name");
3444    }
3445    filename = argv[optind++];
3446
3447    /* Open the image */
3448    blk = img_open(image_opts, filename, NULL, bdrv_oflags, false, quiet,
3449                   force_share);
3450    if (!blk) {
3451        return 1;
3452    }
3453    bs = blk_bs(blk);
3454
3455    /* Perform the requested action */
3456    switch(action) {
3457    case SNAPSHOT_LIST:
3458        dump_snapshots(bs);
3459        break;
3460
3461    case SNAPSHOT_CREATE:
3462        memset(&sn, 0, sizeof(sn));
3463        pstrcpy(sn.name, sizeof(sn.name), snapshot_name);
3464
3465        rt = g_get_real_time();
3466        sn.date_sec = rt / G_USEC_PER_SEC;
3467        sn.date_nsec = (rt % G_USEC_PER_SEC) * 1000;
3468
3469        ret = bdrv_snapshot_create(bs, &sn);
3470        if (ret) {
3471            error_report("Could not create snapshot '%s': %d (%s)",
3472                snapshot_name, ret, strerror(-ret));
3473        }
3474        break;
3475
3476    case SNAPSHOT_APPLY:
3477        ret = bdrv_snapshot_goto(bs, snapshot_name, &err);
3478        if (ret) {
3479            error_reportf_err(err, "Could not apply snapshot '%s': ",
3480                              snapshot_name);
3481        }
3482        break;
3483
3484    case SNAPSHOT_DELETE:
3485        ret = bdrv_snapshot_find(bs, &sn, snapshot_name);
3486        if (ret < 0) {
3487            error_report("Could not delete snapshot '%s': snapshot not "
3488                         "found", snapshot_name);
3489            ret = 1;
3490        } else {
3491            ret = bdrv_snapshot_delete(bs, sn.id_str, sn.name, &err);
3492            if (ret < 0) {
3493                error_reportf_err(err, "Could not delete snapshot '%s': ",
3494                                  snapshot_name);
3495                ret = 1;
3496            }
3497        }
3498        break;
3499    }
3500
3501    /* Cleanup */
3502    blk_unref(blk);
3503    if (ret) {
3504        return 1;
3505    }
3506    return 0;
3507}
3508
3509static int img_rebase(int argc, char **argv)
3510{
3511    BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
3512    uint8_t *buf_old = NULL;
3513    uint8_t *buf_new = NULL;
3514    BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
3515    BlockDriverState *unfiltered_bs;
3516    char *filename;
3517    const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
3518    int c, flags, src_flags, ret;
3519    bool writethrough, src_writethrough;
3520    int unsafe = 0;
3521    bool force_share = false;
3522    int progress = 0;
3523    bool quiet = false;
3524    Error *local_err = NULL;
3525    bool image_opts = false;
3526
3527    /* Parse commandline parameters */
3528    fmt = NULL;
3529    cache = BDRV_DEFAULT_CACHE;
3530    src_cache = BDRV_DEFAULT_CACHE;
3531    out_baseimg = NULL;
3532    out_basefmt = NULL;
3533    for(;;) {
3534        static const struct option long_options[] = {
3535            {"help", no_argument, 0, 'h'},
3536            {"object", required_argument, 0, OPTION_OBJECT},
3537            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3538            {"force-share", no_argument, 0, 'U'},
3539            {0, 0, 0, 0}
3540        };
3541        c = getopt_long(argc, argv, ":hf:F:b:upt:T:qU",
3542                        long_options, NULL);
3543        if (c == -1) {
3544            break;
3545        }
3546        switch(c) {
3547        case ':':
3548            missing_argument(argv[optind - 1]);
3549            break;
3550        case '?':
3551            unrecognized_option(argv[optind - 1]);
3552            break;
3553        case 'h':
3554            help();
3555            return 0;
3556        case 'f':
3557            fmt = optarg;
3558            break;
3559        case 'F':
3560            out_basefmt = optarg;
3561            break;
3562        case 'b':
3563            out_baseimg = optarg;
3564            break;
3565        case 'u':
3566            unsafe = 1;
3567            break;
3568        case 'p':
3569            progress = 1;
3570            break;
3571        case 't':
3572            cache = optarg;
3573            break;
3574        case 'T':
3575            src_cache = optarg;
3576            break;
3577        case 'q':
3578            quiet = true;
3579            break;
3580        case OPTION_OBJECT:
3581            user_creatable_process_cmdline(optarg);
3582            break;
3583        case OPTION_IMAGE_OPTS:
3584            image_opts = true;
3585            break;
3586        case 'U':
3587            force_share = true;
3588            break;
3589        }
3590    }
3591
3592    if (quiet) {
3593        progress = 0;
3594    }
3595
3596    if (optind != argc - 1) {
3597        error_exit("Expecting one image file name");
3598    }
3599    if (!unsafe && !out_baseimg) {
3600        error_exit("Must specify backing file (-b) or use unsafe mode (-u)");
3601    }
3602    filename = argv[optind++];
3603
3604    qemu_progress_init(progress, 2.0);
3605    qemu_progress_print(0, 100);
3606
3607    flags = BDRV_O_RDWR | (unsafe ? BDRV_O_NO_BACKING : 0);
3608    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
3609    if (ret < 0) {
3610        error_report("Invalid cache option: %s", cache);
3611        goto out;
3612    }
3613
3614    src_flags = 0;
3615    ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
3616    if (ret < 0) {
3617        error_report("Invalid source cache option: %s", src_cache);
3618        goto out;
3619    }
3620
3621    /* The source files are opened read-only, don't care about WCE */
3622    assert((src_flags & BDRV_O_RDWR) == 0);
3623    (void) src_writethrough;
3624
3625    /*
3626     * Open the images.
3627     *
3628     * Ignore the old backing file for unsafe rebase in case we want to correct
3629     * the reference to a renamed or moved backing file.
3630     */
3631    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
3632                   false);
3633    if (!blk) {
3634        ret = -1;
3635        goto out;
3636    }
3637    bs = blk_bs(blk);
3638
3639    unfiltered_bs = bdrv_skip_filters(bs);
3640
3641    if (out_basefmt != NULL) {
3642        if (bdrv_find_format(out_basefmt) == NULL) {
3643            error_report("Invalid format name: '%s'", out_basefmt);
3644            ret = -1;
3645            goto out;
3646        }
3647    }
3648
3649    /* For safe rebasing we need to compare old and new backing file */
3650    if (!unsafe) {
3651        QDict *options = NULL;
3652        BlockDriverState *base_bs = bdrv_cow_bs(unfiltered_bs);
3653
3654        if (base_bs) {
3655            blk_old_backing = blk_new(qemu_get_aio_context(),
3656                                      BLK_PERM_CONSISTENT_READ,
3657                                      BLK_PERM_ALL);
3658            ret = blk_insert_bs(blk_old_backing, base_bs,
3659                                &local_err);
3660            if (ret < 0) {
3661                error_reportf_err(local_err,
3662                                  "Could not reuse old backing file '%s': ",
3663                                  base_bs->filename);
3664                goto out;
3665            }
3666        } else {
3667            blk_old_backing = NULL;
3668        }
3669
3670        if (out_baseimg[0]) {
3671            const char *overlay_filename;
3672            char *out_real_path;
3673
3674            options = qdict_new();
3675            if (out_basefmt) {
3676                qdict_put_str(options, "driver", out_basefmt);
3677            }
3678            if (force_share) {
3679                qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
3680            }
3681
3682            bdrv_refresh_filename(bs);
3683            overlay_filename = bs->exact_filename[0] ? bs->exact_filename
3684                                                     : bs->filename;
3685            out_real_path =
3686                bdrv_get_full_backing_filename_from_filename(overlay_filename,
3687                                                             out_baseimg,
3688                                                             &local_err);
3689            if (local_err) {
3690                qobject_unref(options);
3691                error_reportf_err(local_err,
3692                                  "Could not resolve backing filename: ");
3693                ret = -1;
3694                goto out;
3695            }
3696
3697            /*
3698             * Find out whether we rebase an image on top of a previous image
3699             * in its chain.
3700             */
3701            prefix_chain_bs = bdrv_find_backing_image(bs, out_real_path);
3702            if (prefix_chain_bs) {
3703                qobject_unref(options);
3704                g_free(out_real_path);
3705
3706                blk_new_backing = blk_new(qemu_get_aio_context(),
3707                                          BLK_PERM_CONSISTENT_READ,
3708                                          BLK_PERM_ALL);
3709                ret = blk_insert_bs(blk_new_backing, prefix_chain_bs,
3710                                    &local_err);
3711                if (ret < 0) {
3712                    error_reportf_err(local_err,
3713                                      "Could not reuse backing file '%s': ",
3714                                      out_baseimg);
3715                    goto out;
3716                }
3717            } else {
3718                blk_new_backing = blk_new_open(out_real_path, NULL,
3719                                               options, src_flags, &local_err);
3720                g_free(out_real_path);
3721                if (!blk_new_backing) {
3722                    error_reportf_err(local_err,
3723                                      "Could not open new backing file '%s': ",
3724                                      out_baseimg);
3725                    ret = -1;
3726                    goto out;
3727                }
3728            }
3729        }
3730    }
3731
3732    /*
3733     * Check each unallocated cluster in the COW file. If it is unallocated,
3734     * accesses go to the backing file. We must therefore compare this cluster
3735     * in the old and new backing file, and if they differ we need to copy it
3736     * from the old backing file into the COW file.
3737     *
3738     * If qemu-img crashes during this step, no harm is done. The content of
3739     * the image is the same as the original one at any time.
3740     */
3741    if (!unsafe) {
3742        int64_t size;
3743        int64_t old_backing_size = 0;
3744        int64_t new_backing_size = 0;
3745        uint64_t offset;
3746        int64_t n;
3747        float local_progress = 0;
3748
3749        buf_old = blk_blockalign(blk, IO_BUF_SIZE);
3750        buf_new = blk_blockalign(blk, IO_BUF_SIZE);
3751
3752        size = blk_getlength(blk);
3753        if (size < 0) {
3754            error_report("Could not get size of '%s': %s",
3755                         filename, strerror(-size));
3756            ret = -1;
3757            goto out;
3758        }
3759        if (blk_old_backing) {
3760            old_backing_size = blk_getlength(blk_old_backing);
3761            if (old_backing_size < 0) {
3762                char backing_name[PATH_MAX];
3763
3764                bdrv_get_backing_filename(bs, backing_name,
3765                                          sizeof(backing_name));
3766                error_report("Could not get size of '%s': %s",
3767                             backing_name, strerror(-old_backing_size));
3768                ret = -1;
3769                goto out;
3770            }
3771        }
3772        if (blk_new_backing) {
3773            new_backing_size = blk_getlength(blk_new_backing);
3774            if (new_backing_size < 0) {
3775                error_report("Could not get size of '%s': %s",
3776                             out_baseimg, strerror(-new_backing_size));
3777                ret = -1;
3778                goto out;
3779            }
3780        }
3781
3782        if (size != 0) {
3783            local_progress = (float)100 / (size / MIN(size, IO_BUF_SIZE));
3784        }
3785
3786        for (offset = 0; offset < size; offset += n) {
3787            bool buf_old_is_zero = false;
3788
3789            /* How many bytes can we handle with the next read? */
3790            n = MIN(IO_BUF_SIZE, size - offset);
3791
3792            /* If the cluster is allocated, we don't need to take action */
3793            ret = bdrv_is_allocated(unfiltered_bs, offset, n, &n);
3794            if (ret < 0) {
3795                error_report("error while reading image metadata: %s",
3796                             strerror(-ret));
3797                goto out;
3798            }
3799            if (ret) {
3800                continue;
3801            }
3802
3803            if (prefix_chain_bs) {
3804                /*
3805                 * If cluster wasn't changed since prefix_chain, we don't need
3806                 * to take action
3807                 */
3808                ret = bdrv_is_allocated_above(bdrv_cow_bs(unfiltered_bs),
3809                                              prefix_chain_bs, false,
3810                                              offset, n, &n);
3811                if (ret < 0) {
3812                    error_report("error while reading image metadata: %s",
3813                                 strerror(-ret));
3814                    goto out;
3815                }
3816                if (!ret) {
3817                    continue;
3818                }
3819            }
3820
3821            /*
3822             * Read old and new backing file and take into consideration that
3823             * backing files may be smaller than the COW image.
3824             */
3825            if (offset >= old_backing_size) {
3826                memset(buf_old, 0, n);
3827                buf_old_is_zero = true;
3828            } else {
3829                if (offset + n > old_backing_size) {
3830                    n = old_backing_size - offset;
3831                }
3832
3833                ret = blk_pread(blk_old_backing, offset, n, buf_old, 0);
3834                if (ret < 0) {
3835                    error_report("error while reading from old backing file");
3836                    goto out;
3837                }
3838            }
3839
3840            if (offset >= new_backing_size || !blk_new_backing) {
3841                memset(buf_new, 0, n);
3842            } else {
3843                if (offset + n > new_backing_size) {
3844                    n = new_backing_size - offset;
3845                }
3846
3847                ret = blk_pread(blk_new_backing, offset, n, buf_new, 0);
3848                if (ret < 0) {
3849                    error_report("error while reading from new backing file");
3850                    goto out;
3851                }
3852            }
3853
3854            /* If they differ, we need to write to the COW file */
3855            uint64_t written = 0;
3856
3857            while (written < n) {
3858                int64_t pnum;
3859
3860                if (compare_buffers(buf_old + written, buf_new + written,
3861                                    n - written, &pnum))
3862                {
3863                    if (buf_old_is_zero) {
3864                        ret = blk_pwrite_zeroes(blk, offset + written, pnum, 0);
3865                    } else {
3866                        ret = blk_pwrite(blk, offset + written, pnum,
3867                                         buf_old + written, 0);
3868                    }
3869                    if (ret < 0) {
3870                        error_report("Error while writing to COW image: %s",
3871                            strerror(-ret));
3872                        goto out;
3873                    }
3874                }
3875
3876                written += pnum;
3877            }
3878            qemu_progress_print(local_progress, 100);
3879        }
3880    }
3881
3882    /*
3883     * Change the backing file. All clusters that are different from the old
3884     * backing file are overwritten in the COW file now, so the visible content
3885     * doesn't change when we switch the backing file.
3886     */
3887    if (out_baseimg && *out_baseimg) {
3888        ret = bdrv_change_backing_file(unfiltered_bs, out_baseimg, out_basefmt,
3889                                       true);
3890    } else {
3891        ret = bdrv_change_backing_file(unfiltered_bs, NULL, NULL, false);
3892    }
3893
3894    if (ret == -ENOSPC) {
3895        error_report("Could not change the backing file to '%s': No "
3896                     "space left in the file header", out_baseimg);
3897    } else if (ret == -EINVAL && out_baseimg && !out_basefmt) {
3898        error_report("Could not change the backing file to '%s': backing "
3899                     "format must be specified", out_baseimg);
3900    } else if (ret < 0) {
3901        error_report("Could not change the backing file to '%s': %s",
3902            out_baseimg, strerror(-ret));
3903    }
3904
3905    qemu_progress_print(100, 0);
3906    /*
3907     * TODO At this point it is possible to check if any clusters that are
3908     * allocated in the COW file are the same in the backing file. If so, they
3909     * could be dropped from the COW file. Don't do this before switching the
3910     * backing file, in case of a crash this would lead to corruption.
3911     */
3912out:
3913    qemu_progress_end();
3914    /* Cleanup */
3915    if (!unsafe) {
3916        blk_unref(blk_old_backing);
3917        blk_unref(blk_new_backing);
3918    }
3919    qemu_vfree(buf_old);
3920    qemu_vfree(buf_new);
3921
3922    blk_unref(blk);
3923    if (ret) {
3924        return 1;
3925    }
3926    return 0;
3927}
3928
3929static int img_resize(int argc, char **argv)
3930{
3931    Error *err = NULL;
3932    int c, ret, relative;
3933    const char *filename, *fmt, *size;
3934    int64_t n, total_size, current_size;
3935    bool quiet = false;
3936    BlockBackend *blk = NULL;
3937    PreallocMode prealloc = PREALLOC_MODE_OFF;
3938    QemuOpts *param;
3939
3940    static QemuOptsList resize_options = {
3941        .name = "resize_options",
3942        .head = QTAILQ_HEAD_INITIALIZER(resize_options.head),
3943        .desc = {
3944            {
3945                .name = BLOCK_OPT_SIZE,
3946                .type = QEMU_OPT_SIZE,
3947                .help = "Virtual disk size"
3948            }, {
3949                /* end of list */
3950            }
3951        },
3952    };
3953    bool image_opts = false;
3954    bool shrink = false;
3955
3956    /* Remove size from argv manually so that negative numbers are not treated
3957     * as options by getopt. */
3958    if (argc < 3) {
3959        error_exit("Not enough arguments");
3960        return 1;
3961    }
3962
3963    size = argv[--argc];
3964
3965    /* Parse getopt arguments */
3966    fmt = NULL;
3967    for(;;) {
3968        static const struct option long_options[] = {
3969            {"help", no_argument, 0, 'h'},
3970            {"object", required_argument, 0, OPTION_OBJECT},
3971            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3972            {"preallocation", required_argument, 0, OPTION_PREALLOCATION},
3973            {"shrink", no_argument, 0, OPTION_SHRINK},
3974            {0, 0, 0, 0}
3975        };
3976        c = getopt_long(argc, argv, ":f:hq",
3977                        long_options, NULL);
3978        if (c == -1) {
3979            break;
3980        }
3981        switch(c) {
3982        case ':':
3983            missing_argument(argv[optind - 1]);
3984            break;
3985        case '?':
3986            unrecognized_option(argv[optind - 1]);
3987            break;
3988        case 'h':
3989            help();
3990            break;
3991        case 'f':
3992            fmt = optarg;
3993            break;
3994        case 'q':
3995            quiet = true;
3996            break;
3997        case OPTION_OBJECT:
3998            user_creatable_process_cmdline(optarg);
3999            break;
4000        case OPTION_IMAGE_OPTS:
4001            image_opts = true;
4002            break;
4003        case OPTION_PREALLOCATION:
4004            prealloc = qapi_enum_parse(&PreallocMode_lookup, optarg,
4005                                       PREALLOC_MODE__MAX, NULL);
4006            if (prealloc == PREALLOC_MODE__MAX) {
4007                error_report("Invalid preallocation mode '%s'", optarg);
4008                return 1;
4009            }
4010            break;
4011        case OPTION_SHRINK:
4012            shrink = true;
4013            break;
4014        }
4015    }
4016    if (optind != argc - 1) {
4017        error_exit("Expecting image file name and size");
4018    }
4019    filename = argv[optind++];
4020
4021    /* Choose grow, shrink, or absolute resize mode */
4022    switch (size[0]) {
4023    case '+':
4024        relative = 1;
4025        size++;
4026        break;
4027    case '-':
4028        relative = -1;
4029        size++;
4030        break;
4031    default:
4032        relative = 0;
4033        break;
4034    }
4035
4036    /* Parse size */
4037    param = qemu_opts_create(&resize_options, NULL, 0, &error_abort);
4038    if (!qemu_opt_set(param, BLOCK_OPT_SIZE, size, &err)) {
4039        error_report_err(err);
4040        ret = -1;
4041        qemu_opts_del(param);
4042        goto out;
4043    }
4044    n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
4045    qemu_opts_del(param);
4046
4047    blk = img_open(image_opts, filename, fmt,
4048                   BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet,
4049                   false);
4050    if (!blk) {
4051        ret = -1;
4052        goto out;
4053    }
4054
4055    current_size = blk_getlength(blk);
4056    if (current_size < 0) {
4057        error_report("Failed to inquire current image length: %s",
4058                     strerror(-current_size));
4059        ret = -1;
4060        goto out;
4061    }
4062
4063    if (relative) {
4064        total_size = current_size + n * relative;
4065    } else {
4066        total_size = n;
4067    }
4068    if (total_size <= 0) {
4069        error_report("New image size must be positive");
4070        ret = -1;
4071        goto out;
4072    }
4073
4074    if (total_size <= current_size && prealloc != PREALLOC_MODE_OFF) {
4075        error_report("Preallocation can only be used for growing images");
4076        ret = -1;
4077        goto out;
4078    }
4079
4080    if (total_size < current_size && !shrink) {
4081        error_report("Use the --shrink option to perform a shrink operation.");
4082        warn_report("Shrinking an image will delete all data beyond the "
4083                    "shrunken image's end. Before performing such an "
4084                    "operation, make sure there is no important data there.");
4085        ret = -1;
4086        goto out;
4087    }
4088
4089    /*
4090     * The user expects the image to have the desired size after
4091     * resizing, so pass @exact=true.  It is of no use to report
4092     * success when the image has not actually been resized.
4093     */
4094    ret = blk_truncate(blk, total_size, true, prealloc, 0, &err);
4095    if (!ret) {
4096        qprintf(quiet, "Image resized.\n");
4097    } else {
4098        error_report_err(err);
4099    }
4100out:
4101    blk_unref(blk);
4102    if (ret) {
4103        return 1;
4104    }
4105    return 0;
4106}
4107
4108static void amend_status_cb(BlockDriverState *bs,
4109                            int64_t offset, int64_t total_work_size,
4110                            void *opaque)
4111{
4112    qemu_progress_print(100.f * offset / total_work_size, 0);
4113}
4114
4115static int print_amend_option_help(const char *format)
4116{
4117    BlockDriver *drv;
4118
4119    /* Find driver and parse its options */
4120    drv = bdrv_find_format(format);
4121    if (!drv) {
4122        error_report("Unknown file format '%s'", format);
4123        return 1;
4124    }
4125
4126    if (!drv->bdrv_amend_options) {
4127        error_report("Format driver '%s' does not support option amendment",
4128                     format);
4129        return 1;
4130    }
4131
4132    /* Every driver supporting amendment must have amend_opts */
4133    assert(drv->amend_opts);
4134
4135    printf("Amend options for '%s':\n", format);
4136    qemu_opts_print_help(drv->amend_opts, false);
4137    return 0;
4138}
4139
4140static int img_amend(int argc, char **argv)
4141{
4142    Error *err = NULL;
4143    int c, ret = 0;
4144    char *options = NULL;
4145    QemuOptsList *amend_opts = NULL;
4146    QemuOpts *opts = NULL;
4147    const char *fmt = NULL, *filename, *cache;
4148    int flags;
4149    bool writethrough;
4150    bool quiet = false, progress = false;
4151    BlockBackend *blk = NULL;
4152    BlockDriverState *bs = NULL;
4153    bool image_opts = false;
4154    bool force = false;
4155
4156    cache = BDRV_DEFAULT_CACHE;
4157    for (;;) {
4158        static const struct option long_options[] = {
4159            {"help", no_argument, 0, 'h'},
4160            {"object", required_argument, 0, OPTION_OBJECT},
4161            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4162            {"force", no_argument, 0, OPTION_FORCE},
4163            {0, 0, 0, 0}
4164        };
4165        c = getopt_long(argc, argv, ":ho:f:t:pq",
4166                        long_options, NULL);
4167        if (c == -1) {
4168            break;
4169        }
4170
4171        switch (c) {
4172        case ':':
4173            missing_argument(argv[optind - 1]);
4174            break;
4175        case '?':
4176            unrecognized_option(argv[optind - 1]);
4177            break;
4178        case 'h':
4179            help();
4180            break;
4181        case 'o':
4182            if (accumulate_options(&options, optarg) < 0) {
4183                ret = -1;
4184                goto out_no_progress;
4185            }
4186            break;
4187        case 'f':
4188            fmt = optarg;
4189            break;
4190        case 't':
4191            cache = optarg;
4192            break;
4193        case 'p':
4194            progress = true;
4195            break;
4196        case 'q':
4197            quiet = true;
4198            break;
4199        case OPTION_OBJECT:
4200            user_creatable_process_cmdline(optarg);
4201            break;
4202        case OPTION_IMAGE_OPTS:
4203            image_opts = true;
4204            break;
4205        case OPTION_FORCE:
4206            force = true;
4207            break;
4208        }
4209    }
4210
4211    if (!options) {
4212        error_exit("Must specify options (-o)");
4213    }
4214
4215    if (quiet) {
4216        progress = false;
4217    }
4218    qemu_progress_init(progress, 1.0);
4219
4220    filename = (optind == argc - 1) ? argv[argc - 1] : NULL;
4221    if (fmt && has_help_option(options)) {
4222        /* If a format is explicitly specified (and possibly no filename is
4223         * given), print option help here */
4224        ret = print_amend_option_help(fmt);
4225        goto out;
4226    }
4227
4228    if (optind != argc - 1) {
4229        error_report("Expecting one image file name");
4230        ret = -1;
4231        goto out;
4232    }
4233
4234    flags = BDRV_O_RDWR;
4235    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
4236    if (ret < 0) {
4237        error_report("Invalid cache option: %s", cache);
4238        goto out;
4239    }
4240
4241    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4242                   false);
4243    if (!blk) {
4244        ret = -1;
4245        goto out;
4246    }
4247    bs = blk_bs(blk);
4248
4249    fmt = bs->drv->format_name;
4250
4251    if (has_help_option(options)) {
4252        /* If the format was auto-detected, print option help here */
4253        ret = print_amend_option_help(fmt);
4254        goto out;
4255    }
4256
4257    if (!bs->drv->bdrv_amend_options) {
4258        error_report("Format driver '%s' does not support option amendment",
4259                     fmt);
4260        ret = -1;
4261        goto out;
4262    }
4263
4264    /* Every driver supporting amendment must have amend_opts */
4265    assert(bs->drv->amend_opts);
4266
4267    amend_opts = qemu_opts_append(amend_opts, bs->drv->amend_opts);
4268    opts = qemu_opts_create(amend_opts, NULL, 0, &error_abort);
4269    if (!qemu_opts_do_parse(opts, options, NULL, &err)) {
4270        /* Try to parse options using the create options */
4271        amend_opts = qemu_opts_append(amend_opts, bs->drv->create_opts);
4272        qemu_opts_del(opts);
4273        opts = qemu_opts_create(amend_opts, NULL, 0, &error_abort);
4274        if (qemu_opts_do_parse(opts, options, NULL, NULL)) {
4275            error_append_hint(&err,
4276                              "This option is only supported for image creation\n");
4277        }
4278
4279        error_report_err(err);
4280        ret = -1;
4281        goto out;
4282    }
4283
4284    /* In case the driver does not call amend_status_cb() */
4285    qemu_progress_print(0.f, 0);
4286    ret = bdrv_amend_options(bs, opts, &amend_status_cb, NULL, force, &err);
4287    qemu_progress_print(100.f, 0);
4288    if (ret < 0) {
4289        error_report_err(err);
4290        goto out;
4291    }
4292
4293out:
4294    qemu_progress_end();
4295
4296out_no_progress:
4297    blk_unref(blk);
4298    qemu_opts_del(opts);
4299    qemu_opts_free(amend_opts);
4300    g_free(options);
4301
4302    if (ret) {
4303        return 1;
4304    }
4305    return 0;
4306}
4307
/*
 * State shared between img_bench() and its AIO completion callback
 * bench_cb(). The first group is filled in by img_bench() before the run;
 * the second group is updated by bench_cb() while requests are in progress.
 */
typedef struct BenchData {
    BlockBackend *blk;      /* backend all requests and flushes are sent to */
    uint64_t image_size;    /* request offsets wrap around modulo this value */
    bool write;             /* true: issue writes; false: issue reads */
    int bufsize;            /* bytes added to each I/O vector */
    int step;               /* offset increment between submissions */
    int nrreq;              /* upper bound for in_flight (queue depth) */
    int n;                  /* requests not completed yet; run ends at 0 */
    int flush_interval;     /* flush every this many requests; 0 = never */
    bool drain_on_flush;    /* wait for in_flight == 0 before each flush */
    uint8_t *buf;           /* data buffer of nrreq * bufsize bytes */
    QEMUIOVector *qiov;     /* array of nrreq vectors pointing into buf */

    int in_flight;          /* requests submitted but not yet completed */
    bool in_flush;          /* a drained flush is currently outstanding */
    uint64_t offset;        /* offset for the next submission */
} BenchData;
4325
/*
 * Completion callback for flushes that bench_cb() issues without draining
 * the queue first. A successful flush needs no further action; a failed one
 * is reported and terminates the process. @opaque is not used.
 */
static void bench_undrained_flush_cb(void *opaque, int ret)
{
    if (ret >= 0) {
        return;
    }

    error_report("Failed flush request: %s", strerror(-ret));
    exit(EXIT_FAILURE);
}
4333
/*
 * AIO completion callback driving the benchmark in img_bench().
 *
 * It is called once directly by img_bench() (with in_flight == 0) to submit
 * the first batch, and afterwards for every completed read/write and for
 * every completed drained flush. Each invocation accounts for the finished
 * operation, issues a flush when one is due, and then refills the queue up
 * to nrreq requests.
 *
 * @opaque: the BenchData of the running benchmark
 * @ret:    result of the completed operation; a negative value is reported
 *          and terminates the process
 */
static void bench_cb(void *opaque, int ret)
{
    BenchData *b = opaque;
    BlockAIOCB *acb;

    if (ret < 0) {
        error_report("Failed request: %s", strerror(-ret));
        exit(EXIT_FAILURE);
    }

    if (b->in_flush) {
        /* Just finished a flush with drained queue: Start next requests */
        assert(b->in_flight == 0);
        b->in_flush = false;
    } else if (b->in_flight > 0) {
        /*
         * A read/write completed. Requests not yet submitted, computed
         * before this completion is accounted for.
         */
        int remaining = b->n - b->in_flight;

        b->n--;
        b->in_flight--;

        /* Time for flush? Drain queue if requested, then flush */
        if (b->flush_interval && remaining % b->flush_interval == 0) {
            /*
             * In drain mode the flush is only issued by the completion that
             * empties the queue; otherwise it is issued right away.
             */
            if (!b->in_flight || !b->drain_on_flush) {
                BlockCompletionFunc *cb;

                if (b->drain_on_flush) {
                    /* Re-enter bench_cb when the flush is done */
                    b->in_flush = true;
                    cb = bench_cb;
                } else {
                    cb = bench_undrained_flush_cb;
                }

                acb = blk_aio_flush(b->blk, cb, b);
                if (!acb) {
                    error_report("Failed to issue flush request");
                    exit(EXIT_FAILURE);
                }
            }
            /* While draining, do not submit anything new */
            if (b->drain_on_flush) {
                return;
            }
        }
    }

    /* Refill the queue: never exceed nrreq, never submit more than n total */
    while (b->n > b->in_flight && b->in_flight < b->nrreq) {
        int64_t offset = b->offset;
        /* blk_aio_* might look for completed I/Os and kick bench_cb
         * again, so make sure this operation is counted by in_flight
         * and b->offset is ready for the next submission.
         */
        b->in_flight++;
        b->offset += b->step;
        b->offset %= b->image_size;
        /* Note: every request is passed b->qiov, i.e. the array's first element */
        if (b->write) {
            acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
        } else {
            acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b);
        }
        if (!acb) {
            error_report("Failed to issue request");
            exit(EXIT_FAILURE);
        }
    }
}
4398
4399static int img_bench(int argc, char **argv)
4400{
4401    int c, ret = 0;
4402    const char *fmt = NULL, *filename;
4403    bool quiet = false;
4404    bool image_opts = false;
4405    bool is_write = false;
4406    int count = 75000;
4407    int depth = 64;
4408    int64_t offset = 0;
4409    size_t bufsize = 4096;
4410    int pattern = 0;
4411    size_t step = 0;
4412    int flush_interval = 0;
4413    bool drain_on_flush = true;
4414    int64_t image_size;
4415    BlockBackend *blk = NULL;
4416    BenchData data = {};
4417    int flags = 0;
4418    bool writethrough = false;
4419    struct timeval t1, t2;
4420    int i;
4421    bool force_share = false;
4422    size_t buf_size = 0;
4423
4424    for (;;) {
4425        static const struct option long_options[] = {
4426            {"help", no_argument, 0, 'h'},
4427            {"flush-interval", required_argument, 0, OPTION_FLUSH_INTERVAL},
4428            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4429            {"pattern", required_argument, 0, OPTION_PATTERN},
4430            {"no-drain", no_argument, 0, OPTION_NO_DRAIN},
4431            {"force-share", no_argument, 0, 'U'},
4432            {0, 0, 0, 0}
4433        };
4434        c = getopt_long(argc, argv, ":hc:d:f:ni:o:qs:S:t:wU", long_options,
4435                        NULL);
4436        if (c == -1) {
4437            break;
4438        }
4439
4440        switch (c) {
4441        case ':':
4442            missing_argument(argv[optind - 1]);
4443            break;
4444        case '?':
4445            unrecognized_option(argv[optind - 1]);
4446            break;
4447        case 'h':
4448            help();
4449            break;
4450        case 'c':
4451        {
4452            unsigned long res;
4453
4454            if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4455                error_report("Invalid request count specified");
4456                return 1;
4457            }
4458            count = res;
4459            break;
4460        }
4461        case 'd':
4462        {
4463            unsigned long res;
4464
4465            if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4466                error_report("Invalid queue depth specified");
4467                return 1;
4468            }
4469            depth = res;
4470            break;
4471        }
4472        case 'f':
4473            fmt = optarg;
4474            break;
4475        case 'n':
4476            flags |= BDRV_O_NATIVE_AIO;
4477            break;
4478        case 'i':
4479            ret = bdrv_parse_aio(optarg, &flags);
4480            if (ret < 0) {
4481                error_report("Invalid aio option: %s", optarg);
4482                ret = -1;
4483                goto out;
4484            }
4485            break;
4486        case 'o':
4487        {
4488            offset = cvtnum("offset", optarg);
4489            if (offset < 0) {
4490                return 1;
4491            }
4492            break;
4493        }
4494            break;
4495        case 'q':
4496            quiet = true;
4497            break;
4498        case 's':
4499        {
4500            int64_t sval;
4501
4502            sval = cvtnum_full("buffer size", optarg, 0, INT_MAX);
4503            if (sval < 0) {
4504                return 1;
4505            }
4506
4507            bufsize = sval;
4508            break;
4509        }
4510        case 'S':
4511        {
4512            int64_t sval;
4513
4514            sval = cvtnum_full("step_size", optarg, 0, INT_MAX);
4515            if (sval < 0) {
4516                return 1;
4517            }
4518
4519            step = sval;
4520            break;
4521        }
4522        case 't':
4523            ret = bdrv_parse_cache_mode(optarg, &flags, &writethrough);
4524            if (ret < 0) {
4525                error_report("Invalid cache mode");
4526                ret = -1;
4527                goto out;
4528            }
4529            break;
4530        case 'w':
4531            flags |= BDRV_O_RDWR;
4532            is_write = true;
4533            break;
4534        case 'U':
4535            force_share = true;
4536            break;
4537        case OPTION_PATTERN:
4538        {
4539            unsigned long res;
4540
4541            if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > 0xff) {
4542                error_report("Invalid pattern byte specified");
4543                return 1;
4544            }
4545            pattern = res;
4546            break;
4547        }
4548        case OPTION_FLUSH_INTERVAL:
4549        {
4550            unsigned long res;
4551
4552            if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4553                error_report("Invalid flush interval specified");
4554                return 1;
4555            }
4556            flush_interval = res;
4557            break;
4558        }
4559        case OPTION_NO_DRAIN:
4560            drain_on_flush = false;
4561            break;
4562        case OPTION_IMAGE_OPTS:
4563            image_opts = true;
4564            break;
4565        }
4566    }
4567
4568    if (optind != argc - 1) {
4569        error_exit("Expecting one image file name");
4570    }
4571    filename = argv[argc - 1];
4572
4573    if (!is_write && flush_interval) {
4574        error_report("--flush-interval is only available in write tests");
4575        ret = -1;
4576        goto out;
4577    }
4578    if (flush_interval && flush_interval < depth) {
4579        error_report("Flush interval can't be smaller than depth");
4580        ret = -1;
4581        goto out;
4582    }
4583
4584    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4585                   force_share);
4586    if (!blk) {
4587        ret = -1;
4588        goto out;
4589    }
4590
4591    image_size = blk_getlength(blk);
4592    if (image_size < 0) {
4593        ret = image_size;
4594        goto out;
4595    }
4596
4597    data = (BenchData) {
4598        .blk            = blk,
4599        .image_size     = image_size,
4600        .bufsize        = bufsize,
4601        .step           = step ?: bufsize,
4602        .nrreq          = depth,
4603        .n              = count,
4604        .offset         = offset,
4605        .write          = is_write,
4606        .flush_interval = flush_interval,
4607        .drain_on_flush = drain_on_flush,
4608    };
4609    printf("Sending %d %s requests, %d bytes each, %d in parallel "
4610           "(starting at offset %" PRId64 ", step size %d)\n",
4611           data.n, data.write ? "write" : "read", data.bufsize, data.nrreq,
4612           data.offset, data.step);
4613    if (flush_interval) {
4614        printf("Sending flush every %d requests\n", flush_interval);
4615    }
4616
4617    buf_size = data.nrreq * data.bufsize;
4618    data.buf = blk_blockalign(blk, buf_size);
4619    memset(data.buf, pattern, data.nrreq * data.bufsize);
4620
4621    blk_register_buf(blk, data.buf, buf_size, &error_fatal);
4622
4623    data.qiov = g_new(QEMUIOVector, data.nrreq);
4624    for (i = 0; i < data.nrreq; i++) {
4625        qemu_iovec_init(&data.qiov[i], 1);
4626        qemu_iovec_add(&data.qiov[i],
4627                       data.buf + i * data.bufsize, data.bufsize);
4628    }
4629
4630    gettimeofday(&t1, NULL);
4631    bench_cb(&data, 0);
4632
4633    while (data.n > 0) {
4634        main_loop_wait(false);
4635    }
4636    gettimeofday(&t2, NULL);
4637
4638    printf("Run completed in %3.3f seconds.\n",
4639           (t2.tv_sec - t1.tv_sec)
4640           + ((double)(t2.tv_usec - t1.tv_usec) / 1000000));
4641
4642out:
4643    if (data.buf) {
4644        blk_unregister_buf(blk, data.buf, buf_size);
4645    }
4646    qemu_vfree(data.buf);
4647    blk_unref(blk);
4648
4649    if (ret) {
4650        return 1;
4651    }
4652    return 0;
4653}
4654
/*
 * Kinds of bitmap operation that img_bitmap() can queue; each one is
 * selected by the command-line option named next to it.
 */
enum ImgBitmapAct {
    BITMAP_ADD,         /* --add */
    BITMAP_REMOVE,      /* --remove */
    BITMAP_CLEAR,       /* --clear */
    BITMAP_ENABLE,      /* --enable */
    BITMAP_DISABLE,     /* --disable */
    BITMAP_MERGE,       /* --merge SOURCE */
};
4663typedef struct ImgBitmapAction {
4664    enum ImgBitmapAct act;
4665    const char *src; /* only used for merge */
4666    QSIMPLEQ_ENTRY(ImgBitmapAction) next;
4667} ImgBitmapAction;
4668
4669static int img_bitmap(int argc, char **argv)
4670{
4671    Error *err = NULL;
4672    int c, ret = 1;
4673    QemuOpts *opts = NULL;
4674    const char *fmt = NULL, *src_fmt = NULL, *src_filename = NULL;
4675    const char *filename, *bitmap;
4676    BlockBackend *blk = NULL, *src = NULL;
4677    BlockDriverState *bs = NULL, *src_bs = NULL;
4678    bool image_opts = false;
4679    int64_t granularity = 0;
4680    bool add = false, merge = false;
4681    QSIMPLEQ_HEAD(, ImgBitmapAction) actions;
4682    ImgBitmapAction *act, *act_next;
4683    const char *op;
4684    int inactivate_ret;
4685
4686    QSIMPLEQ_INIT(&actions);
4687
4688    for (;;) {
4689        static const struct option long_options[] = {
4690            {"help", no_argument, 0, 'h'},
4691            {"object", required_argument, 0, OPTION_OBJECT},
4692            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4693            {"add", no_argument, 0, OPTION_ADD},
4694            {"remove", no_argument, 0, OPTION_REMOVE},
4695            {"clear", no_argument, 0, OPTION_CLEAR},
4696            {"enable", no_argument, 0, OPTION_ENABLE},
4697            {"disable", no_argument, 0, OPTION_DISABLE},
4698            {"merge", required_argument, 0, OPTION_MERGE},
4699            {"granularity", required_argument, 0, 'g'},
4700            {"source-file", required_argument, 0, 'b'},
4701            {"source-format", required_argument, 0, 'F'},
4702            {0, 0, 0, 0}
4703        };
4704        c = getopt_long(argc, argv, ":b:f:F:g:h", long_options, NULL);
4705        if (c == -1) {
4706            break;
4707        }
4708
4709        switch (c) {
4710        case ':':
4711            missing_argument(argv[optind - 1]);
4712            break;
4713        case '?':
4714            unrecognized_option(argv[optind - 1]);
4715            break;
4716        case 'h':
4717            help();
4718            break;
4719        case 'b':
4720            src_filename = optarg;
4721            break;
4722        case 'f':
4723            fmt = optarg;
4724            break;
4725        case 'F':
4726            src_fmt = optarg;
4727            break;
4728        case 'g':
4729            granularity = cvtnum("granularity", optarg);
4730            if (granularity < 0) {
4731                return 1;
4732            }
4733            break;
4734        case OPTION_ADD:
4735            act = g_new0(ImgBitmapAction, 1);
4736            act->act = BITMAP_ADD;
4737            QSIMPLEQ_INSERT_TAIL(&actions, act, next);
4738            add = true;
4739            break;
4740        case OPTION_REMOVE:
4741            act = g_new0(ImgBitmapAction, 1);
4742            act->act = BITMAP_REMOVE;
4743            QSIMPLEQ_INSERT_TAIL(&actions, act, next);
4744            break;
4745        case OPTION_CLEAR:
4746            act = g_new0(ImgBitmapAction, 1);
4747            act->act = BITMAP_CLEAR;
4748            QSIMPLEQ_INSERT_TAIL(&actions, act, next);
4749            break;
4750        case OPTION_ENABLE:
4751            act = g_new0(ImgBitmapAction, 1);
4752            act->act = BITMAP_ENABLE;
4753            QSIMPLEQ_INSERT_TAIL(&actions, act, next);
4754            break;
4755        case OPTION_DISABLE:
4756            act = g_new0(ImgBitmapAction, 1);
4757            act->act = BITMAP_DISABLE;
4758            QSIMPLEQ_INSERT_TAIL(&actions, act, next);
4759            break;
4760        case OPTION_MERGE:
4761            act = g_new0(ImgBitmapAction, 1);
4762            act->act = BITMAP_MERGE;
4763            act->src = optarg;
4764            QSIMPLEQ_INSERT_TAIL(&actions, act, next);
4765            merge = true;
4766            break;
4767        case OPTION_OBJECT:
4768            user_creatable_process_cmdline(optarg);
4769            break;
4770        case OPTION_IMAGE_OPTS:
4771            image_opts = true;
4772            break;
4773        }
4774    }
4775
4776    if (QSIMPLEQ_EMPTY(&actions)) {
4777        error_report("Need at least one of --add, --remove, --clear, "
4778                     "--enable, --disable, or --merge");
4779        goto out;
4780    }
4781
4782    if (granularity && !add) {
4783        error_report("granularity only supported with --add");
4784        goto out;
4785    }
4786    if (src_fmt && !src_filename) {
4787        error_report("-F only supported with -b");
4788        goto out;
4789    }
4790    if (src_filename && !merge) {
4791        error_report("Merge bitmap source file only supported with "
4792                     "--merge");
4793        goto out;
4794    }
4795
4796    if (optind != argc - 2) {
4797        error_report("Expecting filename and bitmap name");
4798        goto out;
4799    }
4800
4801    filename = argv[optind];
4802    bitmap = argv[optind + 1];
4803
4804    /*
4805     * No need to open backing chains; we will be manipulating bitmaps
4806     * directly in this image without reference to image contents.
4807     */
4808    blk = img_open(image_opts, filename, fmt, BDRV_O_RDWR | BDRV_O_NO_BACKING,
4809                   false, false, false);
4810    if (!blk) {
4811        goto out;
4812    }
4813    bs = blk_bs(blk);
4814    if (src_filename) {
4815        src = img_open(false, src_filename, src_fmt, BDRV_O_NO_BACKING,
4816                       false, false, false);
4817        if (!src) {
4818            goto out;
4819        }
4820        src_bs = blk_bs(src);
4821    } else {
4822        src_bs = bs;
4823    }
4824
4825    QSIMPLEQ_FOREACH_SAFE(act, &actions, next, act_next) {
4826        switch (act->act) {
4827        case BITMAP_ADD:
4828            qmp_block_dirty_bitmap_add(bs->node_name, bitmap,
4829                                       !!granularity, granularity, true, true,
4830                                       false, false, &err);
4831            op = "add";
4832            break;
4833        case BITMAP_REMOVE:
4834            qmp_block_dirty_bitmap_remove(bs->node_name, bitmap, &err);
4835            op = "remove";
4836            break;
4837        case BITMAP_CLEAR:
4838            qmp_block_dirty_bitmap_clear(bs->node_name, bitmap, &err);
4839            op = "clear";
4840            break;
4841        case BITMAP_ENABLE:
4842            qmp_block_dirty_bitmap_enable(bs->node_name, bitmap, &err);
4843            op = "enable";
4844            break;
4845        case BITMAP_DISABLE:
4846            qmp_block_dirty_bitmap_disable(bs->node_name, bitmap, &err);
4847            op = "disable";
4848            break;
4849        case BITMAP_MERGE:
4850            do_dirty_bitmap_merge(bs->node_name, bitmap, src_bs->node_name,
4851                                  act->src, &err);
4852            op = "merge";
4853            break;
4854        default:
4855            g_assert_not_reached();
4856        }
4857
4858        if (err) {
4859            error_reportf_err(err, "Operation %s on bitmap %s failed: ",
4860                              op, bitmap);
4861            goto out;
4862        }
4863        g_free(act);
4864    }
4865
4866    ret = 0;
4867
4868 out:
4869    /*
4870     * Manually inactivate the images first because this way we can know whether
4871     * an error occurred. blk_unref() doesn't tell us about failures.
4872     */
4873    inactivate_ret = bdrv_inactivate_all();
4874    if (inactivate_ret < 0) {
4875        error_report("Error while closing the image: %s", strerror(-inactivate_ret));
4876        ret = 1;
4877    }
4878
4879    blk_unref(src);
4880    blk_unref(blk);
4881    qemu_opts_del(opts);
4882    return ret;
4883}
4884
/*
 * Bit flags accumulated in DdInfo.flags; each marks a dd operand that was
 * given on the command line.
 */
#define C_BS      0x01
#define C_COUNT   0x02
#define C_IF      0x04
#define C_OF      0x08
#define C_SKIP    0x10
4890
/* State shared by all dd operands. */
struct DdInfo {
    /* Bitwise OR of the C_* flags of the operands seen so far */
    unsigned int flags;
    /* Value of the count= operand, in input blocks */
    int64_t count;
};
4895
/* Description of one side (input or output) of a dd copy. */
struct DdIo {
    /* Block size */
    int bsz;
    /* Image file name; a g_strdup() copy owned by img_dd() */
    char *filename;
    /* Transfer buffer, released with g_free() by img_dd() */
    uint8_t *buf;
    /* Starting position, counted in blocks of bsz bytes */
    int64_t offset;
};
4902
/* Table entry describing one "name=value" operand understood by img_dd(). */
struct DdOpts {
    /* Operand keyword, i.e. the text before '=' */
    const char *name;
    /* Parser for the value part; returns 0 on success, non-zero on error */
    int (*f)(const char *, struct DdIo *, struct DdIo *, struct DdInfo *);
    /* C_* bit recorded in DdInfo.flags once the operand was parsed */
    unsigned int flag;
};
4908
4909static int img_dd_bs(const char *arg,
4910                     struct DdIo *in, struct DdIo *out,
4911                     struct DdInfo *dd)
4912{
4913    int64_t res;
4914
4915    res = cvtnum_full("bs", arg, 1, INT_MAX);
4916
4917    if (res < 0) {
4918        return 1;
4919    }
4920    in->bsz = out->bsz = res;
4921
4922    return 0;
4923}
4924
4925static int img_dd_count(const char *arg,
4926                        struct DdIo *in, struct DdIo *out,
4927                        struct DdInfo *dd)
4928{
4929    dd->count = cvtnum("count", arg);
4930
4931    if (dd->count < 0) {
4932        return 1;
4933    }
4934
4935    return 0;
4936}
4937
4938static int img_dd_if(const char *arg,
4939                     struct DdIo *in, struct DdIo *out,
4940                     struct DdInfo *dd)
4941{
4942    in->filename = g_strdup(arg);
4943
4944    return 0;
4945}
4946
4947static int img_dd_of(const char *arg,
4948                     struct DdIo *in, struct DdIo *out,
4949                     struct DdInfo *dd)
4950{
4951    out->filename = g_strdup(arg);
4952
4953    return 0;
4954}
4955
4956static int img_dd_skip(const char *arg,
4957                       struct DdIo *in, struct DdIo *out,
4958                       struct DdInfo *dd)
4959{
4960    in->offset = cvtnum("skip", arg);
4961
4962    if (in->offset < 0) {
4963        return 1;
4964    }
4965
4966    return 0;
4967}
4968
4969static int img_dd(int argc, char **argv)
4970{
4971    int ret = 0;
4972    char *arg = NULL;
4973    char *tmp;
4974    BlockDriver *drv = NULL, *proto_drv = NULL;
4975    BlockBackend *blk1 = NULL, *blk2 = NULL;
4976    QemuOpts *opts = NULL;
4977    QemuOptsList *create_opts = NULL;
4978    Error *local_err = NULL;
4979    bool image_opts = false;
4980    int c, i;
4981    const char *out_fmt = "raw";
4982    const char *fmt = NULL;
4983    int64_t size = 0;
4984    int64_t out_pos, in_pos;
4985    bool force_share = false;
4986    struct DdInfo dd = {
4987        .flags = 0,
4988        .count = 0,
4989    };
4990    struct DdIo in = {
4991        .bsz = 512, /* Block size is by default 512 bytes */
4992        .filename = NULL,
4993        .buf = NULL,
4994        .offset = 0
4995    };
4996    struct DdIo out = {
4997        .bsz = 512,
4998        .filename = NULL,
4999        .buf = NULL,
5000        .offset = 0
5001    };
5002
5003    const struct DdOpts options[] = {
5004        { "bs", img_dd_bs, C_BS },
5005        { "count", img_dd_count, C_COUNT },
5006        { "if", img_dd_if, C_IF },
5007        { "of", img_dd_of, C_OF },
5008        { "skip", img_dd_skip, C_SKIP },
5009        { NULL, NULL, 0 }
5010    };
5011    const struct option long_options[] = {
5012        { "help", no_argument, 0, 'h'},
5013        { "object", required_argument, 0, OPTION_OBJECT},
5014        { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
5015        { "force-share", no_argument, 0, 'U'},
5016        { 0, 0, 0, 0 }
5017    };
5018
5019    while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) {
5020        if (c == EOF) {
5021            break;
5022        }
5023        switch (c) {
5024        case 'O':
5025            out_fmt = optarg;
5026            break;
5027        case 'f':
5028            fmt = optarg;
5029            break;
5030        case ':':
5031            missing_argument(argv[optind - 1]);
5032            break;
5033        case '?':
5034            unrecognized_option(argv[optind - 1]);
5035            break;
5036        case 'h':
5037            help();
5038            break;
5039        case 'U':
5040            force_share = true;
5041            break;
5042        case OPTION_OBJECT:
5043            user_creatable_process_cmdline(optarg);
5044            break;
5045        case OPTION_IMAGE_OPTS:
5046            image_opts = true;
5047            break;
5048        }
5049    }
5050
5051    for (i = optind; i < argc; i++) {
5052        int j;
5053        arg = g_strdup(argv[i]);
5054
5055        tmp = strchr(arg, '=');
5056        if (tmp == NULL) {
5057            error_report("unrecognized operand %s", arg);
5058            ret = -1;
5059            goto out;
5060        }
5061
5062        *tmp++ = '\0';
5063
5064        for (j = 0; options[j].name != NULL; j++) {
5065            if (!strcmp(arg, options[j].name)) {
5066                break;
5067            }
5068        }
5069        if (options[j].name == NULL) {
5070            error_report("unrecognized operand %s", arg);
5071            ret = -1;
5072            goto out;
5073        }
5074
5075        if (options[j].f(tmp, &in, &out, &dd) != 0) {
5076            ret = -1;
5077            goto out;
5078        }
5079        dd.flags |= options[j].flag;
5080        g_free(arg);
5081        arg = NULL;
5082    }
5083
5084    if (!(dd.flags & C_IF && dd.flags & C_OF)) {
5085        error_report("Must specify both input and output files");
5086        ret = -1;
5087        goto out;
5088    }
5089
5090    blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
5091                    force_share);
5092
5093    if (!blk1) {
5094        ret = -1;
5095        goto out;
5096    }
5097
5098    drv = bdrv_find_format(out_fmt);
5099    if (!drv) {
5100        error_report("Unknown file format");
5101        ret = -1;
5102        goto out;
5103    }
5104    proto_drv = bdrv_find_protocol(out.filename, true, &local_err);
5105
5106    if (!proto_drv) {
5107        error_report_err(local_err);
5108        ret = -1;
5109        goto out;
5110    }
5111    if (!drv->create_opts) {
5112        error_report("Format driver '%s' does not support image creation",
5113                     drv->format_name);
5114        ret = -1;
5115        goto out;
5116    }
5117    if (!proto_drv->create_opts) {
5118        error_report("Protocol driver '%s' does not support image creation",
5119                     proto_drv->format_name);
5120        ret = -1;
5121        goto out;
5122    }
5123    create_opts = qemu_opts_append(create_opts, drv->create_opts);
5124    create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
5125
5126    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
5127
5128    size = blk_getlength(blk1);
5129    if (size < 0) {
5130        error_report("Failed to get size for '%s'", in.filename);
5131        ret = -1;
5132        goto out;
5133    }
5134
5135    if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
5136        dd.count * in.bsz < size) {
5137        size = dd.count * in.bsz;
5138    }
5139
5140    /* Overflow means the specified offset is beyond input image's size */
5141    if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
5142                              size < in.bsz * in.offset)) {
5143        qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort);
5144    } else {
5145        qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
5146                            size - in.bsz * in.offset, &error_abort);
5147    }
5148
5149    ret = bdrv_create(drv, out.filename, opts, &local_err);
5150    if (ret < 0) {
5151        error_reportf_err(local_err,
5152                          "%s: error while creating output image: ",
5153                          out.filename);
5154        ret = -1;
5155        goto out;
5156    }
5157
5158    /* TODO, we can't honour --image-opts for the target,
5159     * since it needs to be given in a format compatible
5160     * with the bdrv_create() call above which does not
5161     * support image-opts style.
5162     */
5163    blk2 = img_open_file(out.filename, NULL, out_fmt, BDRV_O_RDWR,
5164                         false, false, false);
5165
5166    if (!blk2) {
5167        ret = -1;
5168        goto out;
5169    }
5170
5171    if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
5172                              size < in.offset * in.bsz)) {
5173        /* We give a warning if the skip option is bigger than the input
5174         * size and create an empty output disk image (i.e. like dd(1)).
5175         */
5176        error_report("%s: cannot skip to specified offset", in.filename);
5177        in_pos = size;
5178    } else {
5179        in_pos = in.offset * in.bsz;
5180    }
5181
5182    in.buf = g_new(uint8_t, in.bsz);
5183
5184    for (out_pos = 0; in_pos < size; ) {
5185        int bytes = (in_pos + in.bsz > size) ? size - in_pos : in.bsz;
5186
5187        ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
5188        if (ret < 0) {
5189            error_report("error while reading from input image file: %s",
5190                         strerror(-ret));
5191            goto out;
5192        }
5193        in_pos += bytes;
5194
5195        ret = blk_pwrite(blk2, out_pos, bytes, in.buf, 0);
5196        if (ret < 0) {
5197            error_report("error while writing to output image file: %s",
5198                         strerror(-ret));
5199            goto out;
5200        }
5201        out_pos += bytes;
5202    }
5203
5204out:
5205    g_free(arg);
5206    qemu_opts_del(opts);
5207    qemu_opts_free(create_opts);
5208    blk_unref(blk1);
5209    blk_unref(blk2);
5210    g_free(in.filename);
5211    g_free(out.filename);
5212    g_free(in.buf);
5213    g_free(out.buf);
5214
5215    if (ret) {
5216        return 1;
5217    }
5218    return 0;
5219}
5220
5221static void dump_json_block_measure_info(BlockMeasureInfo *info)
5222{
5223    GString *str;
5224    QObject *obj;
5225    Visitor *v = qobject_output_visitor_new(&obj);
5226
5227    visit_type_BlockMeasureInfo(v, NULL, &info, &error_abort);
5228    visit_complete(v, &obj);
5229    str = qobject_to_json_pretty(obj, true);
5230    assert(str != NULL);
5231    printf("%s\n", str->str);
5232    qobject_unref(obj);
5233    visit_free(v);
5234    g_string_free(str, true);
5235}
5236
5237static int img_measure(int argc, char **argv)
5238{
5239    static const struct option long_options[] = {
5240        {"help", no_argument, 0, 'h'},
5241        {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
5242        {"object", required_argument, 0, OPTION_OBJECT},
5243        {"output", required_argument, 0, OPTION_OUTPUT},
5244        {"size", required_argument, 0, OPTION_SIZE},
5245        {"force-share", no_argument, 0, 'U'},
5246        {0, 0, 0, 0}
5247    };
5248    OutputFormat output_format = OFORMAT_HUMAN;
5249    BlockBackend *in_blk = NULL;
5250    BlockDriver *drv;
5251    const char *filename = NULL;
5252    const char *fmt = NULL;
5253    const char *out_fmt = "raw";
5254    char *options = NULL;
5255    char *snapshot_name = NULL;
5256    bool force_share = false;
5257    QemuOpts *opts = NULL;
5258    QemuOpts *object_opts = NULL;
5259    QemuOpts *sn_opts = NULL;
5260    QemuOptsList *create_opts = NULL;
5261    bool image_opts = false;
5262    uint64_t img_size = UINT64_MAX;
5263    BlockMeasureInfo *info = NULL;
5264    Error *local_err = NULL;
5265    int ret = 1;
5266    int c;
5267
5268    while ((c = getopt_long(argc, argv, "hf:O:o:l:U",
5269                            long_options, NULL)) != -1) {
5270        switch (c) {
5271        case '?':
5272        case 'h':
5273            help();
5274            break;
5275        case 'f':
5276            fmt = optarg;
5277            break;
5278        case 'O':
5279            out_fmt = optarg;
5280            break;
5281        case 'o':
5282            if (accumulate_options(&options, optarg) < 0) {
5283                goto out;
5284            }
5285            break;
5286        case 'l':
5287            if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
5288                sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
5289                                                  optarg, false);
5290                if (!sn_opts) {
5291                    error_report("Failed in parsing snapshot param '%s'",
5292                                 optarg);
5293                    goto out;
5294                }
5295            } else {
5296                snapshot_name = optarg;
5297            }
5298            break;
5299        case 'U':
5300            force_share = true;
5301            break;
5302        case OPTION_OBJECT:
5303            user_creatable_process_cmdline(optarg);
5304            break;
5305        case OPTION_IMAGE_OPTS:
5306            image_opts = true;
5307            break;
5308        case OPTION_OUTPUT:
5309            if (!strcmp(optarg, "json")) {
5310                output_format = OFORMAT_JSON;
5311            } else if (!strcmp(optarg, "human")) {
5312                output_format = OFORMAT_HUMAN;
5313            } else {
5314                error_report("--output must be used with human or json "
5315                             "as argument.");
5316                goto out;
5317            }
5318            break;
5319        case OPTION_SIZE:
5320        {
5321            int64_t sval;
5322
5323            sval = cvtnum("image size", optarg);
5324            if (sval < 0) {
5325                goto out;
5326            }
5327            img_size = (uint64_t)sval;
5328        }
5329        break;
5330        }
5331    }
5332
5333    if (argc - optind > 1) {
5334        error_report("At most one filename argument is allowed.");
5335        goto out;
5336    } else if (argc - optind == 1) {
5337        filename = argv[optind];
5338    }
5339
5340    if (!filename && (image_opts || fmt || snapshot_name || sn_opts)) {
5341        error_report("--image-opts, -f, and -l require a filename argument.");
5342        goto out;
5343    }
5344    if (filename && img_size != UINT64_MAX) {
5345        error_report("--size N cannot be used together with a filename.");
5346        goto out;
5347    }
5348    if (!filename && img_size == UINT64_MAX) {
5349        error_report("Either --size N or one filename must be specified.");
5350        goto out;
5351    }
5352
5353    if (filename) {
5354        in_blk = img_open(image_opts, filename, fmt, 0,
5355                          false, false, force_share);
5356        if (!in_blk) {
5357            goto out;
5358        }
5359
5360        if (sn_opts) {
5361            bdrv_snapshot_load_tmp(blk_bs(in_blk),
5362                    qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
5363                    qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
5364                    &local_err);
5365        } else if (snapshot_name != NULL) {
5366            bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(in_blk),
5367                    snapshot_name, &local_err);
5368        }
5369        if (local_err) {
5370            error_reportf_err(local_err, "Failed to load snapshot: ");
5371            goto out;
5372        }
5373    }
5374
5375    drv = bdrv_find_format(out_fmt);
5376    if (!drv) {
5377        error_report("Unknown file format '%s'", out_fmt);
5378        goto out;
5379    }
5380    if (!drv->create_opts) {
5381        error_report("Format driver '%s' does not support image creation",
5382                     drv->format_name);
5383        goto out;
5384    }
5385
5386    create_opts = qemu_opts_append(create_opts, drv->create_opts);
5387    create_opts = qemu_opts_append(create_opts, bdrv_file.create_opts);
5388    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
5389    if (options) {
5390        if (!qemu_opts_do_parse(opts, options, NULL, &local_err)) {
5391            error_report_err(local_err);
5392            error_report("Invalid options for file format '%s'", out_fmt);
5393            goto out;
5394        }
5395    }
5396    if (img_size != UINT64_MAX) {
5397        qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
5398    }
5399
5400    info = bdrv_measure(drv, opts, in_blk ? blk_bs(in_blk) : NULL, &local_err);
5401    if (local_err) {
5402        error_report_err(local_err);
5403        goto out;
5404    }
5405
5406    if (output_format == OFORMAT_HUMAN) {
5407        printf("required size: %" PRIu64 "\n", info->required);
5408        printf("fully allocated size: %" PRIu64 "\n", info->fully_allocated);
5409        if (info->has_bitmaps) {
5410            printf("bitmaps size: %" PRIu64 "\n", info->bitmaps);
5411        }
5412    } else {
5413        dump_json_block_measure_info(info);
5414    }
5415
5416    ret = 0;
5417
5418out:
5419    qapi_free_BlockMeasureInfo(info);
5420    qemu_opts_del(object_opts);
5421    qemu_opts_del(opts);
5422    qemu_opts_del(sn_opts);
5423    qemu_opts_free(create_opts);
5424    g_free(options);
5425    blk_unref(in_blk);
5426    return ret;
5427}
5428
5429static const img_cmd_t img_cmds[] = {
5430#define DEF(option, callback, arg_string)        \
5431    { option, callback },
5432#include "qemu-img-cmds.h"
5433#undef DEF
5434    { NULL, NULL, },
5435};
5436
5437int main(int argc, char **argv)
5438{
5439    const img_cmd_t *cmd;
5440    const char *cmdname;
5441    int c;
5442    static const struct option long_options[] = {
5443        {"help", no_argument, 0, 'h'},
5444        {"version", no_argument, 0, 'V'},
5445        {"trace", required_argument, NULL, 'T'},
5446        {0, 0, 0, 0}
5447    };
5448
5449#ifdef CONFIG_POSIX
5450    signal(SIGPIPE, SIG_IGN);
5451#endif
5452
5453    socket_init();
5454    error_init(argv[0]);
5455    module_call_init(MODULE_INIT_TRACE);
5456    qemu_init_exec_dir(argv[0]);
5457
5458    qemu_init_main_loop(&error_fatal);
5459
5460    qcrypto_init(&error_fatal);
5461
5462    module_call_init(MODULE_INIT_QOM);
5463    bdrv_init();
5464    if (argc < 2) {
5465        error_exit("Not enough arguments");
5466    }
5467
5468    qemu_add_opts(&qemu_source_opts);
5469    qemu_add_opts(&qemu_trace_opts);
5470
5471    while ((c = getopt_long(argc, argv, "+:hVT:", long_options, NULL)) != -1) {
5472        switch (c) {
5473        case ':':
5474            missing_argument(argv[optind - 1]);
5475            return 0;
5476        case '?':
5477            unrecognized_option(argv[optind - 1]);
5478            return 0;
5479        case 'h':
5480            help();
5481            return 0;
5482        case 'V':
5483            printf(QEMU_IMG_VERSION);
5484            return 0;
5485        case 'T':
5486            trace_opt_parse(optarg);
5487            break;
5488        }
5489    }
5490
5491    cmdname = argv[optind];
5492
5493    /* reset getopt_long scanning */
5494    argc -= optind;
5495    if (argc < 1) {
5496        return 0;
5497    }
5498    argv += optind;
5499    qemu_reset_optind();
5500
5501    if (!trace_init_backends()) {
5502        exit(1);
5503    }
5504    trace_init_file();
5505    qemu_set_log(LOG_TRACE, &error_fatal);
5506
5507    /* find the command */
5508    for (cmd = img_cmds; cmd->name != NULL; cmd++) {
5509        if (!strcmp(cmdname, cmd->name)) {
5510            return cmd->handler(argc, argv);
5511        }
5512    }
5513
5514    /* not found */
5515    error_exit("Command not found: %s", cmdname);
5516}
5517