qemu/block/qcow2.c
<<
>>
Prefs
   1/*
   2 * Block driver for the QCOW version 2 format
   3 *
   4 * Copyright (c) 2004-2006 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24
  25#include "qemu/osdep.h"
  26
  27#include "block/qdict.h"
  28#include "sysemu/block-backend.h"
  29#include "qemu/main-loop.h"
  30#include "qemu/module.h"
  31#include "qcow2.h"
  32#include "qemu/error-report.h"
  33#include "qapi/error.h"
  34#include "qapi/qapi-events-block-core.h"
  35#include "qapi/qmp/qdict.h"
  36#include "qapi/qmp/qstring.h"
  37#include "trace.h"
  38#include "qemu/option_int.h"
  39#include "qemu/cutils.h"
  40#include "qemu/bswap.h"
  41#include "qapi/qobject-input-visitor.h"
  42#include "qapi/qapi-visit-block-core.h"
  43#include "crypto.h"
  44#include "block/aio_task.h"
  45
  46/*
  47  Differences with QCOW:
  48
  49  - Support for multiple incremental snapshots.
  50  - Memory management by reference counts.
  51  - Clusters which have a reference count of one have the bit
  52    QCOW_OFLAG_COPIED to optimize write performance.
  53  - Size of compressed clusters is stored in sectors to reduce bit usage
  54    in the cluster offsets.
  55  - Support for storing additional data (such as the VM state) in the
  56    snapshots.
  57  - If a backing store is used, the cluster size is not constrained
  58    (could be backported to QCOW).
  - L2 tables always have a size of one cluster.
  60*/
  61
  62
/*
 * Fixed-size prefix that qcow2_read_extensions() reads in front of every
 * header extension: a 32-bit magic identifying the extension followed by a
 * 32-bit length of the data that follows it.  Both fields are converted with
 * be32_to_cpu() right after being read from the image file.
 */
typedef struct {
    uint32_t magic;
    uint32_t len;
} QEMU_PACKED QCowExtension;
  67
/*
 * Values of QCowExtension.magic that qcow2_read_extensions() handles
 * explicitly; any other magic is stored as an unknown extension.
 */
#define  QCOW2_EXT_MAGIC_END 0 /* terminates the extension list */
#define  QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
#define  QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857
#define  QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77
#define  QCOW2_EXT_MAGIC_BITMAPS 0x23852875
#define  QCOW2_EXT_MAGIC_DATA_FILE 0x44415441 /* the bytes spell "DATA" in ASCII */
  74
/*
 * Forward declaration only; the definition is not part of this section of
 * the file.
 */
static int coroutine_fn
qcow2_co_preadv_compressed(BlockDriverState *bs,
                           uint64_t file_cluster_offset,
                           uint64_t offset,
                           uint64_t bytes,
                           QEMUIOVector *qiov,
                           size_t qiov_offset);
  82
  83static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
  84{
  85    const QCowHeader *cow_header = (const void *)buf;
  86
  87    if (buf_size >= sizeof(QCowHeader) &&
  88        be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
  89        be32_to_cpu(cow_header->version) >= 2)
  90        return 100;
  91    else
  92        return 0;
  93}
  94
  95
  96static ssize_t qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset,
  97                                          uint8_t *buf, size_t buflen,
  98                                          void *opaque, Error **errp)
  99{
 100    BlockDriverState *bs = opaque;
 101    BDRVQcow2State *s = bs->opaque;
 102    ssize_t ret;
 103
 104    if ((offset + buflen) > s->crypto_header.length) {
 105        error_setg(errp, "Request for data outside of extension header");
 106        return -1;
 107    }
 108
 109    ret = bdrv_pread(bs->file,
 110                     s->crypto_header.offset + offset, buf, buflen);
 111    if (ret < 0) {
 112        error_setg_errno(errp, -ret, "Could not read encryption header");
 113        return -1;
 114    }
 115    return ret;
 116}
 117
 118
 119static ssize_t qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen,
 120                                          void *opaque, Error **errp)
 121{
 122    BlockDriverState *bs = opaque;
 123    BDRVQcow2State *s = bs->opaque;
 124    int64_t ret;
 125    int64_t clusterlen;
 126
 127    ret = qcow2_alloc_clusters(bs, headerlen);
 128    if (ret < 0) {
 129        error_setg_errno(errp, -ret,
 130                         "Cannot allocate cluster for LUKS header size %zu",
 131                         headerlen);
 132        return -1;
 133    }
 134
 135    s->crypto_header.length = headerlen;
 136    s->crypto_header.offset = ret;
 137
 138    /* Zero fill remaining space in cluster so it has predictable
 139     * content in case of future spec changes */
 140    clusterlen = size_to_clusters(s, headerlen) * s->cluster_size;
 141    assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen, false) == 0);
 142    ret = bdrv_pwrite_zeroes(bs->file,
 143                             ret + headerlen,
 144                             clusterlen - headerlen, 0);
 145    if (ret < 0) {
 146        error_setg_errno(errp, -ret, "Could not zero fill encryption header");
 147        return -1;
 148    }
 149
 150    return ret;
 151}
 152
 153
 154static ssize_t qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset,
 155                                           const uint8_t *buf, size_t buflen,
 156                                           void *opaque, Error **errp)
 157{
 158    BlockDriverState *bs = opaque;
 159    BDRVQcow2State *s = bs->opaque;
 160    ssize_t ret;
 161
 162    if ((offset + buflen) > s->crypto_header.length) {
 163        error_setg(errp, "Request for data outside of extension header");
 164        return -1;
 165    }
 166
 167    ret = bdrv_pwrite(bs->file,
 168                      s->crypto_header.offset + offset, buf, buflen);
 169    if (ret < 0) {
 170        error_setg_errno(errp, -ret, "Could not read encryption header");
 171        return -1;
 172    }
 173    return ret;
 174}
 175
 176
 177/* 
 178 * read qcow2 extension and fill bs
 179 * start reading from start_offset
 180 * finish reading upon magic of value 0 or when end_offset reached
 181 * unknown magic is skipped (future extension this version knows nothing about)
 182 * return 0 upon success, non-0 otherwise
 183 */
 184static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
 185                                 uint64_t end_offset, void **p_feature_table,
 186                                 int flags, bool *need_update_header,
 187                                 Error **errp)
 188{
 189    BDRVQcow2State *s = bs->opaque;
 190    QCowExtension ext;
 191    uint64_t offset;
 192    int ret;
 193    Qcow2BitmapHeaderExt bitmaps_ext;
 194
 195    if (need_update_header != NULL) {
 196        *need_update_header = false;
 197    }
 198
 199#ifdef DEBUG_EXT
 200    printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
 201#endif
 202    offset = start_offset;
 203    while (offset < end_offset) {
 204
 205#ifdef DEBUG_EXT
 206        /* Sanity check */
 207        if (offset > s->cluster_size)
 208            printf("qcow2_read_extension: suspicious offset %lu\n", offset);
 209
 210        printf("attempting to read extended header in offset %lu\n", offset);
 211#endif
 212
 213        ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext));
 214        if (ret < 0) {
 215            error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: "
 216                             "pread fail from offset %" PRIu64, offset);
 217            return 1;
 218        }
 219        ext.magic = be32_to_cpu(ext.magic);
 220        ext.len = be32_to_cpu(ext.len);
 221        offset += sizeof(ext);
 222#ifdef DEBUG_EXT
 223        printf("ext.magic = 0x%x\n", ext.magic);
 224#endif
 225        if (offset > end_offset || ext.len > end_offset - offset) {
 226            error_setg(errp, "Header extension too large");
 227            return -EINVAL;
 228        }
 229
 230        switch (ext.magic) {
 231        case QCOW2_EXT_MAGIC_END:
 232            return 0;
 233
 234        case QCOW2_EXT_MAGIC_BACKING_FORMAT:
 235            if (ext.len >= sizeof(bs->backing_format)) {
 236                error_setg(errp, "ERROR: ext_backing_format: len=%" PRIu32
 237                           " too large (>=%zu)", ext.len,
 238                           sizeof(bs->backing_format));
 239                return 2;
 240            }
 241            ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len);
 242            if (ret < 0) {
 243                error_setg_errno(errp, -ret, "ERROR: ext_backing_format: "
 244                                 "Could not read format name");
 245                return 3;
 246            }
 247            bs->backing_format[ext.len] = '\0';
 248            s->image_backing_format = g_strdup(bs->backing_format);
 249#ifdef DEBUG_EXT
 250            printf("Qcow2: Got format extension %s\n", bs->backing_format);
 251#endif
 252            break;
 253
 254        case QCOW2_EXT_MAGIC_FEATURE_TABLE:
 255            if (p_feature_table != NULL) {
 256                void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
 257                ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
 258                if (ret < 0) {
 259                    error_setg_errno(errp, -ret, "ERROR: ext_feature_table: "
 260                                     "Could not read table");
 261                    return ret;
 262                }
 263
 264                *p_feature_table = feature_table;
 265            }
 266            break;
 267
 268        case QCOW2_EXT_MAGIC_CRYPTO_HEADER: {
 269            unsigned int cflags = 0;
 270            if (s->crypt_method_header != QCOW_CRYPT_LUKS) {
 271                error_setg(errp, "CRYPTO header extension only "
 272                           "expected with LUKS encryption method");
 273                return -EINVAL;
 274            }
 275            if (ext.len != sizeof(Qcow2CryptoHeaderExtension)) {
 276                error_setg(errp, "CRYPTO header extension size %u, "
 277                           "but expected size %zu", ext.len,
 278                           sizeof(Qcow2CryptoHeaderExtension));
 279                return -EINVAL;
 280            }
 281
 282            ret = bdrv_pread(bs->file, offset, &s->crypto_header, ext.len);
 283            if (ret < 0) {
 284                error_setg_errno(errp, -ret,
 285                                 "Unable to read CRYPTO header extension");
 286                return ret;
 287            }
 288            s->crypto_header.offset = be64_to_cpu(s->crypto_header.offset);
 289            s->crypto_header.length = be64_to_cpu(s->crypto_header.length);
 290
 291            if ((s->crypto_header.offset % s->cluster_size) != 0) {
 292                error_setg(errp, "Encryption header offset '%" PRIu64 "' is "
 293                           "not a multiple of cluster size '%u'",
 294                           s->crypto_header.offset, s->cluster_size);
 295                return -EINVAL;
 296            }
 297
 298            if (flags & BDRV_O_NO_IO) {
 299                cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
 300            }
 301            s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
 302                                           qcow2_crypto_hdr_read_func,
 303                                           bs, cflags, QCOW2_MAX_THREADS, errp);
 304            if (!s->crypto) {
 305                return -EINVAL;
 306            }
 307        }   break;
 308
 309        case QCOW2_EXT_MAGIC_BITMAPS:
 310            if (ext.len != sizeof(bitmaps_ext)) {
 311                error_setg_errno(errp, -ret, "bitmaps_ext: "
 312                                 "Invalid extension length");
 313                return -EINVAL;
 314            }
 315
 316            if (!(s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS)) {
 317                if (s->qcow_version < 3) {
 318                    /* Let's be a bit more specific */
 319                    warn_report("This qcow2 v2 image contains bitmaps, but "
 320                                "they may have been modified by a program "
 321                                "without persistent bitmap support; so now "
 322                                "they must all be considered inconsistent");
 323                } else {
 324                    warn_report("a program lacking bitmap support "
 325                                "modified this file, so all bitmaps are now "
 326                                "considered inconsistent");
 327                }
 328                error_printf("Some clusters may be leaked, "
 329                             "run 'qemu-img check -r' on the image "
 330                             "file to fix.");
 331                if (need_update_header != NULL) {
 332                    /* Updating is needed to drop invalid bitmap extension. */
 333                    *need_update_header = true;
 334                }
 335                break;
 336            }
 337
 338            ret = bdrv_pread(bs->file, offset, &bitmaps_ext, ext.len);
 339            if (ret < 0) {
 340                error_setg_errno(errp, -ret, "bitmaps_ext: "
 341                                 "Could not read ext header");
 342                return ret;
 343            }
 344
 345            if (bitmaps_ext.reserved32 != 0) {
 346                error_setg_errno(errp, -ret, "bitmaps_ext: "
 347                                 "Reserved field is not zero");
 348                return -EINVAL;
 349            }
 350
 351            bitmaps_ext.nb_bitmaps = be32_to_cpu(bitmaps_ext.nb_bitmaps);
 352            bitmaps_ext.bitmap_directory_size =
 353                be64_to_cpu(bitmaps_ext.bitmap_directory_size);
 354            bitmaps_ext.bitmap_directory_offset =
 355                be64_to_cpu(bitmaps_ext.bitmap_directory_offset);
 356
 357            if (bitmaps_ext.nb_bitmaps > QCOW2_MAX_BITMAPS) {
 358                error_setg(errp,
 359                           "bitmaps_ext: Image has %" PRIu32 " bitmaps, "
 360                           "exceeding the QEMU supported maximum of %d",
 361                           bitmaps_ext.nb_bitmaps, QCOW2_MAX_BITMAPS);
 362                return -EINVAL;
 363            }
 364
 365            if (bitmaps_ext.nb_bitmaps == 0) {
 366                error_setg(errp, "found bitmaps extension with zero bitmaps");
 367                return -EINVAL;
 368            }
 369
 370            if (bitmaps_ext.bitmap_directory_offset & (s->cluster_size - 1)) {
 371                error_setg(errp, "bitmaps_ext: "
 372                                 "invalid bitmap directory offset");
 373                return -EINVAL;
 374            }
 375
 376            if (bitmaps_ext.bitmap_directory_size >
 377                QCOW2_MAX_BITMAP_DIRECTORY_SIZE) {
 378                error_setg(errp, "bitmaps_ext: "
 379                                 "bitmap directory size (%" PRIu64 ") exceeds "
 380                                 "the maximum supported size (%d)",
 381                                 bitmaps_ext.bitmap_directory_size,
 382                                 QCOW2_MAX_BITMAP_DIRECTORY_SIZE);
 383                return -EINVAL;
 384            }
 385
 386            s->nb_bitmaps = bitmaps_ext.nb_bitmaps;
 387            s->bitmap_directory_offset =
 388                    bitmaps_ext.bitmap_directory_offset;
 389            s->bitmap_directory_size =
 390                    bitmaps_ext.bitmap_directory_size;
 391
 392#ifdef DEBUG_EXT
 393            printf("Qcow2: Got bitmaps extension: "
 394                   "offset=%" PRIu64 " nb_bitmaps=%" PRIu32 "\n",
 395                   s->bitmap_directory_offset, s->nb_bitmaps);
 396#endif
 397            break;
 398
 399        case QCOW2_EXT_MAGIC_DATA_FILE:
 400        {
 401            s->image_data_file = g_malloc0(ext.len + 1);
 402            ret = bdrv_pread(bs->file, offset, s->image_data_file, ext.len);
 403            if (ret < 0) {
 404                error_setg_errno(errp, -ret,
 405                                 "ERROR: Could not read data file name");
 406                return ret;
 407            }
 408#ifdef DEBUG_EXT
 409            printf("Qcow2: Got external data file %s\n", s->image_data_file);
 410#endif
 411            break;
 412        }
 413
 414        default:
 415            /* unknown magic - save it in case we need to rewrite the header */
 416            /* If you add a new feature, make sure to also update the fast
 417             * path of qcow2_make_empty() to deal with it. */
 418            {
 419                Qcow2UnknownHeaderExtension *uext;
 420
 421                uext = g_malloc0(sizeof(*uext)  + ext.len);
 422                uext->magic = ext.magic;
 423                uext->len = ext.len;
 424                QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
 425
 426                ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
 427                if (ret < 0) {
 428                    error_setg_errno(errp, -ret, "ERROR: unknown extension: "
 429                                     "Could not read data");
 430                    return ret;
 431                }
 432            }
 433            break;
 434        }
 435
 436        offset += ((ext.len + 7) & ~7);
 437    }
 438
 439    return 0;
 440}
 441
 442static void cleanup_unknown_header_ext(BlockDriverState *bs)
 443{
 444    BDRVQcow2State *s = bs->opaque;
 445    Qcow2UnknownHeaderExtension *uext, *next;
 446
 447    QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) {
 448        QLIST_REMOVE(uext, next);
 449        g_free(uext);
 450    }
 451}
 452
 453static void report_unsupported_feature(Error **errp, Qcow2Feature *table,
 454                                       uint64_t mask)
 455{
 456    char *features = g_strdup("");
 457    char *old;
 458
 459    while (table && table->name[0] != '\0') {
 460        if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) {
 461            if (mask & (1ULL << table->bit)) {
 462                old = features;
 463                features = g_strdup_printf("%s%s%.46s", old, *old ? ", " : "",
 464                                           table->name);
 465                g_free(old);
 466                mask &= ~(1ULL << table->bit);
 467            }
 468        }
 469        table++;
 470    }
 471
 472    if (mask) {
 473        old = features;
 474        features = g_strdup_printf("%s%sUnknown incompatible feature: %" PRIx64,
 475                                   old, *old ? ", " : "", mask);
 476        g_free(old);
 477    }
 478
 479    error_setg(errp, "Unsupported qcow2 feature(s): %s", features);
 480    g_free(features);
 481}
 482
 483/*
 484 * Sets the dirty bit and flushes afterwards if necessary.
 485 *
 486 * The incompatible_features bit is only set if the image file header was
 487 * updated successfully.  Therefore it is not required to check the return
 488 * value of this function.
 489 */
 490int qcow2_mark_dirty(BlockDriverState *bs)
 491{
 492    BDRVQcow2State *s = bs->opaque;
 493    uint64_t val;
 494    int ret;
 495
 496    assert(s->qcow_version >= 3);
 497
 498    if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
 499        return 0; /* already dirty */
 500    }
 501
 502    val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
 503    ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
 504                      &val, sizeof(val));
 505    if (ret < 0) {
 506        return ret;
 507    }
 508    ret = bdrv_flush(bs->file->bs);
 509    if (ret < 0) {
 510        return ret;
 511    }
 512
 513    /* Only treat image as dirty if the header was updated successfully */
 514    s->incompatible_features |= QCOW2_INCOMPAT_DIRTY;
 515    return 0;
 516}
 517
 518/*
 519 * Clears the dirty bit and flushes before if necessary.  Only call this
 520 * function when there are no pending requests, it does not guard against
 521 * concurrent requests dirtying the image.
 522 */
 523static int qcow2_mark_clean(BlockDriverState *bs)
 524{
 525    BDRVQcow2State *s = bs->opaque;
 526
 527    if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
 528        int ret;
 529
 530        s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;
 531
 532        ret = qcow2_flush_caches(bs);
 533        if (ret < 0) {
 534            return ret;
 535        }
 536
 537        return qcow2_update_header(bs);
 538    }
 539    return 0;
 540}
 541
 542/*
 543 * Marks the image as corrupt.
 544 */
 545int qcow2_mark_corrupt(BlockDriverState *bs)
 546{
 547    BDRVQcow2State *s = bs->opaque;
 548
 549    s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT;
 550    return qcow2_update_header(bs);
 551}
 552
 553/*
 554 * Marks the image as consistent, i.e., unsets the corrupt bit, and flushes
 555 * before if necessary.
 556 */
 557int qcow2_mark_consistent(BlockDriverState *bs)
 558{
 559    BDRVQcow2State *s = bs->opaque;
 560
 561    if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
 562        int ret = qcow2_flush_caches(bs);
 563        if (ret < 0) {
 564            return ret;
 565        }
 566
 567        s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT;
 568        return qcow2_update_header(bs);
 569    }
 570    return 0;
 571}
 572
 573static void qcow2_add_check_result(BdrvCheckResult *out,
 574                                   const BdrvCheckResult *src,
 575                                   bool set_allocation_info)
 576{
 577    out->corruptions += src->corruptions;
 578    out->leaks += src->leaks;
 579    out->check_errors += src->check_errors;
 580    out->corruptions_fixed += src->corruptions_fixed;
 581    out->leaks_fixed += src->leaks_fixed;
 582
 583    if (set_allocation_info) {
 584        out->image_end_offset = src->image_end_offset;
 585        out->bfi = src->bfi;
 586    }
 587}
 588
 589static int coroutine_fn qcow2_co_check_locked(BlockDriverState *bs,
 590                                              BdrvCheckResult *result,
 591                                              BdrvCheckMode fix)
 592{
 593    BdrvCheckResult snapshot_res = {};
 594    BdrvCheckResult refcount_res = {};
 595    int ret;
 596
 597    memset(result, 0, sizeof(*result));
 598
 599    ret = qcow2_check_read_snapshot_table(bs, &snapshot_res, fix);
 600    if (ret < 0) {
 601        qcow2_add_check_result(result, &snapshot_res, false);
 602        return ret;
 603    }
 604
 605    ret = qcow2_check_refcounts(bs, &refcount_res, fix);
 606    qcow2_add_check_result(result, &refcount_res, true);
 607    if (ret < 0) {
 608        qcow2_add_check_result(result, &snapshot_res, false);
 609        return ret;
 610    }
 611
 612    ret = qcow2_check_fix_snapshot_table(bs, &snapshot_res, fix);
 613    qcow2_add_check_result(result, &snapshot_res, false);
 614    if (ret < 0) {
 615        return ret;
 616    }
 617
 618    if (fix && result->check_errors == 0 && result->corruptions == 0) {
 619        ret = qcow2_mark_clean(bs);
 620        if (ret < 0) {
 621            return ret;
 622        }
 623        return qcow2_mark_consistent(bs);
 624    }
 625    return ret;
 626}
 627
 628static int coroutine_fn qcow2_co_check(BlockDriverState *bs,
 629                                       BdrvCheckResult *result,
 630                                       BdrvCheckMode fix)
 631{
 632    BDRVQcow2State *s = bs->opaque;
 633    int ret;
 634
 635    qemu_co_mutex_lock(&s->lock);
 636    ret = qcow2_co_check_locked(bs, result, fix);
 637    qemu_co_mutex_unlock(&s->lock);
 638    return ret;
 639}
 640
 641int qcow2_validate_table(BlockDriverState *bs, uint64_t offset,
 642                         uint64_t entries, size_t entry_len,
 643                         int64_t max_size_bytes, const char *table_name,
 644                         Error **errp)
 645{
 646    BDRVQcow2State *s = bs->opaque;
 647
 648    if (entries > max_size_bytes / entry_len) {
 649        error_setg(errp, "%s too large", table_name);
 650        return -EFBIG;
 651    }
 652
 653    /* Use signed INT64_MAX as the maximum even for uint64_t header fields,
 654     * because values will be passed to qemu functions taking int64_t. */
 655    if ((INT64_MAX - entries * entry_len < offset) ||
 656        (offset_into_cluster(s, offset) != 0)) {
 657        error_setg(errp, "%s offset invalid", table_name);
 658        return -EINVAL;
 659    }
 660
 661    return 0;
 662}
 663
/*
 * NULL-terminated list of runtime option names.
 * NOTE(review): judging by the name these are presumably the options that
 * may be changed while the image is open (e.g. on reopen) - confirm against
 * the code that references this array.
 */
static const char *const mutable_opts[] = {
    QCOW2_OPT_LAZY_REFCOUNTS,
    QCOW2_OPT_DISCARD_REQUEST,
    QCOW2_OPT_DISCARD_SNAPSHOT,
    QCOW2_OPT_DISCARD_OTHER,
    QCOW2_OPT_OVERLAP,
    QCOW2_OPT_OVERLAP_TEMPLATE,
    QCOW2_OPT_OVERLAP_MAIN_HEADER,
    QCOW2_OPT_OVERLAP_ACTIVE_L1,
    QCOW2_OPT_OVERLAP_ACTIVE_L2,
    QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
    QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
    QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
    QCOW2_OPT_OVERLAP_INACTIVE_L1,
    QCOW2_OPT_OVERLAP_INACTIVE_L2,
    QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
    QCOW2_OPT_CACHE_SIZE,
    QCOW2_OPT_L2_CACHE_SIZE,
    QCOW2_OPT_L2_CACHE_ENTRY_SIZE,
    QCOW2_OPT_REFCOUNT_CACHE_SIZE,
    QCOW2_OPT_CACHE_CLEAN_INTERVAL,
    NULL
};
 687
/*
 * Description (name, type and help text) of the runtime options of the
 * qcow2 driver: refcount handling, discard behaviour, metadata overlap
 * checks, metadata cache sizing and the encryption key secret.
 */
static QemuOptsList qcow2_runtime_opts = {
    .name = "qcow2",
    .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
    .desc = {
        {
            .name = QCOW2_OPT_LAZY_REFCOUNTS,
            .type = QEMU_OPT_BOOL,
            .help = "Postpone refcount updates",
        },
        {
            .name = QCOW2_OPT_DISCARD_REQUEST,
            .type = QEMU_OPT_BOOL,
            .help = "Pass guest discard requests to the layer below",
        },
        {
            .name = QCOW2_OPT_DISCARD_SNAPSHOT,
            .type = QEMU_OPT_BOOL,
            .help = "Generate discard requests when snapshot related space "
                    "is freed",
        },
        {
            .name = QCOW2_OPT_DISCARD_OTHER,
            .type = QEMU_OPT_BOOL,
            .help = "Generate discard requests when other clusters are freed",
        },
        /* The next two entries carry the same help text */
        {
            .name = QCOW2_OPT_OVERLAP,
            .type = QEMU_OPT_STRING,
            .help = "Selects which overlap checks to perform from a range of "
                    "templates (none, constant, cached, all)",
        },
        {
            .name = QCOW2_OPT_OVERLAP_TEMPLATE,
            .type = QEMU_OPT_STRING,
            .help = "Selects which overlap checks to perform from a range of "
                    "templates (none, constant, cached, all)",
        },
        /* One boolean per individual overlap check */
        {
            .name = QCOW2_OPT_OVERLAP_MAIN_HEADER,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the main qcow2 header",
        },
        {
            .name = QCOW2_OPT_OVERLAP_ACTIVE_L1,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the active L1 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_ACTIVE_L2,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into an active L2 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the refcount table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into a refcount block",
        },
        {
            .name = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the snapshot table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_INACTIVE_L1,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into an inactive L1 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_INACTIVE_L2,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into an inactive L2 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the bitmap directory",
        },
        /* Metadata cache sizing */
        {
            .name = QCOW2_OPT_CACHE_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Maximum combined metadata (L2 tables and refcount blocks) "
                    "cache size",
        },
        {
            .name = QCOW2_OPT_L2_CACHE_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Maximum L2 table cache size",
        },
        {
            .name = QCOW2_OPT_L2_CACHE_ENTRY_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Size of each entry in the L2 cache",
        },
        {
            .name = QCOW2_OPT_REFCOUNT_CACHE_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Maximum refcount block cache size",
        },
        {
            .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL,
            .type = QEMU_OPT_NUMBER,
            .help = "Clean unused cache entries after this time (in seconds)",
        },
        BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.",
            "ID of secret providing qcow2 AES key or LUKS passphrase"),
        { /* end of list */ }
    },
};
 801
/*
 * Maps the bit number of each overlap check (QCOW2_OL_*_BITNR) to the name
 * of the boolean runtime option belonging to that check.
 */
static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = {
    [QCOW2_OL_MAIN_HEADER_BITNR]      = QCOW2_OPT_OVERLAP_MAIN_HEADER,
    [QCOW2_OL_ACTIVE_L1_BITNR]        = QCOW2_OPT_OVERLAP_ACTIVE_L1,
    [QCOW2_OL_ACTIVE_L2_BITNR]        = QCOW2_OPT_OVERLAP_ACTIVE_L2,
    [QCOW2_OL_REFCOUNT_TABLE_BITNR]   = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
    [QCOW2_OL_REFCOUNT_BLOCK_BITNR]   = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
    [QCOW2_OL_SNAPSHOT_TABLE_BITNR]   = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
    [QCOW2_OL_INACTIVE_L1_BITNR]      = QCOW2_OPT_OVERLAP_INACTIVE_L1,
    [QCOW2_OL_INACTIVE_L2_BITNR]      = QCOW2_OPT_OVERLAP_INACTIVE_L2,
    [QCOW2_OL_BITMAP_DIRECTORY_BITNR] = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
};
 813
 814static void cache_clean_timer_cb(void *opaque)
 815{
 816    BlockDriverState *bs = opaque;
 817    BDRVQcow2State *s = bs->opaque;
 818    qcow2_cache_clean_unused(s->l2_table_cache);
 819    qcow2_cache_clean_unused(s->refcount_block_cache);
 820    timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
 821              (int64_t) s->cache_clean_interval * 1000);
 822}
 823
 824static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context)
 825{
 826    BDRVQcow2State *s = bs->opaque;
 827    if (s->cache_clean_interval > 0) {
 828        s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL,
 829                                             SCALE_MS, cache_clean_timer_cb,
 830                                             bs);
 831        timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
 832                  (int64_t) s->cache_clean_interval * 1000);
 833    }
 834}
 835
 836static void cache_clean_timer_del(BlockDriverState *bs)
 837{
 838    BDRVQcow2State *s = bs->opaque;
 839    if (s->cache_clean_timer) {
 840        timer_del(s->cache_clean_timer);
 841        timer_free(s->cache_clean_timer);
 842        s->cache_clean_timer = NULL;
 843    }
 844}
 845
 846static void qcow2_detach_aio_context(BlockDriverState *bs)
 847{
 848    cache_clean_timer_del(bs);
 849}
 850
 851static void qcow2_attach_aio_context(BlockDriverState *bs,
 852                                     AioContext *new_context)
 853{
 854    cache_clean_timer_init(bs, new_context);
 855}
 856
 857static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
 858                             uint64_t *l2_cache_size,
 859                             uint64_t *l2_cache_entry_size,
 860                             uint64_t *refcount_cache_size, Error **errp)
 861{
 862    BDRVQcow2State *s = bs->opaque;
 863    uint64_t combined_cache_size, l2_cache_max_setting;
 864    bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set;
 865    bool l2_cache_entry_size_set;
 866    int min_refcount_cache = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size;
 867    uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
 868    uint64_t max_l2_entries = DIV_ROUND_UP(virtual_disk_size, s->cluster_size);
 869    /* An L2 table is always one cluster in size so the max cache size
 870     * should be a multiple of the cluster size. */
 871    uint64_t max_l2_cache = ROUND_UP(max_l2_entries * sizeof(uint64_t),
 872                                     s->cluster_size);
 873
 874    combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE);
 875    l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE);
 876    refcount_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
 877    l2_cache_entry_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE);
 878
 879    combined_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_CACHE_SIZE, 0);
 880    l2_cache_max_setting = qemu_opt_get_size(opts, QCOW2_OPT_L2_CACHE_SIZE,
 881                                             DEFAULT_L2_CACHE_MAX_SIZE);
 882    *refcount_cache_size = qemu_opt_get_size(opts,
 883                                             QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0);
 884
 885    *l2_cache_entry_size = qemu_opt_get_size(
 886        opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE, s->cluster_size);
 887
 888    *l2_cache_size = MIN(max_l2_cache, l2_cache_max_setting);
 889
 890    if (combined_cache_size_set) {
 891        if (l2_cache_size_set && refcount_cache_size_set) {
 892            error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE
 893                       " and " QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not be set "
 894                       "at the same time");
 895            return;
 896        } else if (l2_cache_size_set &&
 897                   (l2_cache_max_setting > combined_cache_size)) {
 898            error_setg(errp, QCOW2_OPT_L2_CACHE_SIZE " may not exceed "
 899                       QCOW2_OPT_CACHE_SIZE);
 900            return;
 901        } else if (*refcount_cache_size > combined_cache_size) {
 902            error_setg(errp, QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not exceed "
 903                       QCOW2_OPT_CACHE_SIZE);
 904            return;
 905        }
 906
 907        if (l2_cache_size_set) {
 908            *refcount_cache_size = combined_cache_size - *l2_cache_size;
 909        } else if (refcount_cache_size_set) {
 910            *l2_cache_size = combined_cache_size - *refcount_cache_size;
 911        } else {
 912            /* Assign as much memory as possible to the L2 cache, and
 913             * use the remainder for the refcount cache */
 914            if (combined_cache_size >= max_l2_cache + min_refcount_cache) {
 915                *l2_cache_size = max_l2_cache;
 916                *refcount_cache_size = combined_cache_size - *l2_cache_size;
 917            } else {
 918                *refcount_cache_size =
 919                    MIN(combined_cache_size, min_refcount_cache);
 920                *l2_cache_size = combined_cache_size - *refcount_cache_size;
 921            }
 922        }
 923    }
 924
 925    /*
 926     * If the L2 cache is not enough to cover the whole disk then
 927     * default to 4KB entries. Smaller entries reduce the cost of
 928     * loads and evictions and increase I/O performance.
 929     */
 930    if (*l2_cache_size < max_l2_cache && !l2_cache_entry_size_set) {
 931        *l2_cache_entry_size = MIN(s->cluster_size, 4096);
 932    }
 933
 934    /* l2_cache_size and refcount_cache_size are ensured to have at least
 935     * their minimum values in qcow2_update_options_prepare() */
 936
 937    if (*l2_cache_entry_size < (1 << MIN_CLUSTER_BITS) ||
 938        *l2_cache_entry_size > s->cluster_size ||
 939        !is_power_of_2(*l2_cache_entry_size)) {
 940        error_setg(errp, "L2 cache entry size must be a power of two "
 941                   "between %d and the cluster size (%d)",
 942                   1 << MIN_CLUSTER_BITS, s->cluster_size);
 943        return;
 944    }
 945}
 946
 947typedef struct Qcow2ReopenState {
 948    Qcow2Cache *l2_table_cache;
 949    Qcow2Cache *refcount_block_cache;
 950    int l2_slice_size; /* Number of entries in a slice of the L2 table */
 951    bool use_lazy_refcounts;
 952    int overlap_check;
 953    bool discard_passthrough[QCOW2_DISCARD_MAX];
 954    uint64_t cache_clean_interval;
 955    QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */
 956} Qcow2ReopenState;
 957
 958static int qcow2_update_options_prepare(BlockDriverState *bs,
 959                                        Qcow2ReopenState *r,
 960                                        QDict *options, int flags,
 961                                        Error **errp)
 962{
 963    BDRVQcow2State *s = bs->opaque;
 964    QemuOpts *opts = NULL;
 965    const char *opt_overlap_check, *opt_overlap_check_template;
 966    int overlap_check_template = 0;
 967    uint64_t l2_cache_size, l2_cache_entry_size, refcount_cache_size;
 968    int i;
 969    const char *encryptfmt;
 970    QDict *encryptopts = NULL;
 971    Error *local_err = NULL;
 972    int ret;
 973
 974    qdict_extract_subqdict(options, &encryptopts, "encrypt.");
 975    encryptfmt = qdict_get_try_str(encryptopts, "format");
 976
 977    opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort);
 978    qemu_opts_absorb_qdict(opts, options, &local_err);
 979    if (local_err) {
 980        error_propagate(errp, local_err);
 981        ret = -EINVAL;
 982        goto fail;
 983    }
 984
 985    /* get L2 table/refcount block cache size from command line options */
 986    read_cache_sizes(bs, opts, &l2_cache_size, &l2_cache_entry_size,
 987                     &refcount_cache_size, &local_err);
 988    if (local_err) {
 989        error_propagate(errp, local_err);
 990        ret = -EINVAL;
 991        goto fail;
 992    }
 993
 994    l2_cache_size /= l2_cache_entry_size;
 995    if (l2_cache_size < MIN_L2_CACHE_SIZE) {
 996        l2_cache_size = MIN_L2_CACHE_SIZE;
 997    }
 998    if (l2_cache_size > INT_MAX) {
 999        error_setg(errp, "L2 cache size too big");
1000        ret = -EINVAL;
1001        goto fail;
1002    }
1003
1004    refcount_cache_size /= s->cluster_size;
1005    if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) {
1006        refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE;
1007    }
1008    if (refcount_cache_size > INT_MAX) {
1009        error_setg(errp, "Refcount cache size too big");
1010        ret = -EINVAL;
1011        goto fail;
1012    }
1013
1014    /* alloc new L2 table/refcount block cache, flush old one */
1015    if (s->l2_table_cache) {
1016        ret = qcow2_cache_flush(bs, s->l2_table_cache);
1017        if (ret) {
1018            error_setg_errno(errp, -ret, "Failed to flush the L2 table cache");
1019            goto fail;
1020        }
1021    }
1022
1023    if (s->refcount_block_cache) {
1024        ret = qcow2_cache_flush(bs, s->refcount_block_cache);
1025        if (ret) {
1026            error_setg_errno(errp, -ret,
1027                             "Failed to flush the refcount block cache");
1028            goto fail;
1029        }
1030    }
1031
1032    r->l2_slice_size = l2_cache_entry_size / sizeof(uint64_t);
1033    r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size,
1034                                           l2_cache_entry_size);
1035    r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size,
1036                                                 s->cluster_size);
1037    if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) {
1038        error_setg(errp, "Could not allocate metadata caches");
1039        ret = -ENOMEM;
1040        goto fail;
1041    }
1042
1043    /* New interval for cache cleanup timer */
1044    r->cache_clean_interval =
1045        qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL,
1046                            DEFAULT_CACHE_CLEAN_INTERVAL);
1047#ifndef CONFIG_LINUX
1048    if (r->cache_clean_interval != 0) {
1049        error_setg(errp, QCOW2_OPT_CACHE_CLEAN_INTERVAL
1050                   " not supported on this host");
1051        ret = -EINVAL;
1052        goto fail;
1053    }
1054#endif
1055    if (r->cache_clean_interval > UINT_MAX) {
1056        error_setg(errp, "Cache clean interval too big");
1057        ret = -EINVAL;
1058        goto fail;
1059    }
1060
1061    /* lazy-refcounts; flush if going from enabled to disabled */
1062    r->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
1063        (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
1064    if (r->use_lazy_refcounts && s->qcow_version < 3) {
1065        error_setg(errp, "Lazy refcounts require a qcow2 image with at least "
1066                   "qemu 1.1 compatibility level");
1067        ret = -EINVAL;
1068        goto fail;
1069    }
1070
1071    if (s->use_lazy_refcounts && !r->use_lazy_refcounts) {
1072        ret = qcow2_mark_clean(bs);
1073        if (ret < 0) {
1074            error_setg_errno(errp, -ret, "Failed to disable lazy refcounts");
1075            goto fail;
1076        }
1077    }
1078
1079    /* Overlap check options */
1080    opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP);
1081    opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE);
1082    if (opt_overlap_check_template && opt_overlap_check &&
1083        strcmp(opt_overlap_check_template, opt_overlap_check))
1084    {
1085        error_setg(errp, "Conflicting values for qcow2 options '"
1086                   QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE
1087                   "' ('%s')", opt_overlap_check, opt_overlap_check_template);
1088        ret = -EINVAL;
1089        goto fail;
1090    }
1091    if (!opt_overlap_check) {
1092        opt_overlap_check = opt_overlap_check_template ?: "cached";
1093    }
1094
1095    if (!strcmp(opt_overlap_check, "none")) {
1096        overlap_check_template = 0;
1097    } else if (!strcmp(opt_overlap_check, "constant")) {
1098        overlap_check_template = QCOW2_OL_CONSTANT;
1099    } else if (!strcmp(opt_overlap_check, "cached")) {
1100        overlap_check_template = QCOW2_OL_CACHED;
1101    } else if (!strcmp(opt_overlap_check, "all")) {
1102        overlap_check_template = QCOW2_OL_ALL;
1103    } else {
1104        error_setg(errp, "Unsupported value '%s' for qcow2 option "
1105                   "'overlap-check'. Allowed are any of the following: "
1106                   "none, constant, cached, all", opt_overlap_check);
1107        ret = -EINVAL;
1108        goto fail;
1109    }
1110
1111    r->overlap_check = 0;
1112    for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) {
1113        /* overlap-check defines a template bitmask, but every flag may be
1114         * overwritten through the associated boolean option */
1115        r->overlap_check |=
1116            qemu_opt_get_bool(opts, overlap_bool_option_names[i],
1117                              overlap_check_template & (1 << i)) << i;
1118    }
1119
1120    r->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
1121    r->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
1122    r->discard_passthrough[QCOW2_DISCARD_REQUEST] =
1123        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
1124                          flags & BDRV_O_UNMAP);
1125    r->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
1126        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
1127    r->discard_passthrough[QCOW2_DISCARD_OTHER] =
1128        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
1129
1130    switch (s->crypt_method_header) {
1131    case QCOW_CRYPT_NONE:
1132        if (encryptfmt) {
1133            error_setg(errp, "No encryption in image header, but options "
1134                       "specified format '%s'", encryptfmt);
1135            ret = -EINVAL;
1136            goto fail;
1137        }
1138        break;
1139
1140    case QCOW_CRYPT_AES:
1141        if (encryptfmt && !g_str_equal(encryptfmt, "aes")) {
1142            error_setg(errp,
1143                       "Header reported 'aes' encryption format but "
1144                       "options specify '%s'", encryptfmt);
1145            ret = -EINVAL;
1146            goto fail;
1147        }
1148        qdict_put_str(encryptopts, "format", "qcow");
1149        r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp);
1150        break;
1151
1152    case QCOW_CRYPT_LUKS:
1153        if (encryptfmt && !g_str_equal(encryptfmt, "luks")) {
1154            error_setg(errp,
1155                       "Header reported 'luks' encryption format but "
1156                       "options specify '%s'", encryptfmt);
1157            ret = -EINVAL;
1158            goto fail;
1159        }
1160        qdict_put_str(encryptopts, "format", "luks");
1161        r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp);
1162        break;
1163
1164    default:
1165        error_setg(errp, "Unsupported encryption method %d",
1166                   s->crypt_method_header);
1167        break;
1168    }
1169    if (s->crypt_method_header != QCOW_CRYPT_NONE && !r->crypto_opts) {
1170        ret = -EINVAL;
1171        goto fail;
1172    }
1173
1174    ret = 0;
1175fail:
1176    qobject_unref(encryptopts);
1177    qemu_opts_del(opts);
1178    opts = NULL;
1179    return ret;
1180}
1181
1182static void qcow2_update_options_commit(BlockDriverState *bs,
1183                                        Qcow2ReopenState *r)
1184{
1185    BDRVQcow2State *s = bs->opaque;
1186    int i;
1187
1188    if (s->l2_table_cache) {
1189        qcow2_cache_destroy(s->l2_table_cache);
1190    }
1191    if (s->refcount_block_cache) {
1192        qcow2_cache_destroy(s->refcount_block_cache);
1193    }
1194    s->l2_table_cache = r->l2_table_cache;
1195    s->refcount_block_cache = r->refcount_block_cache;
1196    s->l2_slice_size = r->l2_slice_size;
1197
1198    s->overlap_check = r->overlap_check;
1199    s->use_lazy_refcounts = r->use_lazy_refcounts;
1200
1201    for (i = 0; i < QCOW2_DISCARD_MAX; i++) {
1202        s->discard_passthrough[i] = r->discard_passthrough[i];
1203    }
1204
1205    if (s->cache_clean_interval != r->cache_clean_interval) {
1206        cache_clean_timer_del(bs);
1207        s->cache_clean_interval = r->cache_clean_interval;
1208        cache_clean_timer_init(bs, bdrv_get_aio_context(bs));
1209    }
1210
1211    qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
1212    s->crypto_opts = r->crypto_opts;
1213}
1214
1215static void qcow2_update_options_abort(BlockDriverState *bs,
1216                                       Qcow2ReopenState *r)
1217{
1218    if (r->l2_table_cache) {
1219        qcow2_cache_destroy(r->l2_table_cache);
1220    }
1221    if (r->refcount_block_cache) {
1222        qcow2_cache_destroy(r->refcount_block_cache);
1223    }
1224    qapi_free_QCryptoBlockOpenOptions(r->crypto_opts);
1225}
1226
1227static int qcow2_update_options(BlockDriverState *bs, QDict *options,
1228                                int flags, Error **errp)
1229{
1230    Qcow2ReopenState r = {};
1231    int ret;
1232
1233    ret = qcow2_update_options_prepare(bs, &r, options, flags, errp);
1234    if (ret >= 0) {
1235        qcow2_update_options_commit(bs, &r);
1236    } else {
1237        qcow2_update_options_abort(bs, &r);
1238    }
1239
1240    return ret;
1241}
1242
1243/* Called with s->lock held.  */
1244static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
1245                                      int flags, Error **errp)
1246{
1247    BDRVQcow2State *s = bs->opaque;
1248    unsigned int len, i;
1249    int ret = 0;
1250    QCowHeader header;
1251    Error *local_err = NULL;
1252    uint64_t ext_end;
1253    uint64_t l1_vm_state_index;
1254    bool update_header = false;
1255
1256    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
1257    if (ret < 0) {
1258        error_setg_errno(errp, -ret, "Could not read qcow2 header");
1259        goto fail;
1260    }
1261    header.magic = be32_to_cpu(header.magic);
1262    header.version = be32_to_cpu(header.version);
1263    header.backing_file_offset = be64_to_cpu(header.backing_file_offset);
1264    header.backing_file_size = be32_to_cpu(header.backing_file_size);
1265    header.size = be64_to_cpu(header.size);
1266    header.cluster_bits = be32_to_cpu(header.cluster_bits);
1267    header.crypt_method = be32_to_cpu(header.crypt_method);
1268    header.l1_table_offset = be64_to_cpu(header.l1_table_offset);
1269    header.l1_size = be32_to_cpu(header.l1_size);
1270    header.refcount_table_offset = be64_to_cpu(header.refcount_table_offset);
1271    header.refcount_table_clusters =
1272        be32_to_cpu(header.refcount_table_clusters);
1273    header.snapshots_offset = be64_to_cpu(header.snapshots_offset);
1274    header.nb_snapshots = be32_to_cpu(header.nb_snapshots);
1275
1276    if (header.magic != QCOW_MAGIC) {
1277        error_setg(errp, "Image is not in qcow2 format");
1278        ret = -EINVAL;
1279        goto fail;
1280    }
1281    if (header.version < 2 || header.version > 3) {
1282        error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version);
1283        ret = -ENOTSUP;
1284        goto fail;
1285    }
1286
1287    s->qcow_version = header.version;
1288
1289    /* Initialise cluster size */
1290    if (header.cluster_bits < MIN_CLUSTER_BITS ||
1291        header.cluster_bits > MAX_CLUSTER_BITS) {
1292        error_setg(errp, "Unsupported cluster size: 2^%" PRIu32,
1293                   header.cluster_bits);
1294        ret = -EINVAL;
1295        goto fail;
1296    }
1297
1298    s->cluster_bits = header.cluster_bits;
1299    s->cluster_size = 1 << s->cluster_bits;
1300
1301    /* Initialise version 3 header fields */
1302    if (header.version == 2) {
1303        header.incompatible_features    = 0;
1304        header.compatible_features      = 0;
1305        header.autoclear_features       = 0;
1306        header.refcount_order           = 4;
1307        header.header_length            = 72;
1308    } else {
1309        header.incompatible_features =
1310            be64_to_cpu(header.incompatible_features);
1311        header.compatible_features = be64_to_cpu(header.compatible_features);
1312        header.autoclear_features = be64_to_cpu(header.autoclear_features);
1313        header.refcount_order = be32_to_cpu(header.refcount_order);
1314        header.header_length = be32_to_cpu(header.header_length);
1315
1316        if (header.header_length < 104) {
1317            error_setg(errp, "qcow2 header too short");
1318            ret = -EINVAL;
1319            goto fail;
1320        }
1321    }
1322
1323    if (header.header_length > s->cluster_size) {
1324        error_setg(errp, "qcow2 header exceeds cluster size");
1325        ret = -EINVAL;
1326        goto fail;
1327    }
1328
1329    if (header.header_length > sizeof(header)) {
1330        s->unknown_header_fields_size = header.header_length - sizeof(header);
1331        s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
1332        ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
1333                         s->unknown_header_fields_size);
1334        if (ret < 0) {
1335            error_setg_errno(errp, -ret, "Could not read unknown qcow2 header "
1336                             "fields");
1337            goto fail;
1338        }
1339    }
1340
1341    if (header.backing_file_offset > s->cluster_size) {
1342        error_setg(errp, "Invalid backing file offset");
1343        ret = -EINVAL;
1344        goto fail;
1345    }
1346
1347    if (header.backing_file_offset) {
1348        ext_end = header.backing_file_offset;
1349    } else {
1350        ext_end = 1 << header.cluster_bits;
1351    }
1352
1353    /* Handle feature bits */
1354    s->incompatible_features    = header.incompatible_features;
1355    s->compatible_features      = header.compatible_features;
1356    s->autoclear_features       = header.autoclear_features;
1357
1358    if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) {
1359        void *feature_table = NULL;
1360        qcow2_read_extensions(bs, header.header_length, ext_end,
1361                              &feature_table, flags, NULL, NULL);
1362        report_unsupported_feature(errp, feature_table,
1363                                   s->incompatible_features &
1364                                   ~QCOW2_INCOMPAT_MASK);
1365        ret = -ENOTSUP;
1366        g_free(feature_table);
1367        goto fail;
1368    }
1369
1370    if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
1371        /* Corrupt images may not be written to unless they are being repaired
1372         */
1373        if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
1374            error_setg(errp, "qcow2: Image is corrupt; cannot be opened "
1375                       "read/write");
1376            ret = -EACCES;
1377            goto fail;
1378        }
1379    }
1380
1381    /* Check support for various header values */
1382    if (header.refcount_order > 6) {
1383        error_setg(errp, "Reference count entry width too large; may not "
1384                   "exceed 64 bits");
1385        ret = -EINVAL;
1386        goto fail;
1387    }
1388    s->refcount_order = header.refcount_order;
1389    s->refcount_bits = 1 << s->refcount_order;
1390    s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
1391    s->refcount_max += s->refcount_max - 1;
1392
1393    s->crypt_method_header = header.crypt_method;
1394    if (s->crypt_method_header) {
1395        if (bdrv_uses_whitelist() &&
1396            s->crypt_method_header == QCOW_CRYPT_AES) {
1397            error_setg(errp,
1398                       "Use of AES-CBC encrypted qcow2 images is no longer "
1399                       "supported in system emulators");
1400            error_append_hint(errp,
1401                              "You can use 'qemu-img convert' to convert your "
1402                              "image to an alternative supported format, such "
1403                              "as unencrypted qcow2, or raw with the LUKS "
1404                              "format instead.\n");
1405            ret = -ENOSYS;
1406            goto fail;
1407        }
1408
1409        if (s->crypt_method_header == QCOW_CRYPT_AES) {
1410            s->crypt_physical_offset = false;
1411        } else {
1412            /* Assuming LUKS and any future crypt methods we
1413             * add will all use physical offsets, due to the
1414             * fact that the alternative is insecure...  */
1415            s->crypt_physical_offset = true;
1416        }
1417
1418        bs->encrypted = true;
1419    }
1420
1421    s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
1422    s->l2_size = 1 << s->l2_bits;
1423    /* 2^(s->refcount_order - 3) is the refcount width in bytes */
1424    s->refcount_block_bits = s->cluster_bits - (s->refcount_order - 3);
1425    s->refcount_block_size = 1 << s->refcount_block_bits;
1426    bs->total_sectors = header.size / BDRV_SECTOR_SIZE;
1427    s->csize_shift = (62 - (s->cluster_bits - 8));
1428    s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
1429    s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
1430
1431    s->refcount_table_offset = header.refcount_table_offset;
1432    s->refcount_table_size =
1433        header.refcount_table_clusters << (s->cluster_bits - 3);
1434
1435    if (header.refcount_table_clusters == 0 && !(flags & BDRV_O_CHECK)) {
1436        error_setg(errp, "Image does not contain a reference count table");
1437        ret = -EINVAL;
1438        goto fail;
1439    }
1440
1441    ret = qcow2_validate_table(bs, s->refcount_table_offset,
1442                               header.refcount_table_clusters,
1443                               s->cluster_size, QCOW_MAX_REFTABLE_SIZE,
1444                               "Reference count table", errp);
1445    if (ret < 0) {
1446        goto fail;
1447    }
1448
1449    if (!(flags & BDRV_O_CHECK)) {
1450        /*
1451         * The total size in bytes of the snapshot table is checked in
1452         * qcow2_read_snapshots() because the size of each snapshot is
1453         * variable and we don't know it yet.
1454         * Here we only check the offset and number of snapshots.
1455         */
1456        ret = qcow2_validate_table(bs, header.snapshots_offset,
1457                                   header.nb_snapshots,
1458                                   sizeof(QCowSnapshotHeader),
1459                                   sizeof(QCowSnapshotHeader) *
1460                                       QCOW_MAX_SNAPSHOTS,
1461                                   "Snapshot table", errp);
1462        if (ret < 0) {
1463            goto fail;
1464        }
1465    }
1466
1467    /* read the level 1 table */
1468    ret = qcow2_validate_table(bs, header.l1_table_offset,
1469                               header.l1_size, sizeof(uint64_t),
1470                               QCOW_MAX_L1_SIZE, "Active L1 table", errp);
1471    if (ret < 0) {
1472        goto fail;
1473    }
1474    s->l1_size = header.l1_size;
1475    s->l1_table_offset = header.l1_table_offset;
1476
1477    l1_vm_state_index = size_to_l1(s, header.size);
1478    if (l1_vm_state_index > INT_MAX) {
1479        error_setg(errp, "Image is too big");
1480        ret = -EFBIG;
1481        goto fail;
1482    }
1483    s->l1_vm_state_index = l1_vm_state_index;
1484
1485    /* the L1 table must contain at least enough entries to put
1486       header.size bytes */
1487    if (s->l1_size < s->l1_vm_state_index) {
1488        error_setg(errp, "L1 table is too small");
1489        ret = -EINVAL;
1490        goto fail;
1491    }
1492
1493    if (s->l1_size > 0) {
1494        s->l1_table = qemu_try_blockalign(bs->file->bs,
1495            ROUND_UP(s->l1_size * sizeof(uint64_t), 512));
1496        if (s->l1_table == NULL) {
1497            error_setg(errp, "Could not allocate L1 table");
1498            ret = -ENOMEM;
1499            goto fail;
1500        }
1501        ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
1502                         s->l1_size * sizeof(uint64_t));
1503        if (ret < 0) {
1504            error_setg_errno(errp, -ret, "Could not read L1 table");
1505            goto fail;
1506        }
1507        for(i = 0;i < s->l1_size; i++) {
1508            s->l1_table[i] = be64_to_cpu(s->l1_table[i]);
1509        }
1510    }
1511
1512    /* Parse driver-specific options */
1513    ret = qcow2_update_options(bs, options, flags, errp);
1514    if (ret < 0) {
1515        goto fail;
1516    }
1517
1518    s->flags = flags;
1519
1520    ret = qcow2_refcount_init(bs);
1521    if (ret != 0) {
1522        error_setg_errno(errp, -ret, "Could not initialize refcount handling");
1523        goto fail;
1524    }
1525
1526    QLIST_INIT(&s->cluster_allocs);
1527    QTAILQ_INIT(&s->discards);
1528
1529    /* read qcow2 extensions */
1530    if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL,
1531                              flags, &update_header, &local_err)) {
1532        error_propagate(errp, local_err);
1533        ret = -EINVAL;
1534        goto fail;
1535    }
1536
1537    /* Open external data file */
1538    s->data_file = bdrv_open_child(NULL, options, "data-file", bs, &child_file,
1539                                   true, &local_err);
1540    if (local_err) {
1541        error_propagate(errp, local_err);
1542        ret = -EINVAL;
1543        goto fail;
1544    }
1545
1546    if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) {
1547        if (!s->data_file && s->image_data_file) {
1548            s->data_file = bdrv_open_child(s->image_data_file, options,
1549                                           "data-file", bs, &child_file,
1550                                           false, errp);
1551            if (!s->data_file) {
1552                ret = -EINVAL;
1553                goto fail;
1554            }
1555        }
1556        if (!s->data_file) {
1557            error_setg(errp, "'data-file' is required for this image");
1558            ret = -EINVAL;
1559            goto fail;
1560        }
1561    } else {
1562        if (s->data_file) {
1563            error_setg(errp, "'data-file' can only be set for images with an "
1564                             "external data file");
1565            ret = -EINVAL;
1566            goto fail;
1567        }
1568
1569        s->data_file = bs->file;
1570
1571        if (data_file_is_raw(bs)) {
1572            error_setg(errp, "data-file-raw requires a data file");
1573            ret = -EINVAL;
1574            goto fail;
1575        }
1576    }
1577
1578    /* qcow2_read_extension may have set up the crypto context
1579     * if the crypt method needs a header region, some methods
1580     * don't need header extensions, so must check here
1581     */
1582    if (s->crypt_method_header && !s->crypto) {
1583        if (s->crypt_method_header == QCOW_CRYPT_AES) {
1584            unsigned int cflags = 0;
1585            if (flags & BDRV_O_NO_IO) {
1586                cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
1587            }
1588            s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
1589                                           NULL, NULL, cflags,
1590                                           QCOW2_MAX_THREADS, errp);
1591            if (!s->crypto) {
1592                ret = -EINVAL;
1593                goto fail;
1594            }
1595        } else if (!(flags & BDRV_O_NO_IO)) {
1596            error_setg(errp, "Missing CRYPTO header for crypt method %d",
1597                       s->crypt_method_header);
1598            ret = -EINVAL;
1599            goto fail;
1600        }
1601    }
1602
1603    /* read the backing file name */
1604    if (header.backing_file_offset != 0) {
1605        len = header.backing_file_size;
1606        if (len > MIN(1023, s->cluster_size - header.backing_file_offset) ||
1607            len >= sizeof(bs->backing_file)) {
1608            error_setg(errp, "Backing file name too long");
1609            ret = -EINVAL;
1610            goto fail;
1611        }
1612        ret = bdrv_pread(bs->file, header.backing_file_offset,
1613                         bs->auto_backing_file, len);
1614        if (ret < 0) {
1615            error_setg_errno(errp, -ret, "Could not read backing file name");
1616            goto fail;
1617        }
1618        bs->auto_backing_file[len] = '\0';
1619        pstrcpy(bs->backing_file, sizeof(bs->backing_file),
1620                bs->auto_backing_file);
1621        s->image_backing_file = g_strdup(bs->auto_backing_file);
1622    }
1623
1624    /*
1625     * Internal snapshots; skip reading them in check mode, because
1626     * we do not need them then, and we do not want to abort because
1627     * of a broken table.
1628     */
1629    if (!(flags & BDRV_O_CHECK)) {
1630        s->snapshots_offset = header.snapshots_offset;
1631        s->nb_snapshots = header.nb_snapshots;
1632
1633        ret = qcow2_read_snapshots(bs, errp);
1634        if (ret < 0) {
1635            goto fail;
1636        }
1637    }
1638
1639    /* Clear unknown autoclear feature bits */
1640    update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK;
1641    update_header =
1642        update_header && !bs->read_only && !(flags & BDRV_O_INACTIVE);
1643    if (update_header) {
1644        s->autoclear_features &= QCOW2_AUTOCLEAR_MASK;
1645    }
1646
1647    /* == Handle persistent dirty bitmaps ==
1648     *
1649     * We want to load dirty bitmaps in three cases:
1650     *
1651     * 1. Normal open of the disk in active mode, not related to invalidation
1652     *    after migration.
1653     *
1654     * 2. Invalidation of the target vm after pre-copy phase of migration, if
1655     *    bitmaps are _not_ migrating through migration channel, i.e.
1656     *    'dirty-bitmaps' capability is disabled.
1657     *
1658     * 3. Invalidation of source vm after failed or canceled migration.
1659     *    This is a very interesting case. There are two possible types of
1660     *    bitmaps:
1661     *
1662     *    A. Stored on inactivation and removed. They should be loaded from the
1663     *       image.
1664     *
1665     *    B. Not stored: not-persistent bitmaps and bitmaps, migrated through
1666     *       the migration channel (with dirty-bitmaps capability).
1667     *
1668     *    On the other hand, there are two possible sub-cases:
1669     *
1670     *    3.1 disk was changed by somebody else while were inactive. In this
1671     *        case all in-RAM dirty bitmaps (both persistent and not) are
1672     *        definitely invalid. And we don't have any method to determine
1673     *        this.
1674     *
1675     *        Simple and safe thing is to just drop all the bitmaps of type B on
1676     *        inactivation. But in this case we lose bitmaps in valid 3.2 case.
1677     *
1678     *        On the other hand, resuming source vm, if disk was already changed
1679     *        is a bad thing anyway: not only bitmaps, the whole vm state is
1680     *        out of sync with disk.
1681     *
1682     *        This means, that user or management tool, who for some reason
1683     *        decided to resume source vm, after disk was already changed by
1684     *        target vm, should at least drop all dirty bitmaps by hand.
1685     *
1686     *        So, we can ignore this case for now, but TODO: "generation"
1687     *        extension for qcow2, to determine, that image was changed after
1688     *        last inactivation. And if it is changed, we will drop (or at least
1689     *        mark as 'invalid' all the bitmaps of type B, both persistent
1690     *        and not).
1691     *
1692     *    3.2 disk was _not_ changed while were inactive. Bitmaps may be saved
1693     *        to disk ('dirty-bitmaps' capability disabled), or not saved
1694     *        ('dirty-bitmaps' capability enabled), but we don't need to care
1695     *        of: let's load bitmaps as always: stored bitmaps will be loaded,
1696     *        and not stored has flag IN_USE=1 in the image and will be skipped
1697     *        on loading.
1698     *
1699     * One remaining possible case when we don't want to load bitmaps:
1700     *
1701     * 4. Open disk in inactive mode in target vm (bitmaps are migrating or
1702     *    will be loaded on invalidation, no needs try loading them before)
1703     */
1704
1705    if (!(bdrv_get_flags(bs) & BDRV_O_INACTIVE)) {
1706        /* It's case 1, 2 or 3.2. Or 3.1 which is BUG in management layer. */
1707        bool header_updated = qcow2_load_dirty_bitmaps(bs, &local_err);
1708
1709        update_header = update_header && !header_updated;
1710    }
1711    if (local_err != NULL) {
1712        error_propagate(errp, local_err);
1713        ret = -EINVAL;
1714        goto fail;
1715    }
1716
1717    if (update_header) {
1718        ret = qcow2_update_header(bs);
1719        if (ret < 0) {
1720            error_setg_errno(errp, -ret, "Could not update qcow2 header");
1721            goto fail;
1722        }
1723    }
1724
1725    bs->supported_zero_flags = header.version >= 3 ? BDRV_REQ_MAY_UNMAP : 0;
1726
1727    /* Repair image if dirty */
1728    if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only &&
1729        (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
1730        BdrvCheckResult result = {0};
1731
1732        ret = qcow2_co_check_locked(bs, &result,
1733                                    BDRV_FIX_ERRORS | BDRV_FIX_LEAKS);
1734        if (ret < 0 || result.check_errors) {
1735            if (ret >= 0) {
1736                ret = -EIO;
1737            }
1738            error_setg_errno(errp, -ret, "Could not repair dirty image");
1739            goto fail;
1740        }
1741    }
1742
1743#ifdef DEBUG_ALLOC
1744    {
1745        BdrvCheckResult result = {0};
1746        qcow2_check_refcounts(bs, &result, 0);
1747    }
1748#endif
1749
1750    qemu_co_queue_init(&s->thread_task_queue);
1751
1752    return ret;
1753
1754 fail:
1755    g_free(s->image_data_file);
1756    if (has_data_file(bs)) {
1757        bdrv_unref_child(bs, s->data_file);
1758    }
1759    g_free(s->unknown_header_fields);
1760    cleanup_unknown_header_ext(bs);
1761    qcow2_free_snapshots(bs);
1762    qcow2_refcount_close(bs);
1763    qemu_vfree(s->l1_table);
1764    /* else pre-write overlap checks in cache_destroy may crash */
1765    s->l1_table = NULL;
1766    cache_clean_timer_del(bs);
1767    if (s->l2_table_cache) {
1768        qcow2_cache_destroy(s->l2_table_cache);
1769    }
1770    if (s->refcount_block_cache) {
1771        qcow2_cache_destroy(s->refcount_block_cache);
1772    }
1773    qcrypto_block_free(s->crypto);
1774    qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
1775    return ret;
1776}
1777
1778typedef struct QCow2OpenCo {
1779    BlockDriverState *bs;
1780    QDict *options;
1781    int flags;
1782    Error **errp;
1783    int ret;
1784} QCow2OpenCo;
1785
1786static void coroutine_fn qcow2_open_entry(void *opaque)
1787{
1788    QCow2OpenCo *qoc = opaque;
1789    BDRVQcow2State *s = qoc->bs->opaque;
1790
1791    qemu_co_mutex_lock(&s->lock);
1792    qoc->ret = qcow2_do_open(qoc->bs, qoc->options, qoc->flags, qoc->errp);
1793    qemu_co_mutex_unlock(&s->lock);
1794}
1795
1796static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
1797                      Error **errp)
1798{
1799    BDRVQcow2State *s = bs->opaque;
1800    QCow2OpenCo qoc = {
1801        .bs = bs,
1802        .options = options,
1803        .flags = flags,
1804        .errp = errp,
1805        .ret = -EINPROGRESS
1806    };
1807
1808    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
1809                               false, errp);
1810    if (!bs->file) {
1811        return -EINVAL;
1812    }
1813
1814    /* Initialise locks */
1815    qemu_co_mutex_init(&s->lock);
1816
1817    if (qemu_in_coroutine()) {
1818        /* From bdrv_co_create.  */
1819        qcow2_open_entry(&qoc);
1820    } else {
1821        assert(qemu_get_current_aio_context() == qemu_get_aio_context());
1822        qemu_coroutine_enter(qemu_coroutine_create(qcow2_open_entry, &qoc));
1823        BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS);
1824    }
1825    return qoc.ret;
1826}
1827
1828static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
1829{
1830    BDRVQcow2State *s = bs->opaque;
1831
1832    if (bs->encrypted) {
1833        /* Encryption works on a sector granularity */
1834        bs->bl.request_alignment = qcrypto_block_get_sector_size(s->crypto);
1835    }
1836    bs->bl.pwrite_zeroes_alignment = s->cluster_size;
1837    bs->bl.pdiscard_alignment = s->cluster_size;
1838}
1839
1840static int qcow2_reopen_prepare(BDRVReopenState *state,
1841                                BlockReopenQueue *queue, Error **errp)
1842{
1843    Qcow2ReopenState *r;
1844    int ret;
1845
1846    r = g_new0(Qcow2ReopenState, 1);
1847    state->opaque = r;
1848
1849    ret = qcow2_update_options_prepare(state->bs, r, state->options,
1850                                       state->flags, errp);
1851    if (ret < 0) {
1852        goto fail;
1853    }
1854
1855    /* We need to write out any unwritten data if we reopen read-only. */
1856    if ((state->flags & BDRV_O_RDWR) == 0) {
1857        ret = qcow2_reopen_bitmaps_ro(state->bs, errp);
1858        if (ret < 0) {
1859            goto fail;
1860        }
1861
1862        ret = bdrv_flush(state->bs);
1863        if (ret < 0) {
1864            goto fail;
1865        }
1866
1867        ret = qcow2_mark_clean(state->bs);
1868        if (ret < 0) {
1869            goto fail;
1870        }
1871    }
1872
1873    return 0;
1874
1875fail:
1876    qcow2_update_options_abort(state->bs, r);
1877    g_free(r);
1878    return ret;
1879}
1880
1881static void qcow2_reopen_commit(BDRVReopenState *state)
1882{
1883    qcow2_update_options_commit(state->bs, state->opaque);
1884    if (state->flags & BDRV_O_RDWR) {
1885        Error *local_err = NULL;
1886
1887        if (qcow2_reopen_bitmaps_rw(state->bs, &local_err) < 0) {
1888            /*
1889             * This is not fatal, bitmaps just left read-only, so all following
1890             * writes will fail. User can remove read-only bitmaps to unblock
1891             * writes or retry reopen.
1892             */
1893            error_reportf_err(local_err,
1894                              "%s: Failed to make dirty bitmaps writable: ",
1895                              bdrv_get_node_name(state->bs));
1896        }
1897    }
1898    g_free(state->opaque);
1899}
1900
1901static void qcow2_reopen_abort(BDRVReopenState *state)
1902{
1903    qcow2_update_options_abort(state->bs, state->opaque);
1904    g_free(state->opaque);
1905}
1906
1907static void qcow2_join_options(QDict *options, QDict *old_options)
1908{
1909    bool has_new_overlap_template =
1910        qdict_haskey(options, QCOW2_OPT_OVERLAP) ||
1911        qdict_haskey(options, QCOW2_OPT_OVERLAP_TEMPLATE);
1912    bool has_new_total_cache_size =
1913        qdict_haskey(options, QCOW2_OPT_CACHE_SIZE);
1914    bool has_all_cache_options;
1915
1916    /* New overlap template overrides all old overlap options */
1917    if (has_new_overlap_template) {
1918        qdict_del(old_options, QCOW2_OPT_OVERLAP);
1919        qdict_del(old_options, QCOW2_OPT_OVERLAP_TEMPLATE);
1920        qdict_del(old_options, QCOW2_OPT_OVERLAP_MAIN_HEADER);
1921        qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L1);
1922        qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L2);
1923        qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_TABLE);
1924        qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK);
1925        qdict_del(old_options, QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE);
1926        qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L1);
1927        qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L2);
1928    }
1929
1930    /* New total cache size overrides all old options */
1931    if (qdict_haskey(options, QCOW2_OPT_CACHE_SIZE)) {
1932        qdict_del(old_options, QCOW2_OPT_L2_CACHE_SIZE);
1933        qdict_del(old_options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
1934    }
1935
1936    qdict_join(options, old_options, false);
1937
1938    /*
1939     * If after merging all cache size options are set, an old total size is
1940     * overwritten. Do keep all options, however, if all three are new. The
1941     * resulting error message is what we want to happen.
1942     */
1943    has_all_cache_options =
1944        qdict_haskey(options, QCOW2_OPT_CACHE_SIZE) ||
1945        qdict_haskey(options, QCOW2_OPT_L2_CACHE_SIZE) ||
1946        qdict_haskey(options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
1947
1948    if (has_all_cache_options && !has_new_total_cache_size) {
1949        qdict_del(options, QCOW2_OPT_CACHE_SIZE);
1950    }
1951}
1952
1953static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs,
1954                                              bool want_zero,
1955                                              int64_t offset, int64_t count,
1956                                              int64_t *pnum, int64_t *map,
1957                                              BlockDriverState **file)
1958{
1959    BDRVQcow2State *s = bs->opaque;
1960    uint64_t cluster_offset;
1961    int index_in_cluster, ret;
1962    unsigned int bytes;
1963    int status = 0;
1964
1965    qemu_co_mutex_lock(&s->lock);
1966
1967    if (!s->metadata_preallocation_checked) {
1968        ret = qcow2_detect_metadata_preallocation(bs);
1969        s->metadata_preallocation = (ret == 1);
1970        s->metadata_preallocation_checked = true;
1971    }
1972
1973    bytes = MIN(INT_MAX, count);
1974    ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset);
1975    qemu_co_mutex_unlock(&s->lock);
1976    if (ret < 0) {
1977        return ret;
1978    }
1979
1980    *pnum = bytes;
1981
1982    if ((ret == QCOW2_CLUSTER_NORMAL || ret == QCOW2_CLUSTER_ZERO_ALLOC) &&
1983        !s->crypto) {
1984        index_in_cluster = offset & (s->cluster_size - 1);
1985        *map = cluster_offset | index_in_cluster;
1986        *file = s->data_file->bs;
1987        status |= BDRV_BLOCK_OFFSET_VALID;
1988    }
1989    if (ret == QCOW2_CLUSTER_ZERO_PLAIN || ret == QCOW2_CLUSTER_ZERO_ALLOC) {
1990        status |= BDRV_BLOCK_ZERO;
1991    } else if (ret != QCOW2_CLUSTER_UNALLOCATED) {
1992        status |= BDRV_BLOCK_DATA;
1993    }
1994    if (s->metadata_preallocation && (status & BDRV_BLOCK_DATA) &&
1995        (status & BDRV_BLOCK_OFFSET_VALID))
1996    {
1997        status |= BDRV_BLOCK_RECURSE;
1998    }
1999    return status;
2000}
2001
2002static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs,
2003                                            QCowL2Meta **pl2meta,
2004                                            bool link_l2)
2005{
2006    int ret = 0;
2007    QCowL2Meta *l2meta = *pl2meta;
2008
2009    while (l2meta != NULL) {
2010        QCowL2Meta *next;
2011
2012        if (link_l2) {
2013            ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
2014            if (ret) {
2015                goto out;
2016            }
2017        } else {
2018            qcow2_alloc_cluster_abort(bs, l2meta);
2019        }
2020
2021        /* Take the request off the list of running requests */
2022        if (l2meta->nb_clusters != 0) {
2023            QLIST_REMOVE(l2meta, next_in_flight);
2024        }
2025
2026        qemu_co_queue_restart_all(&l2meta->dependent_requests);
2027
2028        next = l2meta->next;
2029        g_free(l2meta);
2030        l2meta = next;
2031    }
2032out:
2033    *pl2meta = l2meta;
2034    return ret;
2035}
2036
2037static coroutine_fn int
2038qcow2_co_preadv_encrypted(BlockDriverState *bs,
2039                           uint64_t file_cluster_offset,
2040                           uint64_t offset,
2041                           uint64_t bytes,
2042                           QEMUIOVector *qiov,
2043                           uint64_t qiov_offset)
2044{
2045    int ret;
2046    BDRVQcow2State *s = bs->opaque;
2047    uint8_t *buf;
2048
2049    assert(bs->encrypted && s->crypto);
2050    assert(bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
2051
2052    /*
2053     * For encrypted images, read everything into a temporary
2054     * contiguous buffer on which the AES functions can work.
2055     * Also, decryption in a separate buffer is better as it
2056     * prevents the guest from learning information about the
2057     * encrypted nature of the virtual disk.
2058     */
2059
2060    buf = qemu_try_blockalign(s->data_file->bs, bytes);
2061    if (buf == NULL) {
2062        return -ENOMEM;
2063    }
2064
2065    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
2066    ret = bdrv_co_pread(s->data_file,
2067                        file_cluster_offset + offset_into_cluster(s, offset),
2068                        bytes, buf, 0);
2069    if (ret < 0) {
2070        goto fail;
2071    }
2072
2073    assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
2074    assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
2075    if (qcow2_co_decrypt(bs,
2076                         file_cluster_offset + offset_into_cluster(s, offset),
2077                         offset, buf, bytes) < 0)
2078    {
2079        ret = -EIO;
2080        goto fail;
2081    }
2082    qemu_iovec_from_buf(qiov, qiov_offset, buf, bytes);
2083
2084fail:
2085    qemu_vfree(buf);
2086
2087    return ret;
2088}
2089
2090typedef struct Qcow2AioTask {
2091    AioTask task;
2092
2093    BlockDriverState *bs;
2094    QCow2ClusterType cluster_type; /* only for read */
2095    uint64_t file_cluster_offset;
2096    uint64_t offset;
2097    uint64_t bytes;
2098    QEMUIOVector *qiov;
2099    uint64_t qiov_offset;
2100    QCowL2Meta *l2meta; /* only for write */
2101} Qcow2AioTask;
2102
2103static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task);
2104static coroutine_fn int qcow2_add_task(BlockDriverState *bs,
2105                                       AioTaskPool *pool,
2106                                       AioTaskFunc func,
2107                                       QCow2ClusterType cluster_type,
2108                                       uint64_t file_cluster_offset,
2109                                       uint64_t offset,
2110                                       uint64_t bytes,
2111                                       QEMUIOVector *qiov,
2112                                       size_t qiov_offset,
2113                                       QCowL2Meta *l2meta)
2114{
2115    Qcow2AioTask local_task;
2116    Qcow2AioTask *task = pool ? g_new(Qcow2AioTask, 1) : &local_task;
2117
2118    *task = (Qcow2AioTask) {
2119        .task.func = func,
2120        .bs = bs,
2121        .cluster_type = cluster_type,
2122        .qiov = qiov,
2123        .file_cluster_offset = file_cluster_offset,
2124        .offset = offset,
2125        .bytes = bytes,
2126        .qiov_offset = qiov_offset,
2127        .l2meta = l2meta,
2128    };
2129
2130    trace_qcow2_add_task(qemu_coroutine_self(), bs, pool,
2131                         func == qcow2_co_preadv_task_entry ? "read" : "write",
2132                         cluster_type, file_cluster_offset, offset, bytes,
2133                         qiov, qiov_offset);
2134
2135    if (!pool) {
2136        return func(&task->task);
2137    }
2138
2139    aio_task_pool_start_task(pool, &task->task);
2140
2141    return 0;
2142}
2143
2144static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs,
2145                                             QCow2ClusterType cluster_type,
2146                                             uint64_t file_cluster_offset,
2147                                             uint64_t offset, uint64_t bytes,
2148                                             QEMUIOVector *qiov,
2149                                             size_t qiov_offset)
2150{
2151    BDRVQcow2State *s = bs->opaque;
2152    int offset_in_cluster = offset_into_cluster(s, offset);
2153
2154    switch (cluster_type) {
2155    case QCOW2_CLUSTER_ZERO_PLAIN:
2156    case QCOW2_CLUSTER_ZERO_ALLOC:
2157        /* Both zero types are handled in qcow2_co_preadv_part */
2158        g_assert_not_reached();
2159
2160    case QCOW2_CLUSTER_UNALLOCATED:
2161        assert(bs->backing); /* otherwise handled in qcow2_co_preadv_part */
2162
2163        BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
2164        return bdrv_co_preadv_part(bs->backing, offset, bytes,
2165                                   qiov, qiov_offset, 0);
2166
2167    case QCOW2_CLUSTER_COMPRESSED:
2168        return qcow2_co_preadv_compressed(bs, file_cluster_offset,
2169                                          offset, bytes, qiov, qiov_offset);
2170
2171    case QCOW2_CLUSTER_NORMAL:
2172        if ((file_cluster_offset & 511) != 0) {
2173            return -EIO;
2174        }
2175
2176        if (bs->encrypted) {
2177            return qcow2_co_preadv_encrypted(bs, file_cluster_offset,
2178                                             offset, bytes, qiov, qiov_offset);
2179        }
2180
2181        BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
2182        return bdrv_co_preadv_part(s->data_file,
2183                                   file_cluster_offset + offset_in_cluster,
2184                                   bytes, qiov, qiov_offset, 0);
2185
2186    default:
2187        g_assert_not_reached();
2188    }
2189
2190    g_assert_not_reached();
2191}
2192
2193static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task)
2194{
2195    Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
2196
2197    assert(!t->l2meta);
2198
2199    return qcow2_co_preadv_task(t->bs, t->cluster_type, t->file_cluster_offset,
2200                                t->offset, t->bytes, t->qiov, t->qiov_offset);
2201}
2202
2203static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs,
2204                                             uint64_t offset, uint64_t bytes,
2205                                             QEMUIOVector *qiov,
2206                                             size_t qiov_offset, int flags)
2207{
2208    BDRVQcow2State *s = bs->opaque;
2209    int ret = 0;
2210    unsigned int cur_bytes; /* number of bytes in current iteration */
2211    uint64_t cluster_offset = 0;
2212    AioTaskPool *aio = NULL;
2213
2214    while (bytes != 0 && aio_task_pool_status(aio) == 0) {
2215        /* prepare next request */
2216        cur_bytes = MIN(bytes, INT_MAX);
2217        if (s->crypto) {
2218            cur_bytes = MIN(cur_bytes,
2219                            QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
2220        }
2221
2222        qemu_co_mutex_lock(&s->lock);
2223        ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset);
2224        qemu_co_mutex_unlock(&s->lock);
2225        if (ret < 0) {
2226            goto out;
2227        }
2228
2229        if (ret == QCOW2_CLUSTER_ZERO_PLAIN ||
2230            ret == QCOW2_CLUSTER_ZERO_ALLOC ||
2231            (ret == QCOW2_CLUSTER_UNALLOCATED && !bs->backing))
2232        {
2233            qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes);
2234        } else {
2235            if (!aio && cur_bytes != bytes) {
2236                aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
2237            }
2238            ret = qcow2_add_task(bs, aio, qcow2_co_preadv_task_entry, ret,
2239                                 cluster_offset, offset, cur_bytes,
2240                                 qiov, qiov_offset, NULL);
2241            if (ret < 0) {
2242                goto out;
2243            }
2244        }
2245
2246        bytes -= cur_bytes;
2247        offset += cur_bytes;
2248        qiov_offset += cur_bytes;
2249    }
2250
2251out:
2252    if (aio) {
2253        aio_task_pool_wait_all(aio);
2254        if (ret == 0) {
2255            ret = aio_task_pool_status(aio);
2256        }
2257        g_free(aio);
2258    }
2259
2260    return ret;
2261}
2262
2263/* Check if it's possible to merge a write request with the writing of
2264 * the data from the COW regions */
2265static bool merge_cow(uint64_t offset, unsigned bytes,
2266                      QEMUIOVector *qiov, size_t qiov_offset,
2267                      QCowL2Meta *l2meta)
2268{
2269    QCowL2Meta *m;
2270
2271    for (m = l2meta; m != NULL; m = m->next) {
2272        /* If both COW regions are empty then there's nothing to merge */
2273        if (m->cow_start.nb_bytes == 0 && m->cow_end.nb_bytes == 0) {
2274            continue;
2275        }
2276
2277        /* If COW regions are handled already, skip this too */
2278        if (m->skip_cow) {
2279            continue;
2280        }
2281
2282        /* The data (middle) region must be immediately after the
2283         * start region */
2284        if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) {
2285            continue;
2286        }
2287
2288        /* The end region must be immediately after the data (middle)
2289         * region */
2290        if (m->offset + m->cow_end.offset != offset + bytes) {
2291            continue;
2292        }
2293
2294        /* Make sure that adding both COW regions to the QEMUIOVector
2295         * does not exceed IOV_MAX */
2296        if (qemu_iovec_subvec_niov(qiov, qiov_offset, bytes) > IOV_MAX - 2) {
2297            continue;
2298        }
2299
2300        m->data_qiov = qiov;
2301        m->data_qiov_offset = qiov_offset;
2302        return true;
2303    }
2304
2305    return false;
2306}
2307
2308static bool is_unallocated(BlockDriverState *bs, int64_t offset, int64_t bytes)
2309{
2310    int64_t nr;
2311    return !bytes ||
2312        (!bdrv_is_allocated_above(bs, NULL, false, offset, bytes, &nr) &&
2313         nr == bytes);
2314}
2315
2316static bool is_zero_cow(BlockDriverState *bs, QCowL2Meta *m)
2317{
2318    /*
2319     * This check is designed for optimization shortcut so it must be
2320     * efficient.
2321     * Instead of is_zero(), use is_unallocated() as it is faster (but not
2322     * as accurate and can result in false negatives).
2323     */
2324    return is_unallocated(bs, m->offset + m->cow_start.offset,
2325                          m->cow_start.nb_bytes) &&
2326           is_unallocated(bs, m->offset + m->cow_end.offset,
2327                          m->cow_end.nb_bytes);
2328}
2329
2330static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta)
2331{
2332    BDRVQcow2State *s = bs->opaque;
2333    QCowL2Meta *m;
2334
2335    if (!(s->data_file->bs->supported_zero_flags & BDRV_REQ_NO_FALLBACK)) {
2336        return 0;
2337    }
2338
2339    if (bs->encrypted) {
2340        return 0;
2341    }
2342
2343    for (m = l2meta; m != NULL; m = m->next) {
2344        int ret;
2345
2346        if (!m->cow_start.nb_bytes && !m->cow_end.nb_bytes) {
2347            continue;
2348        }
2349
2350        if (!is_zero_cow(bs, m)) {
2351            continue;
2352        }
2353
2354        /*
2355         * instead of writing zero COW buffers,
2356         * efficiently zero out the whole clusters
2357         */
2358
2359        ret = qcow2_pre_write_overlap_check(bs, 0, m->alloc_offset,
2360                                            m->nb_clusters * s->cluster_size,
2361                                            true);
2362        if (ret < 0) {
2363            return ret;
2364        }
2365
2366        BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_SPACE);
2367        ret = bdrv_co_pwrite_zeroes(s->data_file, m->alloc_offset,
2368                                    m->nb_clusters * s->cluster_size,
2369                                    BDRV_REQ_NO_FALLBACK);
2370        if (ret < 0) {
2371            if (ret != -ENOTSUP && ret != -EAGAIN) {
2372                return ret;
2373            }
2374            continue;
2375        }
2376
2377        trace_qcow2_skip_cow(qemu_coroutine_self(), m->offset, m->nb_clusters);
2378        m->skip_cow = true;
2379    }
2380    return 0;
2381}
2382
2383/*
2384 * qcow2_co_pwritev_task
2385 * Called with s->lock unlocked
2386 * l2meta  - if not NULL, qcow2_co_pwritev_task() will consume it. Caller must
2387 *           not use it somehow after qcow2_co_pwritev_task() call
2388 */
2389static coroutine_fn int qcow2_co_pwritev_task(BlockDriverState *bs,
2390                                              uint64_t file_cluster_offset,
2391                                              uint64_t offset, uint64_t bytes,
2392                                              QEMUIOVector *qiov,
2393                                              uint64_t qiov_offset,
2394                                              QCowL2Meta *l2meta)
2395{
2396    int ret;
2397    BDRVQcow2State *s = bs->opaque;
2398    void *crypt_buf = NULL;
2399    int offset_in_cluster = offset_into_cluster(s, offset);
2400    QEMUIOVector encrypted_qiov;
2401
2402    if (bs->encrypted) {
2403        assert(s->crypto);
2404        assert(bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
2405        crypt_buf = qemu_try_blockalign(bs->file->bs, bytes);
2406        if (crypt_buf == NULL) {
2407            ret = -ENOMEM;
2408            goto out_unlocked;
2409        }
2410        qemu_iovec_to_buf(qiov, qiov_offset, crypt_buf, bytes);
2411
2412        if (qcow2_co_encrypt(bs, file_cluster_offset + offset_in_cluster,
2413                             offset, crypt_buf, bytes) < 0)
2414        {
2415            ret = -EIO;
2416            goto out_unlocked;
2417        }
2418
2419        qemu_iovec_init_buf(&encrypted_qiov, crypt_buf, bytes);
2420        qiov = &encrypted_qiov;
2421        qiov_offset = 0;
2422    }
2423
2424    /* Try to efficiently initialize the physical space with zeroes */
2425    ret = handle_alloc_space(bs, l2meta);
2426    if (ret < 0) {
2427        goto out_unlocked;
2428    }
2429
2430    /*
2431     * If we need to do COW, check if it's possible to merge the
2432     * writing of the guest data together with that of the COW regions.
2433     * If it's not possible (or not necessary) then write the
2434     * guest data now.
2435     */
2436    if (!merge_cow(offset, bytes, qiov, qiov_offset, l2meta)) {
2437        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
2438        trace_qcow2_writev_data(qemu_coroutine_self(),
2439                                file_cluster_offset + offset_in_cluster);
2440        ret = bdrv_co_pwritev_part(s->data_file,
2441                                   file_cluster_offset + offset_in_cluster,
2442                                   bytes, qiov, qiov_offset, 0);
2443        if (ret < 0) {
2444            goto out_unlocked;
2445        }
2446    }
2447
2448    qemu_co_mutex_lock(&s->lock);
2449
2450    ret = qcow2_handle_l2meta(bs, &l2meta, true);
2451    goto out_locked;
2452
2453out_unlocked:
2454    qemu_co_mutex_lock(&s->lock);
2455
2456out_locked:
2457    qcow2_handle_l2meta(bs, &l2meta, false);
2458    qemu_co_mutex_unlock(&s->lock);
2459
2460    qemu_vfree(crypt_buf);
2461
2462    return ret;
2463}
2464
2465static coroutine_fn int qcow2_co_pwritev_task_entry(AioTask *task)
2466{
2467    Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
2468
2469    assert(!t->cluster_type);
2470
2471    return qcow2_co_pwritev_task(t->bs, t->file_cluster_offset,
2472                                 t->offset, t->bytes, t->qiov, t->qiov_offset,
2473                                 t->l2meta);
2474}
2475
2476static coroutine_fn int qcow2_co_pwritev_part(
2477        BlockDriverState *bs, uint64_t offset, uint64_t bytes,
2478        QEMUIOVector *qiov, size_t qiov_offset, int flags)
2479{
2480    BDRVQcow2State *s = bs->opaque;
2481    int offset_in_cluster;
2482    int ret;
2483    unsigned int cur_bytes; /* number of sectors in current iteration */
2484    uint64_t cluster_offset;
2485    QCowL2Meta *l2meta = NULL;
2486    AioTaskPool *aio = NULL;
2487
2488    trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);
2489
2490    while (bytes != 0 && aio_task_pool_status(aio) == 0) {
2491
2492        l2meta = NULL;
2493
2494        trace_qcow2_writev_start_part(qemu_coroutine_self());
2495        offset_in_cluster = offset_into_cluster(s, offset);
2496        cur_bytes = MIN(bytes, INT_MAX);
2497        if (bs->encrypted) {
2498            cur_bytes = MIN(cur_bytes,
2499                            QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
2500                            - offset_in_cluster);
2501        }
2502
2503        qemu_co_mutex_lock(&s->lock);
2504
2505        ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
2506                                         &cluster_offset, &l2meta);
2507        if (ret < 0) {
2508            goto out_locked;
2509        }
2510
2511        assert((cluster_offset & 511) == 0);
2512
2513        ret = qcow2_pre_write_overlap_check(bs, 0,
2514                                            cluster_offset + offset_in_cluster,
2515                                            cur_bytes, true);
2516        if (ret < 0) {
2517            goto out_locked;
2518        }
2519
2520        qemu_co_mutex_unlock(&s->lock);
2521
2522        if (!aio && cur_bytes != bytes) {
2523            aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
2524        }
2525        ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_task_entry, 0,
2526                             cluster_offset, offset, cur_bytes,
2527                             qiov, qiov_offset, l2meta);
2528        l2meta = NULL; /* l2meta is consumed by qcow2_co_pwritev_task() */
2529        if (ret < 0) {
2530            goto fail_nometa;
2531        }
2532
2533        bytes -= cur_bytes;
2534        offset += cur_bytes;
2535        qiov_offset += cur_bytes;
2536        trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes);
2537    }
2538    ret = 0;
2539
2540    qemu_co_mutex_lock(&s->lock);
2541
2542out_locked:
2543    qcow2_handle_l2meta(bs, &l2meta, false);
2544
2545    qemu_co_mutex_unlock(&s->lock);
2546
2547fail_nometa:
2548    if (aio) {
2549        aio_task_pool_wait_all(aio);
2550        if (ret == 0) {
2551            ret = aio_task_pool_status(aio);
2552        }
2553        g_free(aio);
2554    }
2555
2556    trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
2557
2558    return ret;
2559}
2560
2561static int qcow2_inactivate(BlockDriverState *bs)
2562{
2563    BDRVQcow2State *s = bs->opaque;
2564    int ret, result = 0;
2565    Error *local_err = NULL;
2566
2567    qcow2_store_persistent_dirty_bitmaps(bs, true, &local_err);
2568    if (local_err != NULL) {
2569        result = -EINVAL;
2570        error_reportf_err(local_err, "Lost persistent bitmaps during "
2571                          "inactivation of node '%s': ",
2572                          bdrv_get_device_or_node_name(bs));
2573    }
2574
2575    ret = qcow2_cache_flush(bs, s->l2_table_cache);
2576    if (ret) {
2577        result = ret;
2578        error_report("Failed to flush the L2 table cache: %s",
2579                     strerror(-ret));
2580    }
2581
2582    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
2583    if (ret) {
2584        result = ret;
2585        error_report("Failed to flush the refcount block cache: %s",
2586                     strerror(-ret));
2587    }
2588
2589    if (result == 0) {
2590        qcow2_mark_clean(bs);
2591    }
2592
2593    return result;
2594}
2595
2596static void qcow2_close(BlockDriverState *bs)
2597{
2598    BDRVQcow2State *s = bs->opaque;
2599    qemu_vfree(s->l1_table);
2600    /* else pre-write overlap checks in cache_destroy may crash */
2601    s->l1_table = NULL;
2602
2603    if (!(s->flags & BDRV_O_INACTIVE)) {
2604        qcow2_inactivate(bs);
2605    }
2606
2607    cache_clean_timer_del(bs);
2608    qcow2_cache_destroy(s->l2_table_cache);
2609    qcow2_cache_destroy(s->refcount_block_cache);
2610
2611    qcrypto_block_free(s->crypto);
2612    s->crypto = NULL;
2613
2614    g_free(s->unknown_header_fields);
2615    cleanup_unknown_header_ext(bs);
2616
2617    g_free(s->image_data_file);
2618    g_free(s->image_backing_file);
2619    g_free(s->image_backing_format);
2620
2621    if (has_data_file(bs)) {
2622        bdrv_unref_child(bs, s->data_file);
2623    }
2624
2625    qcow2_refcount_close(bs);
2626    qcow2_free_snapshots(bs);
2627}
2628
2629static void coroutine_fn qcow2_co_invalidate_cache(BlockDriverState *bs,
2630                                                   Error **errp)
2631{
2632    BDRVQcow2State *s = bs->opaque;
2633    int flags = s->flags;
2634    QCryptoBlock *crypto = NULL;
2635    QDict *options;
2636    Error *local_err = NULL;
2637    int ret;
2638
2639    /*
2640     * Backing files are read-only which makes all of their metadata immutable,
2641     * that means we don't have to worry about reopening them here.
2642     */
2643
2644    crypto = s->crypto;
2645    s->crypto = NULL;
2646
2647    qcow2_close(bs);
2648
2649    memset(s, 0, sizeof(BDRVQcow2State));
2650    options = qdict_clone_shallow(bs->options);
2651
2652    flags &= ~BDRV_O_INACTIVE;
2653    qemu_co_mutex_lock(&s->lock);
2654    ret = qcow2_do_open(bs, options, flags, &local_err);
2655    qemu_co_mutex_unlock(&s->lock);
2656    qobject_unref(options);
2657    if (local_err) {
2658        error_propagate_prepend(errp, local_err,
2659                                "Could not reopen qcow2 layer: ");
2660        bs->drv = NULL;
2661        return;
2662    } else if (ret < 0) {
2663        error_setg_errno(errp, -ret, "Could not reopen qcow2 layer");
2664        bs->drv = NULL;
2665        return;
2666    }
2667
2668    s->crypto = crypto;
2669}
2670
2671static size_t header_ext_add(char *buf, uint32_t magic, const void *s,
2672    size_t len, size_t buflen)
2673{
2674    QCowExtension *ext_backing_fmt = (QCowExtension*) buf;
2675    size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7);
2676
2677    if (buflen < ext_len) {
2678        return -ENOSPC;
2679    }
2680
2681    *ext_backing_fmt = (QCowExtension) {
2682        .magic  = cpu_to_be32(magic),
2683        .len    = cpu_to_be32(len),
2684    };
2685
2686    if (len) {
2687        memcpy(buf + sizeof(QCowExtension), s, len);
2688    }
2689
2690    return ext_len;
2691}
2692
2693/*
2694 * Updates the qcow2 header, including the variable length parts of it, i.e.
2695 * the backing file name and all extensions. qcow2 was not designed to allow
2696 * such changes, so if we run out of space (we can only use the first cluster)
2697 * this function may fail.
2698 *
2699 * Returns 0 on success, -errno in error cases.
2700 */
2701int qcow2_update_header(BlockDriverState *bs)
2702{
2703    BDRVQcow2State *s = bs->opaque;
2704    QCowHeader *header;
2705    char *buf;
2706    size_t buflen = s->cluster_size;
2707    int ret;
2708    uint64_t total_size;
2709    uint32_t refcount_table_clusters;
2710    size_t header_length;
2711    Qcow2UnknownHeaderExtension *uext;
2712
2713    buf = qemu_blockalign(bs, buflen);
2714
2715    /* Header structure */
2716    header = (QCowHeader*) buf;
2717
2718    if (buflen < sizeof(*header)) {
2719        ret = -ENOSPC;
2720        goto fail;
2721    }
2722
2723    header_length = sizeof(*header) + s->unknown_header_fields_size;
2724    total_size = bs->total_sectors * BDRV_SECTOR_SIZE;
2725    refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
2726
2727    *header = (QCowHeader) {
2728        /* Version 2 fields */
2729        .magic                  = cpu_to_be32(QCOW_MAGIC),
2730        .version                = cpu_to_be32(s->qcow_version),
2731        .backing_file_offset    = 0,
2732        .backing_file_size      = 0,
2733        .cluster_bits           = cpu_to_be32(s->cluster_bits),
2734        .size                   = cpu_to_be64(total_size),
2735        .crypt_method           = cpu_to_be32(s->crypt_method_header),
2736        .l1_size                = cpu_to_be32(s->l1_size),
2737        .l1_table_offset        = cpu_to_be64(s->l1_table_offset),
2738        .refcount_table_offset  = cpu_to_be64(s->refcount_table_offset),
2739        .refcount_table_clusters = cpu_to_be32(refcount_table_clusters),
2740        .nb_snapshots           = cpu_to_be32(s->nb_snapshots),
2741        .snapshots_offset       = cpu_to_be64(s->snapshots_offset),
2742
2743        /* Version 3 fields */
2744        .incompatible_features  = cpu_to_be64(s->incompatible_features),
2745        .compatible_features    = cpu_to_be64(s->compatible_features),
2746        .autoclear_features     = cpu_to_be64(s->autoclear_features),
2747        .refcount_order         = cpu_to_be32(s->refcount_order),
2748        .header_length          = cpu_to_be32(header_length),
2749    };
2750
2751    /* For older versions, write a shorter header */
2752    switch (s->qcow_version) {
2753    case 2:
2754        ret = offsetof(QCowHeader, incompatible_features);
2755        break;
2756    case 3:
2757        ret = sizeof(*header);
2758        break;
2759    default:
2760        ret = -EINVAL;
2761        goto fail;
2762    }
2763
2764    buf += ret;
2765    buflen -= ret;
2766    memset(buf, 0, buflen);
2767
2768    /* Preserve any unknown field in the header */
2769    if (s->unknown_header_fields_size) {
2770        if (buflen < s->unknown_header_fields_size) {
2771            ret = -ENOSPC;
2772            goto fail;
2773        }
2774
2775        memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size);
2776        buf += s->unknown_header_fields_size;
2777        buflen -= s->unknown_header_fields_size;
2778    }
2779
2780    /* Backing file format header extension */
2781    if (s->image_backing_format) {
2782        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
2783                             s->image_backing_format,
2784                             strlen(s->image_backing_format),
2785                             buflen);
2786        if (ret < 0) {
2787            goto fail;
2788        }
2789
2790        buf += ret;
2791        buflen -= ret;
2792    }
2793
2794    /* External data file header extension */
2795    if (has_data_file(bs) && s->image_data_file) {
2796        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_DATA_FILE,
2797                             s->image_data_file, strlen(s->image_data_file),
2798                             buflen);
2799        if (ret < 0) {
2800            goto fail;
2801        }
2802
2803        buf += ret;
2804        buflen -= ret;
2805    }
2806
2807    /* Full disk encryption header pointer extension */
2808    if (s->crypto_header.offset != 0) {
2809        s->crypto_header.offset = cpu_to_be64(s->crypto_header.offset);
2810        s->crypto_header.length = cpu_to_be64(s->crypto_header.length);
2811        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_CRYPTO_HEADER,
2812                             &s->crypto_header, sizeof(s->crypto_header),
2813                             buflen);
2814        s->crypto_header.offset = be64_to_cpu(s->crypto_header.offset);
2815        s->crypto_header.length = be64_to_cpu(s->crypto_header.length);
2816        if (ret < 0) {
2817            goto fail;
2818        }
2819        buf += ret;
2820        buflen -= ret;
2821    }
2822
2823    /* Feature table */
2824    if (s->qcow_version >= 3) {
2825        Qcow2Feature features[] = {
2826            {
2827                .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
2828                .bit  = QCOW2_INCOMPAT_DIRTY_BITNR,
2829                .name = "dirty bit",
2830            },
2831            {
2832                .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
2833                .bit  = QCOW2_INCOMPAT_CORRUPT_BITNR,
2834                .name = "corrupt bit",
2835            },
2836            {
2837                .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
2838                .bit  = QCOW2_INCOMPAT_DATA_FILE_BITNR,
2839                .name = "external data file",
2840            },
2841            {
2842                .type = QCOW2_FEAT_TYPE_COMPATIBLE,
2843                .bit  = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
2844                .name = "lazy refcounts",
2845            },
2846        };
2847
2848        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
2849                             features, sizeof(features), buflen);
2850        if (ret < 0) {
2851            goto fail;
2852        }
2853        buf += ret;
2854        buflen -= ret;
2855    }
2856
2857    /* Bitmap extension */
2858    if (s->nb_bitmaps > 0) {
2859        Qcow2BitmapHeaderExt bitmaps_header = {
2860            .nb_bitmaps = cpu_to_be32(s->nb_bitmaps),
2861            .bitmap_directory_size =
2862                    cpu_to_be64(s->bitmap_directory_size),
2863            .bitmap_directory_offset =
2864                    cpu_to_be64(s->bitmap_directory_offset)
2865        };
2866        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BITMAPS,
2867                             &bitmaps_header, sizeof(bitmaps_header),
2868                             buflen);
2869        if (ret < 0) {
2870            goto fail;
2871        }
2872        buf += ret;
2873        buflen -= ret;
2874    }
2875
2876    /* Keep unknown header extensions */
2877    QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
2878        ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
2879        if (ret < 0) {
2880            goto fail;
2881        }
2882
2883        buf += ret;
2884        buflen -= ret;
2885    }
2886
2887    /* End of header extensions */
2888    ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen);
2889    if (ret < 0) {
2890        goto fail;
2891    }
2892
2893    buf += ret;
2894    buflen -= ret;
2895
2896    /* Backing file name */
2897    if (s->image_backing_file) {
2898        size_t backing_file_len = strlen(s->image_backing_file);
2899
2900        if (buflen < backing_file_len) {
2901            ret = -ENOSPC;
2902            goto fail;
2903        }
2904
2905        /* Using strncpy is ok here, since buf is not NUL-terminated. */
2906        strncpy(buf, s->image_backing_file, buflen);
2907
2908        header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
2909        header->backing_file_size   = cpu_to_be32(backing_file_len);
2910    }
2911
2912    /* Write the new header */
2913    ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
2914    if (ret < 0) {
2915        goto fail;
2916    }
2917
2918    ret = 0;
2919fail:
2920    qemu_vfree(header);
2921    return ret;
2922}
2923
2924static int qcow2_change_backing_file(BlockDriverState *bs,
2925    const char *backing_file, const char *backing_fmt)
2926{
2927    BDRVQcow2State *s = bs->opaque;
2928
2929    /* Adding a backing file means that the external data file alone won't be
2930     * enough to make sense of the content */
2931    if (backing_file && data_file_is_raw(bs)) {
2932        return -EINVAL;
2933    }
2934
2935    if (backing_file && strlen(backing_file) > 1023) {
2936        return -EINVAL;
2937    }
2938
2939    pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
2940            backing_file ?: "");
2941    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2942    pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2943
2944    g_free(s->image_backing_file);
2945    g_free(s->image_backing_format);
2946
2947    s->image_backing_file = backing_file ? g_strdup(bs->backing_file) : NULL;
2948    s->image_backing_format = backing_fmt ? g_strdup(bs->backing_format) : NULL;
2949
2950    return qcow2_update_header(bs);
2951}
2952
2953static int qcow2_crypt_method_from_format(const char *encryptfmt)
2954{
2955    if (g_str_equal(encryptfmt, "luks")) {
2956        return QCOW_CRYPT_LUKS;
2957    } else if (g_str_equal(encryptfmt, "aes")) {
2958        return QCOW_CRYPT_AES;
2959    } else {
2960        return -EINVAL;
2961    }
2962}
2963
2964static int qcow2_set_up_encryption(BlockDriverState *bs,
2965                                   QCryptoBlockCreateOptions *cryptoopts,
2966                                   Error **errp)
2967{
2968    BDRVQcow2State *s = bs->opaque;
2969    QCryptoBlock *crypto = NULL;
2970    int fmt, ret;
2971
2972    switch (cryptoopts->format) {
2973    case Q_CRYPTO_BLOCK_FORMAT_LUKS:
2974        fmt = QCOW_CRYPT_LUKS;
2975        break;
2976    case Q_CRYPTO_BLOCK_FORMAT_QCOW:
2977        fmt = QCOW_CRYPT_AES;
2978        break;
2979    default:
2980        error_setg(errp, "Crypto format not supported in qcow2");
2981        return -EINVAL;
2982    }
2983
2984    s->crypt_method_header = fmt;
2985
2986    crypto = qcrypto_block_create(cryptoopts, "encrypt.",
2987                                  qcow2_crypto_hdr_init_func,
2988                                  qcow2_crypto_hdr_write_func,
2989                                  bs, errp);
2990    if (!crypto) {
2991        return -EINVAL;
2992    }
2993
2994    ret = qcow2_update_header(bs);
2995    if (ret < 0) {
2996        error_setg_errno(errp, -ret, "Could not write encryption header");
2997        goto out;
2998    }
2999
3000    ret = 0;
3001 out:
3002    qcrypto_block_free(crypto);
3003    return ret;
3004}
3005
3006/**
3007 * Preallocates metadata structures for data clusters between @offset (in the
3008 * guest disk) and @new_length (which is thus generally the new guest disk
3009 * size).
3010 *
3011 * Returns: 0 on success, -errno on failure.
3012 */
3013static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset,
3014                                       uint64_t new_length, PreallocMode mode,
3015                                       Error **errp)
3016{
3017    BDRVQcow2State *s = bs->opaque;
3018    uint64_t bytes;
3019    uint64_t host_offset = 0;
3020    int64_t file_length;
3021    unsigned int cur_bytes;
3022    int ret;
3023    QCowL2Meta *meta;
3024
3025    assert(offset <= new_length);
3026    bytes = new_length - offset;
3027
3028    while (bytes) {
3029        cur_bytes = MIN(bytes, QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size));
3030        ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
3031                                         &host_offset, &meta);
3032        if (ret < 0) {
3033            error_setg_errno(errp, -ret, "Allocating clusters failed");
3034            return ret;
3035        }
3036
3037        while (meta) {
3038            QCowL2Meta *next = meta->next;
3039
3040            ret = qcow2_alloc_cluster_link_l2(bs, meta);
3041            if (ret < 0) {
3042                error_setg_errno(errp, -ret, "Mapping clusters failed");
3043                qcow2_free_any_clusters(bs, meta->alloc_offset,
3044                                        meta->nb_clusters, QCOW2_DISCARD_NEVER);
3045                return ret;
3046            }
3047
3048            /* There are no dependent requests, but we need to remove our
3049             * request from the list of in-flight requests */
3050            QLIST_REMOVE(meta, next_in_flight);
3051
3052            g_free(meta);
3053            meta = next;
3054        }
3055
3056        /* TODO Preallocate data if requested */
3057
3058        bytes -= cur_bytes;
3059        offset += cur_bytes;
3060    }
3061
3062    /*
3063     * It is expected that the image file is large enough to actually contain
3064     * all of the allocated clusters (otherwise we get failing reads after
3065     * EOF). Extend the image to the last allocated sector.
3066     */
3067    file_length = bdrv_getlength(s->data_file->bs);
3068    if (file_length < 0) {
3069        error_setg_errno(errp, -file_length, "Could not get file size");
3070        return file_length;
3071    }
3072
3073    if (host_offset + cur_bytes > file_length) {
3074        if (mode == PREALLOC_MODE_METADATA) {
3075            mode = PREALLOC_MODE_OFF;
3076        }
3077        ret = bdrv_co_truncate(s->data_file, host_offset + cur_bytes, false,
3078                               mode, errp);
3079        if (ret < 0) {
3080            return ret;
3081        }
3082    }
3083
3084    return 0;
3085}
3086
/* qcow2_refcount_metadata_size:
 * @clusters: number of clusters to refcount (including data and L1/L2 tables)
 * @cluster_size: size of a cluster, in bytes
 * @refcount_order: refcount bits power-of-2 exponent
 * @generous_increase: allow for the refcount table to be 1.5x as large as it
 *                     needs to be
 * @refblock_count: if not NULL, receives the number of refcount block
 *                  clusters
 *
 * Returns: Number of bytes required for refcount blocks and table metadata.
 */
int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size,
                                     int refcount_order, bool generous_increase,
                                     uint64_t *refblock_count)
{
    /*
     * Refcount metadata is itself reference-counted, so the amount needed
     * depends on its own size. Rather than deriving a closed formula, the
     * block and table counts are recomputed until the total number of
     * clusters no longer changes.
     */
    int64_t per_table_cluster = cluster_size / sizeof(uint64_t);
    int64_t per_block = cluster_size * 8 / (1 << refcount_order);
    int64_t table_clusters = 0; /* number of refcount table clusters */
    int64_t block_clusters = 0; /* number of refcount block clusters */
    int64_t total = 0;
    int64_t previous;

    for (;;) {
        previous = total;

        /* Both divisions round up */
        block_clusters = (clusters + table_clusters + block_clusters
                          + per_block - 1) / per_block;
        table_clusters = (block_clusters + per_table_cluster - 1)
                         / per_table_cluster;
        total = clusters + block_clusters + table_clusters;

        if (total == previous && generous_increase) {
            /* Converged once; add half a table (rounded up) and redo it */
            clusters += (table_clusters + 2 - 1) / 2;
            total = 0; /* force another loop */
            generous_increase = false;
        }

        if (total == previous) {
            break;
        }
    }

    if (refblock_count != NULL) {
        *refblock_count = block_clusters;
    }

    return (block_clusters + table_clusters) * cluster_size;
}
3135
/**
 * qcow2_calc_prealloc_size:
 * @total_size: virtual disk size in bytes
 * @cluster_size: cluster size in bytes
 * @refcount_order: refcount bits power-of-2 exponent
 *
 * The result is the sum of the cluster-aligned disk size, one header
 * cluster, the L2 and L1 tables, and the refcount metadata needed for all
 * of these.
 *
 * Returns: Total number of bytes required for the fully allocated image
 * (including metadata).
 */
static int64_t qcow2_calc_prealloc_size(int64_t total_size,
                                        size_t cluster_size,
                                        int refcount_order)
{
    int64_t data_bytes = ROUND_UP(total_size, cluster_size);
    size_t entries_per_cluster = cluster_size / sizeof(uint64_t);
    uint64_t l2_entries;
    uint64_t l1_entries;
    int64_t metadata_bytes;

    /* header: 1 cluster */
    metadata_bytes = cluster_size;

    /* L2 tables: one entry per data cluster, in whole tables */
    l2_entries = data_bytes / cluster_size;
    l2_entries = ROUND_UP(l2_entries, entries_per_cluster);
    metadata_bytes += l2_entries * sizeof(uint64_t);

    /* L1 table: one entry per L2 table, rounded up the same way */
    l1_entries = l2_entries * sizeof(uint64_t) / cluster_size;
    l1_entries = ROUND_UP(l1_entries, entries_per_cluster);
    metadata_bytes += l1_entries * sizeof(uint64_t);

    /* refcount table and blocks covering everything counted so far */
    metadata_bytes += qcow2_refcount_metadata_size(
            (metadata_bytes + data_bytes) / cluster_size,
            cluster_size, refcount_order, false, NULL);

    return metadata_bytes + data_bytes;
}
3173
3174static bool validate_cluster_size(size_t cluster_size, Error **errp)
3175{
3176    int cluster_bits = ctz32(cluster_size);
3177    if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS ||
3178        (1 << cluster_bits) != cluster_size)
3179    {
3180        error_setg(errp, "Cluster size must be a power of two between %d and "
3181                   "%dk", 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10));
3182        return false;
3183    }
3184    return true;
3185}
3186
3187static size_t qcow2_opt_get_cluster_size_del(QemuOpts *opts, Error **errp)
3188{
3189    size_t cluster_size;
3190
3191    cluster_size = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE,
3192                                         DEFAULT_CLUSTER_SIZE);
3193    if (!validate_cluster_size(cluster_size, errp)) {
3194        return 0;
3195    }
3196    return cluster_size;
3197}
3198
3199static int qcow2_opt_get_version_del(QemuOpts *opts, Error **errp)
3200{
3201    char *buf;
3202    int ret;
3203
3204    buf = qemu_opt_get_del(opts, BLOCK_OPT_COMPAT_LEVEL);
3205    if (!buf) {
3206        ret = 3; /* default */
3207    } else if (!strcmp(buf, "0.10")) {
3208        ret = 2;
3209    } else if (!strcmp(buf, "1.1")) {
3210        ret = 3;
3211    } else {
3212        error_setg(errp, "Invalid compatibility level: '%s'", buf);
3213        ret = -EINVAL;
3214    }
3215    g_free(buf);
3216    return ret;
3217}
3218
3219static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version,
3220                                                Error **errp)
3221{
3222    uint64_t refcount_bits;
3223
3224    refcount_bits = qemu_opt_get_number_del(opts, BLOCK_OPT_REFCOUNT_BITS, 16);
3225    if (refcount_bits > 64 || !is_power_of_2(refcount_bits)) {
3226        error_setg(errp, "Refcount width must be a power of two and may not "
3227                   "exceed 64 bits");
3228        return 0;
3229    }
3230
3231    if (version < 3 && refcount_bits != 16) {
3232        error_setg(errp, "Different refcount widths than 16 bits require "
3233                   "compatibility level 1.1 or above (use compat=1.1 or "
3234                   "greater)");
3235        return 0;
3236    }
3237
3238    return refcount_bits;
3239}
3240
3241static int coroutine_fn
3242qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
3243{
3244    BlockdevCreateOptionsQcow2 *qcow2_opts;
3245    QDict *options;
3246
3247    /*
3248     * Open the image file and write a minimal qcow2 header.
3249     *
3250     * We keep things simple and start with a zero-sized image. We also
3251     * do without refcount blocks or a L1 table for now. We'll fix the
3252     * inconsistency later.
3253     *
3254     * We do need a refcount table because growing the refcount table means
3255     * allocating two new refcount blocks - the seconds of which would be at
3256     * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file
3257     * size for any qcow2 image.
3258     */
3259    BlockBackend *blk = NULL;
3260    BlockDriverState *bs = NULL;
3261    BlockDriverState *data_bs = NULL;
3262    QCowHeader *header;
3263    size_t cluster_size;
3264    int version;
3265    int refcount_order;
3266    uint64_t* refcount_table;
3267    Error *local_err = NULL;
3268    int ret;
3269
3270    assert(create_options->driver == BLOCKDEV_DRIVER_QCOW2);
3271    qcow2_opts = &create_options->u.qcow2;
3272
3273    bs = bdrv_open_blockdev_ref(qcow2_opts->file, errp);
3274    if (bs == NULL) {
3275        return -EIO;
3276    }
3277
3278    /* Validate options and set default values */
3279    if (!QEMU_IS_ALIGNED(qcow2_opts->size, BDRV_SECTOR_SIZE)) {
3280        error_setg(errp, "Image size must be a multiple of 512 bytes");
3281        ret = -EINVAL;
3282        goto out;
3283    }
3284
3285    if (qcow2_opts->has_version) {
3286        switch (qcow2_opts->version) {
3287        case BLOCKDEV_QCOW2_VERSION_V2:
3288            version = 2;
3289            break;
3290        case BLOCKDEV_QCOW2_VERSION_V3:
3291            version = 3;
3292            break;
3293        default:
3294            g_assert_not_reached();
3295        }
3296    } else {
3297        version = 3;
3298    }
3299
3300    if (qcow2_opts->has_cluster_size) {
3301        cluster_size = qcow2_opts->cluster_size;
3302    } else {
3303        cluster_size = DEFAULT_CLUSTER_SIZE;
3304    }
3305
3306    if (!validate_cluster_size(cluster_size, errp)) {
3307        ret = -EINVAL;
3308        goto out;
3309    }
3310
3311    if (!qcow2_opts->has_preallocation) {
3312        qcow2_opts->preallocation = PREALLOC_MODE_OFF;
3313    }
3314    if (qcow2_opts->has_backing_file &&
3315        qcow2_opts->preallocation != PREALLOC_MODE_OFF)
3316    {
3317        error_setg(errp, "Backing file and preallocation cannot be used at "
3318                   "the same time");
3319        ret = -EINVAL;
3320        goto out;
3321    }
3322    if (qcow2_opts->has_backing_fmt && !qcow2_opts->has_backing_file) {
3323        error_setg(errp, "Backing format cannot be used without backing file");
3324        ret = -EINVAL;
3325        goto out;
3326    }
3327
3328    if (!qcow2_opts->has_lazy_refcounts) {
3329        qcow2_opts->lazy_refcounts = false;
3330    }
3331    if (version < 3 && qcow2_opts->lazy_refcounts) {
3332        error_setg(errp, "Lazy refcounts only supported with compatibility "
3333                   "level 1.1 and above (use version=v3 or greater)");
3334        ret = -EINVAL;
3335        goto out;
3336    }
3337
3338    if (!qcow2_opts->has_refcount_bits) {
3339        qcow2_opts->refcount_bits = 16;
3340    }
3341    if (qcow2_opts->refcount_bits > 64 ||
3342        !is_power_of_2(qcow2_opts->refcount_bits))
3343    {
3344        error_setg(errp, "Refcount width must be a power of two and may not "
3345                   "exceed 64 bits");
3346        ret = -EINVAL;
3347        goto out;
3348    }
3349    if (version < 3 && qcow2_opts->refcount_bits != 16) {
3350        error_setg(errp, "Different refcount widths than 16 bits require "
3351                   "compatibility level 1.1 or above (use version=v3 or "
3352                   "greater)");
3353        ret = -EINVAL;
3354        goto out;
3355    }
3356    refcount_order = ctz32(qcow2_opts->refcount_bits);
3357
3358    if (qcow2_opts->data_file_raw && !qcow2_opts->data_file) {
3359        error_setg(errp, "data-file-raw requires data-file");
3360        ret = -EINVAL;
3361        goto out;
3362    }
3363    if (qcow2_opts->data_file_raw && qcow2_opts->has_backing_file) {
3364        error_setg(errp, "Backing file and data-file-raw cannot be used at "
3365                   "the same time");
3366        ret = -EINVAL;
3367        goto out;
3368    }
3369
3370    if (qcow2_opts->data_file) {
3371        if (version < 3) {
3372            error_setg(errp, "External data files are only supported with "
3373                       "compatibility level 1.1 and above (use version=v3 or "
3374                       "greater)");
3375            ret = -EINVAL;
3376            goto out;
3377        }
3378        data_bs = bdrv_open_blockdev_ref(qcow2_opts->data_file, errp);
3379        if (data_bs == NULL) {
3380            ret = -EIO;
3381            goto out;
3382        }
3383    }
3384
3385    /* Create BlockBackend to write to the image */
3386    blk = blk_new(bdrv_get_aio_context(bs),
3387                  BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
3388    ret = blk_insert_bs(blk, bs, errp);
3389    if (ret < 0) {
3390        goto out;
3391    }
3392    blk_set_allow_write_beyond_eof(blk, true);
3393
3394    /* Write the header */
3395    QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header));
3396    header = g_malloc0(cluster_size);
3397    *header = (QCowHeader) {
3398        .magic                      = cpu_to_be32(QCOW_MAGIC),
3399        .version                    = cpu_to_be32(version),
3400        .cluster_bits               = cpu_to_be32(ctz32(cluster_size)),
3401        .size                       = cpu_to_be64(0),
3402        .l1_table_offset            = cpu_to_be64(0),
3403        .l1_size                    = cpu_to_be32(0),
3404        .refcount_table_offset      = cpu_to_be64(cluster_size),
3405        .refcount_table_clusters    = cpu_to_be32(1),
3406        .refcount_order             = cpu_to_be32(refcount_order),
3407        .header_length              = cpu_to_be32(sizeof(*header)),
3408    };
3409
3410    /* We'll update this to correct value later */
3411    header->crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
3412
3413    if (qcow2_opts->lazy_refcounts) {
3414        header->compatible_features |=
3415            cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
3416    }
3417    if (data_bs) {
3418        header->incompatible_features |=
3419            cpu_to_be64(QCOW2_INCOMPAT_DATA_FILE);
3420    }
3421    if (qcow2_opts->data_file_raw) {
3422        header->autoclear_features |=
3423            cpu_to_be64(QCOW2_AUTOCLEAR_DATA_FILE_RAW);
3424    }
3425
3426    ret = blk_pwrite(blk, 0, header, cluster_size, 0);
3427    g_free(header);
3428    if (ret < 0) {
3429        error_setg_errno(errp, -ret, "Could not write qcow2 header");
3430        goto out;
3431    }
3432
3433    /* Write a refcount table with one refcount block */
3434    refcount_table = g_malloc0(2 * cluster_size);
3435    refcount_table[0] = cpu_to_be64(2 * cluster_size);
3436    ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size, 0);
3437    g_free(refcount_table);
3438
3439    if (ret < 0) {
3440        error_setg_errno(errp, -ret, "Could not write refcount table");
3441        goto out;
3442    }
3443
3444    blk_unref(blk);
3445    blk = NULL;
3446
3447    /*
3448     * And now open the image and make it consistent first (i.e. increase the
3449     * refcount of the cluster that is occupied by the header and the refcount
3450     * table)
3451     */
3452    options = qdict_new();
3453    qdict_put_str(options, "driver", "qcow2");
3454    qdict_put_str(options, "file", bs->node_name);
3455    if (data_bs) {
3456        qdict_put_str(options, "data-file", data_bs->node_name);
3457    }
3458    blk = blk_new_open(NULL, NULL, options,
3459                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
3460                       &local_err);
3461    if (blk == NULL) {
3462        error_propagate(errp, local_err);
3463        ret = -EIO;
3464        goto out;
3465    }
3466
3467    ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size);
3468    if (ret < 0) {
3469        error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
3470                         "header and refcount table");
3471        goto out;
3472
3473    } else if (ret != 0) {
3474        error_report("Huh, first cluster in empty image is already in use?");
3475        abort();
3476    }
3477
3478    /* Set the external data file if necessary */
3479    if (data_bs) {
3480        BDRVQcow2State *s = blk_bs(blk)->opaque;
3481        s->image_data_file = g_strdup(data_bs->filename);
3482    }
3483
3484    /* Create a full header (including things like feature table) */
3485    ret = qcow2_update_header(blk_bs(blk));
3486    if (ret < 0) {
3487        error_setg_errno(errp, -ret, "Could not update qcow2 header");
3488        goto out;
3489    }
3490
3491    /* Okay, now that we have a valid image, let's give it the right size */
3492    ret = blk_truncate(blk, qcow2_opts->size, false, qcow2_opts->preallocation,
3493                       errp);
3494    if (ret < 0) {
3495        error_prepend(errp, "Could not resize image: ");
3496        goto out;
3497    }
3498
3499    /* Want a backing file? There you go.*/
3500    if (qcow2_opts->has_backing_file) {
3501        const char *backing_format = NULL;
3502
3503        if (qcow2_opts->has_backing_fmt) {
3504            backing_format = BlockdevDriver_str(qcow2_opts->backing_fmt);
3505        }
3506
3507        ret = bdrv_change_backing_file(blk_bs(blk), qcow2_opts->backing_file,
3508                                       backing_format);
3509        if (ret < 0) {
3510            error_setg_errno(errp, -ret, "Could not assign backing file '%s' "
3511                             "with format '%s'", qcow2_opts->backing_file,
3512                             backing_format);
3513            goto out;
3514        }
3515    }
3516
3517    /* Want encryption? There you go. */
3518    if (qcow2_opts->has_encrypt) {
3519        ret = qcow2_set_up_encryption(blk_bs(blk), qcow2_opts->encrypt, errp);
3520        if (ret < 0) {
3521            goto out;
3522        }
3523    }
3524
3525    blk_unref(blk);
3526    blk = NULL;
3527
3528    /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning.
3529     * Using BDRV_O_NO_IO, since encryption is now setup we don't want to
3530     * have to setup decryption context. We're not doing any I/O on the top
3531     * level BlockDriverState, only lower layers, where BDRV_O_NO_IO does
3532     * not have effect.
3533     */
3534    options = qdict_new();
3535    qdict_put_str(options, "driver", "qcow2");
3536    qdict_put_str(options, "file", bs->node_name);
3537    if (data_bs) {
3538        qdict_put_str(options, "data-file", data_bs->node_name);
3539    }
3540    blk = blk_new_open(NULL, NULL, options,
3541                       BDRV_O_RDWR | BDRV_O_NO_BACKING | BDRV_O_NO_IO,
3542                       &local_err);
3543    if (blk == NULL) {
3544        error_propagate(errp, local_err);
3545        ret = -EIO;
3546        goto out;
3547    }
3548
3549    ret = 0;
3550out:
3551    blk_unref(blk);
3552    bdrv_unref(bs);
3553    bdrv_unref(data_bs);
3554    return ret;
3555}
3556
3557static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opts,
3558                                             Error **errp)
3559{
3560    BlockdevCreateOptions *create_options = NULL;
3561    QDict *qdict;
3562    Visitor *v;
3563    BlockDriverState *bs = NULL;
3564    BlockDriverState *data_bs = NULL;
3565    Error *local_err = NULL;
3566    const char *val;
3567    int ret;
3568
3569    /* Only the keyval visitor supports the dotted syntax needed for
3570     * encryption, so go through a QDict before getting a QAPI type. Ignore
3571     * options meant for the protocol layer so that the visitor doesn't
3572     * complain. */
3573    qdict = qemu_opts_to_qdict_filtered(opts, NULL, bdrv_qcow2.create_opts,
3574                                        true);
3575
3576    /* Handle encryption options */
3577    val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT);
3578    if (val && !strcmp(val, "on")) {
3579        qdict_put_str(qdict, BLOCK_OPT_ENCRYPT, "qcow");
3580    } else if (val && !strcmp(val, "off")) {
3581        qdict_del(qdict, BLOCK_OPT_ENCRYPT);
3582    }
3583
3584    val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT);
3585    if (val && !strcmp(val, "aes")) {
3586        qdict_put_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT, "qcow");
3587    }
3588
3589    /* Convert compat=0.10/1.1 into compat=v2/v3, to be renamed into
3590     * version=v2/v3 below. */
3591    val = qdict_get_try_str(qdict, BLOCK_OPT_COMPAT_LEVEL);
3592    if (val && !strcmp(val, "0.10")) {
3593        qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v2");
3594    } else if (val && !strcmp(val, "1.1")) {
3595        qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v3");
3596    }
3597
3598    /* Change legacy command line options into QMP ones */
3599    static const QDictRenames opt_renames[] = {
3600        { BLOCK_OPT_BACKING_FILE,       "backing-file" },
3601        { BLOCK_OPT_BACKING_FMT,        "backing-fmt" },
3602        { BLOCK_OPT_CLUSTER_SIZE,       "cluster-size" },
3603        { BLOCK_OPT_LAZY_REFCOUNTS,     "lazy-refcounts" },
3604        { BLOCK_OPT_REFCOUNT_BITS,      "refcount-bits" },
3605        { BLOCK_OPT_ENCRYPT,            BLOCK_OPT_ENCRYPT_FORMAT },
3606        { BLOCK_OPT_COMPAT_LEVEL,       "version" },
3607        { BLOCK_OPT_DATA_FILE_RAW,      "data-file-raw" },
3608        { NULL, NULL },
3609    };
3610
3611    if (!qdict_rename_keys(qdict, opt_renames, errp)) {
3612        ret = -EINVAL;
3613        goto finish;
3614    }
3615
3616    /* Create and open the file (protocol layer) */
3617    ret = bdrv_create_file(filename, opts, errp);
3618    if (ret < 0) {
3619        goto finish;
3620    }
3621
3622    bs = bdrv_open(filename, NULL, NULL,
3623                   BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
3624    if (bs == NULL) {
3625        ret = -EIO;
3626        goto finish;
3627    }
3628
3629    /* Create and open an external data file (protocol layer) */
3630    val = qdict_get_try_str(qdict, BLOCK_OPT_DATA_FILE);
3631    if (val) {
3632        ret = bdrv_create_file(val, opts, errp);
3633        if (ret < 0) {
3634            goto finish;
3635        }
3636
3637        data_bs = bdrv_open(val, NULL, NULL,
3638                            BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
3639                            errp);
3640        if (data_bs == NULL) {
3641            ret = -EIO;
3642            goto finish;
3643        }
3644
3645        qdict_del(qdict, BLOCK_OPT_DATA_FILE);
3646        qdict_put_str(qdict, "data-file", data_bs->node_name);
3647    }
3648
3649    /* Set 'driver' and 'node' options */
3650    qdict_put_str(qdict, "driver", "qcow2");
3651    qdict_put_str(qdict, "file", bs->node_name);
3652
3653    /* Now get the QAPI type BlockdevCreateOptions */
3654    v = qobject_input_visitor_new_flat_confused(qdict, errp);
3655    if (!v) {
3656        ret = -EINVAL;
3657        goto finish;
3658    }
3659
3660    visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err);
3661    visit_free(v);
3662
3663    if (local_err) {
3664        error_propagate(errp, local_err);
3665        ret = -EINVAL;
3666        goto finish;
3667    }
3668
3669    /* Silently round up size */
3670    create_options->u.qcow2.size = ROUND_UP(create_options->u.qcow2.size,
3671                                            BDRV_SECTOR_SIZE);
3672
3673    /* Create the qcow2 image (format layer) */
3674    ret = qcow2_co_create(create_options, errp);
3675    if (ret < 0) {
3676        goto finish;
3677    }
3678
3679    ret = 0;
3680finish:
3681    qobject_unref(qdict);
3682    bdrv_unref(bs);
3683    bdrv_unref(data_bs);
3684    qapi_free_BlockdevCreateOptions(create_options);
3685    return ret;
3686}
3687
3688
3689static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes)
3690{
3691    int64_t nr;
3692    int res;
3693
3694    /* Clamp to image length, before checking status of underlying sectors */
3695    if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) {
3696        bytes = bs->total_sectors * BDRV_SECTOR_SIZE - offset;
3697    }
3698
3699    if (!bytes) {
3700        return true;
3701    }
3702    res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL);
3703    return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == bytes;
3704}
3705
3706static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
3707    int64_t offset, int bytes, BdrvRequestFlags flags)
3708{
3709    int ret;
3710    BDRVQcow2State *s = bs->opaque;
3711
3712    uint32_t head = offset % s->cluster_size;
3713    uint32_t tail = (offset + bytes) % s->cluster_size;
3714
3715    trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, bytes);
3716    if (offset + bytes == bs->total_sectors * BDRV_SECTOR_SIZE) {
3717        tail = 0;
3718    }
3719
3720    if (head || tail) {
3721        uint64_t off;
3722        unsigned int nr;
3723
3724        assert(head + bytes <= s->cluster_size);
3725
3726        /* check whether remainder of cluster already reads as zero */
3727        if (!(is_zero(bs, offset - head, head) &&
3728              is_zero(bs, offset + bytes,
3729                      tail ? s->cluster_size - tail : 0))) {
3730            return -ENOTSUP;
3731        }
3732
3733        qemu_co_mutex_lock(&s->lock);
3734        /* We can have new write after previous check */
3735        offset = QEMU_ALIGN_DOWN(offset, s->cluster_size);
3736        bytes = s->cluster_size;
3737        nr = s->cluster_size;
3738        ret = qcow2_get_cluster_offset(bs, offset, &nr, &off);
3739        if (ret != QCOW2_CLUSTER_UNALLOCATED &&
3740            ret != QCOW2_CLUSTER_ZERO_PLAIN &&
3741            ret != QCOW2_CLUSTER_ZERO_ALLOC) {
3742            qemu_co_mutex_unlock(&s->lock);
3743            return -ENOTSUP;
3744        }
3745    } else {
3746        qemu_co_mutex_lock(&s->lock);
3747    }
3748
3749    trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, bytes);
3750
3751    /* Whatever is left can use real zero clusters */
3752    ret = qcow2_cluster_zeroize(bs, offset, bytes, flags);
3753    qemu_co_mutex_unlock(&s->lock);
3754
3755    return ret;
3756}
3757
3758static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
3759                                          int64_t offset, int bytes)
3760{
3761    int ret;
3762    BDRVQcow2State *s = bs->opaque;
3763
3764    if (!QEMU_IS_ALIGNED(offset | bytes, s->cluster_size)) {
3765        assert(bytes < s->cluster_size);
3766        /* Ignore partial clusters, except for the special case of the
3767         * complete partial cluster at the end of an unaligned file */
3768        if (!QEMU_IS_ALIGNED(offset, s->cluster_size) ||
3769            offset + bytes != bs->total_sectors * BDRV_SECTOR_SIZE) {
3770            return -ENOTSUP;
3771        }
3772    }
3773
3774    qemu_co_mutex_lock(&s->lock);
3775    ret = qcow2_cluster_discard(bs, offset, bytes, QCOW2_DISCARD_REQUEST,
3776                                false);
3777    qemu_co_mutex_unlock(&s->lock);
3778    return ret;
3779}
3780
3781static int coroutine_fn
3782qcow2_co_copy_range_from(BlockDriverState *bs,
3783                         BdrvChild *src, uint64_t src_offset,
3784                         BdrvChild *dst, uint64_t dst_offset,
3785                         uint64_t bytes, BdrvRequestFlags read_flags,
3786                         BdrvRequestFlags write_flags)
3787{
3788    BDRVQcow2State *s = bs->opaque;
3789    int ret;
3790    unsigned int cur_bytes; /* number of bytes in current iteration */
3791    BdrvChild *child = NULL;
3792    BdrvRequestFlags cur_write_flags;
3793
3794    assert(!bs->encrypted);
3795    qemu_co_mutex_lock(&s->lock);
3796
3797    while (bytes != 0) {
3798        uint64_t copy_offset = 0;
3799        /* prepare next request */
3800        cur_bytes = MIN(bytes, INT_MAX);
3801        cur_write_flags = write_flags;
3802
3803        ret = qcow2_get_cluster_offset(bs, src_offset, &cur_bytes, &copy_offset);
3804        if (ret < 0) {
3805            goto out;
3806        }
3807
3808        switch (ret) {
3809        case QCOW2_CLUSTER_UNALLOCATED:
3810            if (bs->backing && bs->backing->bs) {
3811                int64_t backing_length = bdrv_getlength(bs->backing->bs);
3812                if (src_offset >= backing_length) {
3813                    cur_write_flags |= BDRV_REQ_ZERO_WRITE;
3814                } else {
3815                    child = bs->backing;
3816                    cur_bytes = MIN(cur_bytes, backing_length - src_offset);
3817                    copy_offset = src_offset;
3818                }
3819            } else {
3820                cur_write_flags |= BDRV_REQ_ZERO_WRITE;
3821            }
3822            break;
3823
3824        case QCOW2_CLUSTER_ZERO_PLAIN:
3825        case QCOW2_CLUSTER_ZERO_ALLOC:
3826            cur_write_flags |= BDRV_REQ_ZERO_WRITE;
3827            break;
3828
3829        case QCOW2_CLUSTER_COMPRESSED:
3830            ret = -ENOTSUP;
3831            goto out;
3832
3833        case QCOW2_CLUSTER_NORMAL:
3834            child = s->data_file;
3835            copy_offset += offset_into_cluster(s, src_offset);
3836            if ((copy_offset & 511) != 0) {
3837                ret = -EIO;
3838                goto out;
3839            }
3840            break;
3841
3842        default:
3843            abort();
3844        }
3845        qemu_co_mutex_unlock(&s->lock);
3846        ret = bdrv_co_copy_range_from(child,
3847                                      copy_offset,
3848                                      dst, dst_offset,
3849                                      cur_bytes, read_flags, cur_write_flags);
3850        qemu_co_mutex_lock(&s->lock);
3851        if (ret < 0) {
3852            goto out;
3853        }
3854
3855        bytes -= cur_bytes;
3856        src_offset += cur_bytes;
3857        dst_offset += cur_bytes;
3858    }
3859    ret = 0;
3860
3861out:
3862    qemu_co_mutex_unlock(&s->lock);
3863    return ret;
3864}
3865
3866static int coroutine_fn
3867qcow2_co_copy_range_to(BlockDriverState *bs,
3868                       BdrvChild *src, uint64_t src_offset,
3869                       BdrvChild *dst, uint64_t dst_offset,
3870                       uint64_t bytes, BdrvRequestFlags read_flags,
3871                       BdrvRequestFlags write_flags)
3872{
3873    BDRVQcow2State *s = bs->opaque;
3874    int offset_in_cluster;
3875    int ret;
3876    unsigned int cur_bytes; /* number of sectors in current iteration */
3877    uint64_t cluster_offset;
3878    QCowL2Meta *l2meta = NULL;
3879
3880    assert(!bs->encrypted);
3881
3882    qemu_co_mutex_lock(&s->lock);
3883
3884    while (bytes != 0) {
3885
3886        l2meta = NULL;
3887
3888        offset_in_cluster = offset_into_cluster(s, dst_offset);
3889        cur_bytes = MIN(bytes, INT_MAX);
3890
3891        /* TODO:
3892         * If src->bs == dst->bs, we could simply copy by incrementing
3893         * the refcnt, without copying user data.
3894         * Or if src->bs == dst->bs->backing->bs, we could copy by discarding. */
3895        ret = qcow2_alloc_cluster_offset(bs, dst_offset, &cur_bytes,
3896                                         &cluster_offset, &l2meta);
3897        if (ret < 0) {
3898            goto fail;
3899        }
3900
3901        assert((cluster_offset & 511) == 0);
3902
3903        ret = qcow2_pre_write_overlap_check(bs, 0,
3904                cluster_offset + offset_in_cluster, cur_bytes, true);
3905        if (ret < 0) {
3906            goto fail;
3907        }
3908
3909        qemu_co_mutex_unlock(&s->lock);
3910        ret = bdrv_co_copy_range_to(src, src_offset,
3911                                    s->data_file,
3912                                    cluster_offset + offset_in_cluster,
3913                                    cur_bytes, read_flags, write_flags);
3914        qemu_co_mutex_lock(&s->lock);
3915        if (ret < 0) {
3916            goto fail;
3917        }
3918
3919        ret = qcow2_handle_l2meta(bs, &l2meta, true);
3920        if (ret) {
3921            goto fail;
3922        }
3923
3924        bytes -= cur_bytes;
3925        src_offset += cur_bytes;
3926        dst_offset += cur_bytes;
3927    }
3928    ret = 0;
3929
3930fail:
3931    qcow2_handle_l2meta(bs, &l2meta, false);
3932
3933    qemu_co_mutex_unlock(&s->lock);
3934
3935    trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
3936
3937    return ret;
3938}
3939
3940static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
3941                                          bool exact, PreallocMode prealloc,
3942                                          Error **errp)
3943{
3944    BDRVQcow2State *s = bs->opaque;
3945    uint64_t old_length;
3946    int64_t new_l1_size;
3947    int ret;
3948    QDict *options;
3949
3950    if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_METADATA &&
3951        prealloc != PREALLOC_MODE_FALLOC && prealloc != PREALLOC_MODE_FULL)
3952    {
3953        error_setg(errp, "Unsupported preallocation mode '%s'",
3954                   PreallocMode_str(prealloc));
3955        return -ENOTSUP;
3956    }
3957
3958    if (offset & 511) {
3959        error_setg(errp, "The new size must be a multiple of 512");
3960        return -EINVAL;
3961    }
3962
3963    qemu_co_mutex_lock(&s->lock);
3964
3965    /* cannot proceed if image has snapshots */
3966    if (s->nb_snapshots) {
3967        error_setg(errp, "Can't resize an image which has snapshots");
3968        ret = -ENOTSUP;
3969        goto fail;
3970    }
3971
3972    /* cannot proceed if image has bitmaps */
3973    if (qcow2_truncate_bitmaps_check(bs, errp)) {
3974        ret = -ENOTSUP;
3975        goto fail;
3976    }
3977
3978    old_length = bs->total_sectors * BDRV_SECTOR_SIZE;
3979    new_l1_size = size_to_l1(s, offset);
3980
3981    if (offset < old_length) {
3982        int64_t last_cluster, old_file_size;
3983        if (prealloc != PREALLOC_MODE_OFF) {
3984            error_setg(errp,
3985                       "Preallocation can't be used for shrinking an image");
3986            ret = -EINVAL;
3987            goto fail;
3988        }
3989
3990        ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size),
3991                                    old_length - ROUND_UP(offset,
3992                                                          s->cluster_size),
3993                                    QCOW2_DISCARD_ALWAYS, true);
3994        if (ret < 0) {
3995            error_setg_errno(errp, -ret, "Failed to discard cropped clusters");
3996            goto fail;
3997        }
3998
3999        ret = qcow2_shrink_l1_table(bs, new_l1_size);
4000        if (ret < 0) {
4001            error_setg_errno(errp, -ret,
4002                             "Failed to reduce the number of L2 tables");
4003            goto fail;
4004        }
4005
4006        ret = qcow2_shrink_reftable(bs);
4007        if (ret < 0) {
4008            error_setg_errno(errp, -ret,
4009                             "Failed to discard unused refblocks");
4010            goto fail;
4011        }
4012
4013        old_file_size = bdrv_getlength(bs->file->bs);
4014        if (old_file_size < 0) {
4015            error_setg_errno(errp, -old_file_size,
4016                             "Failed to inquire current file length");
4017            ret = old_file_size;
4018            goto fail;
4019        }
4020        last_cluster = qcow2_get_last_cluster(bs, old_file_size);
4021        if (last_cluster < 0) {
4022            error_setg_errno(errp, -last_cluster,
4023                             "Failed to find the last cluster");
4024            ret = last_cluster;
4025            goto fail;
4026        }
4027        if ((last_cluster + 1) * s->cluster_size < old_file_size) {
4028            Error *local_err = NULL;
4029
4030            /*
4031             * Do not pass @exact here: It will not help the user if
4032             * we get an error here just because they wanted to shrink
4033             * their qcow2 image (on a block device) with qemu-img.
4034             * (And on the qcow2 layer, the @exact requirement is
4035             * always fulfilled, so there is no need to pass it on.)
4036             */
4037            bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size,
4038                             false, PREALLOC_MODE_OFF, &local_err);
4039            if (local_err) {
4040                warn_reportf_err(local_err,
4041                                 "Failed to truncate the tail of the image: ");
4042            }
4043        }
4044    } else {
4045        ret = qcow2_grow_l1_table(bs, new_l1_size, true);
4046        if (ret < 0) {
4047            error_setg_errno(errp, -ret, "Failed to grow the L1 table");
4048            goto fail;
4049        }
4050    }
4051
4052    switch (prealloc) {
4053    case PREALLOC_MODE_OFF:
4054        if (has_data_file(bs)) {
4055            /*
4056             * If the caller wants an exact resize, the external data
4057             * file should be resized to the exact target size, too,
4058             * so we pass @exact here.
4059             */
4060            ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, errp);
4061            if (ret < 0) {
4062                goto fail;
4063            }
4064        }
4065        break;
4066
4067    case PREALLOC_MODE_METADATA:
4068        ret = preallocate_co(bs, old_length, offset, prealloc, errp);
4069        if (ret < 0) {
4070            goto fail;
4071        }
4072        break;
4073
4074    case PREALLOC_MODE_FALLOC:
4075    case PREALLOC_MODE_FULL:
4076    {
4077        int64_t allocation_start, host_offset, guest_offset;
4078        int64_t clusters_allocated;
4079        int64_t old_file_size, new_file_size;
4080        uint64_t nb_new_data_clusters, nb_new_l2_tables;
4081
4082        /* With a data file, preallocation means just allocating the metadata
4083         * and forwarding the truncate request to the data file */
4084        if (has_data_file(bs)) {
4085            ret = preallocate_co(bs, old_length, offset, prealloc, errp);
4086            if (ret < 0) {
4087                goto fail;
4088            }
4089            break;
4090        }
4091
4092        old_file_size = bdrv_getlength(bs->file->bs);
4093        if (old_file_size < 0) {
4094            error_setg_errno(errp, -old_file_size,
4095                             "Failed to inquire current file length");
4096            ret = old_file_size;
4097            goto fail;
4098        }
4099        old_file_size = ROUND_UP(old_file_size, s->cluster_size);
4100
4101        nb_new_data_clusters = DIV_ROUND_UP(offset - old_length,
4102                                            s->cluster_size);
4103
4104        /* This is an overestimation; we will not actually allocate space for
4105         * these in the file but just make sure the new refcount structures are
4106         * able to cover them so we will not have to allocate new refblocks
4107         * while entering the data blocks in the potentially new L2 tables.
4108         * (We do not actually care where the L2 tables are placed. Maybe they
4109         *  are already allocated or they can be placed somewhere before
4110         *  @old_file_size. It does not matter because they will be fully
4111         *  allocated automatically, so they do not need to be covered by the
4112         *  preallocation. All that matters is that we will not have to allocate
4113         *  new refcount structures for them.) */
4114        nb_new_l2_tables = DIV_ROUND_UP(nb_new_data_clusters,
4115                                        s->cluster_size / sizeof(uint64_t));
4116        /* The cluster range may not be aligned to L2 boundaries, so add one L2
4117         * table for a potential head/tail */
4118        nb_new_l2_tables++;
4119
4120        allocation_start = qcow2_refcount_area(bs, old_file_size,
4121                                               nb_new_data_clusters +
4122                                               nb_new_l2_tables,
4123                                               true, 0, 0);
4124        if (allocation_start < 0) {
4125            error_setg_errno(errp, -allocation_start,
4126                             "Failed to resize refcount structures");
4127            ret = allocation_start;
4128            goto fail;
4129        }
4130
4131        clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start,
4132                                                     nb_new_data_clusters);
4133        if (clusters_allocated < 0) {
4134            error_setg_errno(errp, -clusters_allocated,
4135                             "Failed to allocate data clusters");
4136            ret = clusters_allocated;
4137            goto fail;
4138        }
4139
4140        assert(clusters_allocated == nb_new_data_clusters);
4141
4142        /* Allocate the data area */
4143        new_file_size = allocation_start +
4144                        nb_new_data_clusters * s->cluster_size;
4145        /* Image file grows, so @exact does not matter */
4146        ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, errp);
4147        if (ret < 0) {
4148            error_prepend(errp, "Failed to resize underlying file: ");
4149            qcow2_free_clusters(bs, allocation_start,
4150                                nb_new_data_clusters * s->cluster_size,
4151                                QCOW2_DISCARD_OTHER);
4152            goto fail;
4153        }
4154
4155        /* Create the necessary L2 entries */
4156        host_offset = allocation_start;
4157        guest_offset = old_length;
4158        while (nb_new_data_clusters) {
4159            int64_t nb_clusters = MIN(
4160                nb_new_data_clusters,
4161                s->l2_slice_size - offset_to_l2_slice_index(s, guest_offset));
4162            QCowL2Meta allocation = {
4163                .offset       = guest_offset,
4164                .alloc_offset = host_offset,
4165                .nb_clusters  = nb_clusters,
4166            };
4167            qemu_co_queue_init(&allocation.dependent_requests);
4168
4169            ret = qcow2_alloc_cluster_link_l2(bs, &allocation);
4170            if (ret < 0) {
4171                error_setg_errno(errp, -ret, "Failed to update L2 tables");
4172                qcow2_free_clusters(bs, host_offset,
4173                                    nb_new_data_clusters * s->cluster_size,
4174                                    QCOW2_DISCARD_OTHER);
4175                goto fail;
4176            }
4177
4178            guest_offset += nb_clusters * s->cluster_size;
4179            host_offset += nb_clusters * s->cluster_size;
4180            nb_new_data_clusters -= nb_clusters;
4181        }
4182        break;
4183    }
4184
4185    default:
4186        g_assert_not_reached();
4187    }
4188
4189    if (prealloc != PREALLOC_MODE_OFF) {
4190        /* Flush metadata before actually changing the image size */
4191        ret = qcow2_write_caches(bs);
4192        if (ret < 0) {
4193            error_setg_errno(errp, -ret,
4194                             "Failed to flush the preallocated area to disk");
4195            goto fail;
4196        }
4197    }
4198
4199    bs->total_sectors = offset / BDRV_SECTOR_SIZE;
4200
4201    /* write updated header.size */
4202    offset = cpu_to_be64(offset);
4203    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
4204                           &offset, sizeof(uint64_t));
4205    if (ret < 0) {
4206        error_setg_errno(errp, -ret, "Failed to update the image size");
4207        goto fail;
4208    }
4209
4210    s->l1_vm_state_index = new_l1_size;
4211
4212    /* Update cache sizes */
4213    options = qdict_clone_shallow(bs->options);
4214    ret = qcow2_update_options(bs, options, s->flags, errp);
4215    qobject_unref(options);
4216    if (ret < 0) {
4217        goto fail;
4218    }
4219    ret = 0;
4220fail:
4221    qemu_co_mutex_unlock(&s->lock);
4222    return ret;
4223}
4224
4225/* XXX: put compressed sectors first, then all the cluster aligned
4226   tables to avoid losing bytes in alignment */
4227static coroutine_fn int
4228qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
4229                                 uint64_t offset, uint64_t bytes,
4230                                 QEMUIOVector *qiov, size_t qiov_offset)
4231{
4232    BDRVQcow2State *s = bs->opaque;
4233    int ret;
4234    ssize_t out_len;
4235    uint8_t *buf, *out_buf;
4236    uint64_t cluster_offset;
4237
4238    if (has_data_file(bs)) {
4239        return -ENOTSUP;
4240    }
4241
4242    if (bytes == 0) {
4243        /* align end of file to a sector boundary to ease reading with
4244           sector based I/Os */
4245        int64_t len = bdrv_getlength(bs->file->bs);
4246        if (len < 0) {
4247            return len;
4248        }
4249        return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, NULL);
4250    }
4251
4252    if (offset_into_cluster(s, offset)) {
4253        return -EINVAL;
4254    }
4255
4256    buf = qemu_blockalign(bs, s->cluster_size);
4257    if (bytes != s->cluster_size) {
4258        if (bytes > s->cluster_size ||
4259            offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS)
4260        {
4261            qemu_vfree(buf);
4262            return -EINVAL;
4263        }
4264        /* Zero-pad last write if image size is not cluster aligned */
4265        memset(buf + bytes, 0, s->cluster_size - bytes);
4266    }
4267    qemu_iovec_to_buf(qiov, qiov_offset, buf, bytes);
4268
4269    out_buf = g_malloc(s->cluster_size);
4270
4271    out_len = qcow2_co_compress(bs, out_buf, s->cluster_size - 1,
4272                                buf, s->cluster_size);
4273    if (out_len == -ENOMEM) {
4274        /* could not compress: write normal cluster */
4275        ret = qcow2_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset, 0);
4276        if (ret < 0) {
4277            goto fail;
4278        }
4279        goto success;
4280    } else if (out_len < 0) {
4281        ret = -EINVAL;
4282        goto fail;
4283    }
4284
4285    qemu_co_mutex_lock(&s->lock);
4286    ret = qcow2_alloc_compressed_cluster_offset(bs, offset, out_len,
4287                                                &cluster_offset);
4288    if (ret < 0) {
4289        qemu_co_mutex_unlock(&s->lock);
4290        goto fail;
4291    }
4292
4293    ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len, true);
4294    qemu_co_mutex_unlock(&s->lock);
4295    if (ret < 0) {
4296        goto fail;
4297    }
4298
4299    BLKDBG_EVENT(s->data_file, BLKDBG_WRITE_COMPRESSED);
4300    ret = bdrv_co_pwrite(s->data_file, cluster_offset, out_len, out_buf, 0);
4301    if (ret < 0) {
4302        goto fail;
4303    }
4304success:
4305    ret = 0;
4306fail:
4307    qemu_vfree(buf);
4308    g_free(out_buf);
4309    return ret;
4310}
4311
4312static int coroutine_fn
4313qcow2_co_preadv_compressed(BlockDriverState *bs,
4314                           uint64_t file_cluster_offset,
4315                           uint64_t offset,
4316                           uint64_t bytes,
4317                           QEMUIOVector *qiov,
4318                           size_t qiov_offset)
4319{
4320    BDRVQcow2State *s = bs->opaque;
4321    int ret = 0, csize, nb_csectors;
4322    uint64_t coffset;
4323    uint8_t *buf, *out_buf;
4324    int offset_in_cluster = offset_into_cluster(s, offset);
4325
4326    coffset = file_cluster_offset & s->cluster_offset_mask;
4327    nb_csectors = ((file_cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
4328    csize = nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE -
4329        (coffset & ~QCOW2_COMPRESSED_SECTOR_MASK);
4330
4331    buf = g_try_malloc(csize);
4332    if (!buf) {
4333        return -ENOMEM;
4334    }
4335
4336    out_buf = qemu_blockalign(bs, s->cluster_size);
4337
4338    BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
4339    ret = bdrv_co_pread(bs->file, coffset, csize, buf, 0);
4340    if (ret < 0) {
4341        goto fail;
4342    }
4343
4344    if (qcow2_co_decompress(bs, out_buf, s->cluster_size, buf, csize) < 0) {
4345        ret = -EIO;
4346        goto fail;
4347    }
4348
4349    qemu_iovec_from_buf(qiov, qiov_offset, out_buf + offset_in_cluster, bytes);
4350
4351fail:
4352    qemu_vfree(out_buf);
4353    g_free(buf);
4354
4355    return ret;
4356}
4357
/*
 * Reset the image file to a minimal, empty layout.
 *
 * The resulting layout is: image header in cluster 0, a one-cluster
 * refcount table in cluster 1, the first refcount block in cluster 2 and
 * a zeroed L1 table starting at cluster 3.  Finally the file is truncated
 * to (3 + l1_clusters) clusters.
 *
 * The dirty flag is set before the metadata is torn down and cleared again
 * once the in-memory and on-disk structures agree.  If a step fails while
 * they may disagree, bs->drv is set to NULL (see fail_broken_refcounts).
 *
 * Asserts that 3 + l1_clusters fits into one refcount block.
 *
 * Returns 0 on success, a negative errno on failure.
 */
static int make_completely_empty(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;
    Error *local_err = NULL;
    int ret, l1_clusters;
    int64_t offset;
    uint64_t *new_reftable = NULL;
    uint64_t rt_entry, l1_size2;
    /* Big-endian values written to the header in a single request starting
     * at the l1_table_offset field.
     * NOTE(review): presumably mirrors three consecutive QCowHeader fields
     * in this order -- confirm against the header definition */
    struct {
        uint64_t l1_offset;
        uint64_t reftable_offset;
        uint32_t reftable_clusters;
    } QEMU_PACKED l1_ofs_rt_ofs_cls;

    ret = qcow2_cache_empty(bs, s->l2_table_cache);
    if (ret < 0) {
        goto fail;
    }

    ret = qcow2_cache_empty(bs, s->refcount_block_cache);
    if (ret < 0) {
        goto fail;
    }

    /* Refcounts will be broken utterly */
    ret = qcow2_mark_dirty(bs);
    if (ret < 0) {
        goto fail;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);

    /* L1 table size in clusters (rounded up) and in bytes */
    l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
    l1_size2 = (uint64_t)s->l1_size * sizeof(uint64_t);

    /* After this call, neither the in-memory nor the on-disk refcount
     * information accurately describe the actual references */

    ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset,
                             l1_clusters * s->cluster_size, 0);
    if (ret < 0) {
        goto fail_broken_refcounts;
    }
    /* Keep the in-memory L1 table in sync with the zeroed on-disk table */
    memset(s->l1_table, 0, l1_size2);

    BLKDBG_EVENT(bs->file, BLKDBG_EMPTY_IMAGE_PREPARE);

    /* Overwrite enough clusters at the beginning of the sectors to place
     * the refcount table, a refcount block and the L1 table in; this may
     * overwrite parts of the existing refcount and L1 table, which is not
     * an issue because the dirty flag is set, complete data loss is in fact
     * desired and partial data loss is consequently fine as well */
    ret = bdrv_pwrite_zeroes(bs->file, s->cluster_size,
                             (2 + l1_clusters) * s->cluster_size, 0);
    /* This call (even if it failed overall) may have overwritten on-disk
     * refcount structures; in that case, the in-memory refcount information
     * will probably differ from the on-disk information which makes the BDS
     * unusable */
    if (ret < 0) {
        goto fail_broken_refcounts;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
    BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE);

    /* "Create" an empty reftable (one cluster) directly after the image
     * header and an empty L1 table three clusters after the image header;
     * the cluster between those two will be used as the first refblock */
    l1_ofs_rt_ofs_cls.l1_offset = cpu_to_be64(3 * s->cluster_size);
    l1_ofs_rt_ofs_cls.reftable_offset = cpu_to_be64(s->cluster_size);
    l1_ofs_rt_ofs_cls.reftable_clusters = cpu_to_be32(1);
    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset),
                           &l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls));
    if (ret < 0) {
        goto fail_broken_refcounts;
    }

    s->l1_table_offset = 3 * s->cluster_size;

    /* One zero-filled cluster worth of reftable entries */
    new_reftable = g_try_new0(uint64_t, s->cluster_size / sizeof(uint64_t));
    if (!new_reftable) {
        ret = -ENOMEM;
        goto fail_broken_refcounts;
    }

    s->refcount_table_offset = s->cluster_size;
    s->refcount_table_size   = s->cluster_size / sizeof(uint64_t);
    s->max_refcount_table_index = 0;

    /* Ownership of new_reftable moves to the driver state; clear the local
     * pointer so the cleanup path below does not free it */
    g_free(s->refcount_table);
    s->refcount_table = new_reftable;
    new_reftable = NULL;

    /* Now the in-memory refcount information again corresponds to the on-disk
     * information (reftable is empty and no refblocks (the refblock cache is
     * empty)); however, this means some clusters (e.g. the image header) are
     * referenced, but not refcounted, but the normal qcow2 code assumes that
     * the in-memory information is always correct */

    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);

    /* Enter the first refblock into the reftable */
    rt_entry = cpu_to_be64(2 * s->cluster_size);
    ret = bdrv_pwrite_sync(bs->file, s->cluster_size,
                           &rt_entry, sizeof(rt_entry));
    if (ret < 0) {
        goto fail_broken_refcounts;
    }
    s->refcount_table[0] = 2 * s->cluster_size;

    /* Allocate the header, reftable, refblock and L1 table clusters through
     * the regular allocator so they get refcounted; with the free cluster
     * index reset the allocation is expected to start at offset 0 */
    s->free_cluster_index = 0;
    assert(3 + l1_clusters <= s->refcount_block_size);
    offset = qcow2_alloc_clusters(bs, 3 * s->cluster_size + l1_size2);
    if (offset < 0) {
        ret = offset;
        goto fail_broken_refcounts;
    } else if (offset > 0) {
        error_report("First cluster in emptied image is in use");
        abort();
    }

    /* Now finally the in-memory information corresponds to the on-disk
     * structures and is correct */
    ret = qcow2_mark_clean(bs);
    if (ret < 0) {
        goto fail;
    }

    /* Cut the file down to the clusters that are still in use */
    ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size, false,
                        PREALLOC_MODE_OFF, &local_err);
    if (ret < 0) {
        error_report_err(local_err);
        goto fail;
    }

    return 0;

fail_broken_refcounts:
    /* The BDS is unusable at this point. If we wanted to make it usable, we
     * would have to call qcow2_refcount_close(), qcow2_refcount_init(),
     * qcow2_check_refcounts(), qcow2_refcount_close() and qcow2_refcount_init()
     * again. However, because the functions which could have caused this error
     * path to be taken are used by those functions as well, it's very likely
     * that that sequence will fail as well. Therefore, just eject the BDS. */
    bs->drv = NULL;

fail:
    /* new_reftable is NULL unless it was allocated but not yet handed over */
    g_free(new_reftable);
    return ret;
}
4508
4509static int qcow2_make_empty(BlockDriverState *bs)
4510{
4511    BDRVQcow2State *s = bs->opaque;
4512    uint64_t offset, end_offset;
4513    int step = QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size);
4514    int l1_clusters, ret = 0;
4515
4516    l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
4517
4518    if (s->qcow_version >= 3 && !s->snapshots && !s->nb_bitmaps &&
4519        3 + l1_clusters <= s->refcount_block_size &&
4520        s->crypt_method_header != QCOW_CRYPT_LUKS &&
4521        !has_data_file(bs)) {
4522        /* The following function only works for qcow2 v3 images (it
4523         * requires the dirty flag) and only as long as there are no
4524         * features that reserve extra clusters (such as snapshots,
4525         * LUKS header, or persistent bitmaps), because it completely
4526         * empties the image.  Furthermore, the L1 table and three
4527         * additional clusters (image header, refcount table, one
4528         * refcount block) have to fit inside one refcount block. It
4529         * only resets the image file, i.e. does not work with an
4530         * external data file. */
4531        return make_completely_empty(bs);
4532    }
4533
4534    /* This fallback code simply discards every active cluster; this is slow,
4535     * but works in all cases */
4536    end_offset = bs->total_sectors * BDRV_SECTOR_SIZE;
4537    for (offset = 0; offset < end_offset; offset += step) {
4538        /* As this function is generally used after committing an external
4539         * snapshot, QCOW2_DISCARD_SNAPSHOT seems appropriate. Also, the
4540         * default action for this kind of discard is to pass the discard,
4541         * which will ideally result in an actually smaller image file, as
4542         * is probably desired. */
4543        ret = qcow2_cluster_discard(bs, offset, MIN(step, end_offset - offset),
4544                                    QCOW2_DISCARD_SNAPSHOT, true);
4545        if (ret < 0) {
4546            break;
4547        }
4548    }
4549
4550    return ret;
4551}
4552
4553static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
4554{
4555    BDRVQcow2State *s = bs->opaque;
4556    int ret;
4557
4558    qemu_co_mutex_lock(&s->lock);
4559    ret = qcow2_write_caches(bs);
4560    qemu_co_mutex_unlock(&s->lock);
4561
4562    return ret;
4563}
4564
4565static ssize_t qcow2_measure_crypto_hdr_init_func(QCryptoBlock *block,
4566        size_t headerlen, void *opaque, Error **errp)
4567{
4568    size_t *headerlenp = opaque;
4569
4570    /* Stash away the payload size */
4571    *headerlenp = headerlen;
4572    return 0;
4573}
4574
4575static ssize_t qcow2_measure_crypto_hdr_write_func(QCryptoBlock *block,
4576        size_t offset, const uint8_t *buf, size_t buflen,
4577        void *opaque, Error **errp)
4578{
4579    /* Discard the bytes, we're not actually writing to an image */
4580    return buflen;
4581}
4582
4583/* Determine the number of bytes for the LUKS payload */
4584static bool qcow2_measure_luks_headerlen(QemuOpts *opts, size_t *len,
4585                                         Error **errp)
4586{
4587    QDict *opts_qdict;
4588    QDict *cryptoopts_qdict;
4589    QCryptoBlockCreateOptions *cryptoopts;
4590    QCryptoBlock *crypto;
4591
4592    /* Extract "encrypt." options into a qdict */
4593    opts_qdict = qemu_opts_to_qdict(opts, NULL);
4594    qdict_extract_subqdict(opts_qdict, &cryptoopts_qdict, "encrypt.");
4595    qobject_unref(opts_qdict);
4596
4597    /* Build QCryptoBlockCreateOptions object from qdict */
4598    qdict_put_str(cryptoopts_qdict, "format", "luks");
4599    cryptoopts = block_crypto_create_opts_init(cryptoopts_qdict, errp);
4600    qobject_unref(cryptoopts_qdict);
4601    if (!cryptoopts) {
4602        return false;
4603    }
4604
4605    /* Fake LUKS creation in order to determine the payload size */
4606    crypto = qcrypto_block_create(cryptoopts, "encrypt.",
4607                                  qcow2_measure_crypto_hdr_init_func,
4608                                  qcow2_measure_crypto_hdr_write_func,
4609                                  len, errp);
4610    qapi_free_QCryptoBlockCreateOptions(cryptoopts);
4611    if (!crypto) {
4612        return false;
4613    }
4614
4615    qcrypto_block_free(crypto);
4616    return true;
4617}
4618
/*
 * Estimate the file size needed for a qcow2 image.
 *
 * @opts: image creation options; the options read here are removed from it
 * @in_bs: optional source image; when given, its length replaces the size
 *         option and its allocation status determines the required size
 * @errp: set on failure
 *
 * Returns a newly allocated BlockMeasureInfo with the fully allocated size
 * and the required size filled in, or NULL on error.
 */
static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
                                       Error **errp)
{
    Error *local_err = NULL;
    BlockMeasureInfo *info;
    uint64_t required = 0; /* bytes that contribute to required size */
    uint64_t virtual_size; /* disk size as seen by guest */
    uint64_t refcount_bits;
    uint64_t l2_tables;
    uint64_t luks_payload_size = 0;
    size_t cluster_size;
    int version;
    char *optstr;
    PreallocMode prealloc;
    bool has_backing_file;
    bool has_luks;

    /* Parse image creation options */
    cluster_size = qcow2_opt_get_cluster_size_del(opts, &local_err);
    if (local_err) {
        goto err;
    }

    version = qcow2_opt_get_version_del(opts, &local_err);
    if (local_err) {
        goto err;
    }

    refcount_bits = qcow2_opt_get_refcount_bits_del(opts, version, &local_err);
    if (local_err) {
        goto err;
    }

    optstr = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
    prealloc = qapi_enum_parse(&PreallocMode_lookup, optstr,
                               PREALLOC_MODE_OFF, &local_err);
    g_free(optstr);
    if (local_err) {
        goto err;
    }

    /* Only the presence of a backing file matters, not its name */
    optstr = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
    has_backing_file = !!optstr;
    g_free(optstr);

    optstr = qemu_opt_get_del(opts, BLOCK_OPT_ENCRYPT_FORMAT);
    has_luks = optstr && strcmp(optstr, "luks") == 0;
    g_free(optstr);

    if (has_luks) {
        size_t headerlen;

        if (!qcow2_measure_luks_headerlen(opts, &headerlen, &local_err)) {
            goto err;
        }

        /* The LUKS payload is accounted for in whole clusters */
        luks_payload_size = ROUND_UP(headerlen, cluster_size);
    }

    virtual_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
    virtual_size = ROUND_UP(virtual_size, cluster_size);

    /* Check that virtual disk size is valid */
    l2_tables = DIV_ROUND_UP(virtual_size / cluster_size,
                             cluster_size / sizeof(uint64_t));
    if (l2_tables * sizeof(uint64_t) > QCOW_MAX_L1_SIZE) {
        error_setg(&local_err, "The image size is too large "
                               "(try using a larger cluster size)");
        goto err;
    }

    /* Account for input image */
    if (in_bs) {
        int64_t ssize = bdrv_getlength(in_bs);
        if (ssize < 0) {
            error_setg_errno(&local_err, -ssize,
                             "Unable to get image virtual_size");
            goto err;
        }

        /* The input image's length overrides the size option */
        virtual_size = ROUND_UP(ssize, cluster_size);

        if (has_backing_file) {
            /* We don't know how much of the backing chain is shared by the
             * input image and the new image file.  In the worst case the new
             * image's backing file has nothing in common with the input
             * image.  Be conservative and assume all clusters need to be
             * written.
             */
            required = virtual_size;
        } else {
            int64_t offset;
            int64_t pnum = 0;

            /* Walk the input image extent by extent; pnum is the length of
             * the extent just examined */
            for (offset = 0; offset < ssize; offset += pnum) {
                int ret;

                ret = bdrv_block_status_above(in_bs, NULL, offset,
                                              ssize - offset, &pnum, NULL,
                                              NULL);
                if (ret < 0) {
                    error_setg_errno(&local_err, -ret,
                                     "Unable to get block status");
                    goto err;
                }

                if (ret & BDRV_BLOCK_ZERO) {
                    /* Skip zero regions (safe with no backing file) */
                } else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) ==
                           (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) {
                    /* Extend pnum to end of cluster for next iteration */
                    pnum = ROUND_UP(offset + pnum, cluster_size) - offset;

                    /* Count clusters we've seen */
                    required += offset % cluster_size + pnum;
                }
            }
        }
    }

    /* Take into account preallocation.  Nothing special is needed for
     * PREALLOC_MODE_METADATA since metadata is always counted.
     */
    if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) {
        required = virtual_size;
    }

    info = g_new(BlockMeasureInfo, 1);
    info->fully_allocated =
        qcow2_calc_prealloc_size(virtual_size, cluster_size,
                                 ctz32(refcount_bits)) + luks_payload_size;

    /* Remove data clusters that are not required.  This overestimates the
     * required size because metadata needed for the fully allocated file is
     * still counted.
     */
    info->required = info->fully_allocated - virtual_size + required;
    return info;

err:
    /* Every failure path above leaves its error in local_err */
    error_propagate(errp, local_err);
    return NULL;
}
4761
4762static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4763{
4764    BDRVQcow2State *s = bs->opaque;
4765    bdi->unallocated_blocks_are_zero = true;
4766    bdi->cluster_size = s->cluster_size;
4767    bdi->vm_state_offset = qcow2_vm_state_offset(s);
4768    return 0;
4769}
4770
4771static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs,
4772                                                  Error **errp)
4773{
4774    BDRVQcow2State *s = bs->opaque;
4775    ImageInfoSpecific *spec_info;
4776    QCryptoBlockInfo *encrypt_info = NULL;
4777    Error *local_err = NULL;
4778
4779    if (s->crypto != NULL) {
4780        encrypt_info = qcrypto_block_get_info(s->crypto, &local_err);
4781        if (local_err) {
4782            error_propagate(errp, local_err);
4783            return NULL;
4784        }
4785    }
4786
4787    spec_info = g_new(ImageInfoSpecific, 1);
4788    *spec_info = (ImageInfoSpecific){
4789        .type  = IMAGE_INFO_SPECIFIC_KIND_QCOW2,
4790        .u.qcow2.data = g_new0(ImageInfoSpecificQCow2, 1),
4791    };
4792    if (s->qcow_version == 2) {
4793        *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
4794            .compat             = g_strdup("0.10"),
4795            .refcount_bits      = s->refcount_bits,
4796        };
4797    } else if (s->qcow_version == 3) {
4798        Qcow2BitmapInfoList *bitmaps;
4799        bitmaps = qcow2_get_bitmap_info_list(bs, &local_err);
4800        if (local_err) {
4801            error_propagate(errp, local_err);
4802            qapi_free_ImageInfoSpecific(spec_info);
4803            return NULL;
4804        }
4805        *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
4806            .compat             = g_strdup("1.1"),
4807            .lazy_refcounts     = s->compatible_features &
4808                                  QCOW2_COMPAT_LAZY_REFCOUNTS,
4809            .has_lazy_refcounts = true,
4810            .corrupt            = s->incompatible_features &
4811                                  QCOW2_INCOMPAT_CORRUPT,
4812            .has_corrupt        = true,
4813            .refcount_bits      = s->refcount_bits,
4814            .has_bitmaps        = !!bitmaps,
4815            .bitmaps            = bitmaps,
4816            .has_data_file      = !!s->image_data_file,
4817            .data_file          = g_strdup(s->image_data_file),
4818            .has_data_file_raw  = has_data_file(bs),
4819            .data_file_raw      = data_file_is_raw(bs),
4820        };
4821    } else {
4822        /* if this assertion fails, this probably means a new version was
4823         * added without having it covered here */
4824        assert(false);
4825    }
4826
4827    if (encrypt_info) {
4828        ImageInfoSpecificQCow2Encryption *qencrypt =
4829            g_new(ImageInfoSpecificQCow2Encryption, 1);
4830        switch (encrypt_info->format) {
4831        case Q_CRYPTO_BLOCK_FORMAT_QCOW:
4832            qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_AES;
4833            break;
4834        case Q_CRYPTO_BLOCK_FORMAT_LUKS:
4835            qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_LUKS;
4836            qencrypt->u.luks = encrypt_info->u.luks;
4837            break;
4838        default:
4839            abort();
4840        }
4841        /* Since we did shallow copy above, erase any pointers
4842         * in the original info */
4843        memset(&encrypt_info->u, 0, sizeof(encrypt_info->u));
4844        qapi_free_QCryptoBlockInfo(encrypt_info);
4845
4846        spec_info->u.qcow2.data->has_encrypt = true;
4847        spec_info->u.qcow2.data->encrypt = qencrypt;
4848    }
4849
4850    return spec_info;
4851}
4852
4853static int qcow2_has_zero_init(BlockDriverState *bs)
4854{
4855    BDRVQcow2State *s = bs->opaque;
4856    bool preallocated;
4857
4858    if (qemu_in_coroutine()) {
4859        qemu_co_mutex_lock(&s->lock);
4860    }
4861    /*
4862     * Check preallocation status: Preallocated images have all L2
4863     * tables allocated, nonpreallocated images have none.  It is
4864     * therefore enough to check the first one.
4865     */
4866    preallocated = s->l1_size > 0 && s->l1_table[0] != 0;
4867    if (qemu_in_coroutine()) {
4868        qemu_co_mutex_unlock(&s->lock);
4869    }
4870
4871    if (!preallocated) {
4872        return 1;
4873    } else if (bs->encrypted) {
4874        return 0;
4875    } else {
4876        return bdrv_has_zero_init(s->data_file->bs);
4877    }
4878}
4879
4880static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
4881                              int64_t pos)
4882{
4883    BDRVQcow2State *s = bs->opaque;
4884
4885    BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
4886    return bs->drv->bdrv_co_pwritev_part(bs, qcow2_vm_state_offset(s) + pos,
4887                                         qiov->size, qiov, 0, 0);
4888}
4889
4890static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
4891                              int64_t pos)
4892{
4893    BDRVQcow2State *s = bs->opaque;
4894
4895    BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
4896    return bs->drv->bdrv_co_preadv_part(bs, qcow2_vm_state_offset(s) + pos,
4897                                        qiov->size, qiov, 0, 0);
4898}
4899
4900/*
4901 * Downgrades an image's version. To achieve this, any incompatible features
4902 * have to be removed.
4903 */
4904static int qcow2_downgrade(BlockDriverState *bs, int target_version,
4905                           BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
4906                           Error **errp)
4907{
4908    BDRVQcow2State *s = bs->opaque;
4909    int current_version = s->qcow_version;
4910    int ret;
4911
4912    /* This is qcow2_downgrade(), not qcow2_upgrade() */
4913    assert(target_version < current_version);
4914
4915    /* There are no other versions (now) that you can downgrade to */
4916    assert(target_version == 2);
4917
4918    if (s->refcount_order != 4) {
4919        error_setg(errp, "compat=0.10 requires refcount_bits=16");
4920        return -ENOTSUP;
4921    }
4922
4923    if (has_data_file(bs)) {
4924        error_setg(errp, "Cannot downgrade an image with a data file");
4925        return -ENOTSUP;
4926    }
4927
4928    /* clear incompatible features */
4929    if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
4930        ret = qcow2_mark_clean(bs);
4931        if (ret < 0) {
4932            error_setg_errno(errp, -ret, "Failed to make the image clean");
4933            return ret;
4934        }
4935    }
4936
4937    /* with QCOW2_INCOMPAT_CORRUPT, it is pretty much impossible to get here in
4938     * the first place; if that happens nonetheless, returning -ENOTSUP is the
4939     * best thing to do anyway */
4940
4941    if (s->incompatible_features) {
4942        error_setg(errp, "Cannot downgrade an image with incompatible features "
4943                   "%#" PRIx64 " set", s->incompatible_features);
4944        return -ENOTSUP;
4945    }
4946
4947    /* since we can ignore compatible features, we can set them to 0 as well */
4948    s->compatible_features = 0;
4949    /* if lazy refcounts have been used, they have already been fixed through
4950     * clearing the dirty flag */
4951
4952    /* clearing autoclear features is trivial */
4953    s->autoclear_features = 0;
4954
4955    ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque);
4956    if (ret < 0) {
4957        error_setg_errno(errp, -ret, "Failed to turn zero into data clusters");
4958        return ret;
4959    }
4960
4961    s->qcow_version = target_version;
4962    ret = qcow2_update_header(bs);
4963    if (ret < 0) {
4964        s->qcow_version = current_version;
4965        error_setg_errno(errp, -ret, "Failed to update the image header");
4966        return ret;
4967    }
4968    return 0;
4969}
4970
4971/*
4972 * Upgrades an image's version.  While newer versions encompass all
4973 * features of older versions, some things may have to be presented
4974 * differently.
4975 */
4976static int qcow2_upgrade(BlockDriverState *bs, int target_version,
4977                         BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
4978                         Error **errp)
4979{
4980    BDRVQcow2State *s = bs->opaque;
4981    bool need_snapshot_update;
4982    int current_version = s->qcow_version;
4983    int i;
4984    int ret;
4985
4986    /* This is qcow2_upgrade(), not qcow2_downgrade() */
4987    assert(target_version > current_version);
4988
4989    /* There are no other versions (yet) that you can upgrade to */
4990    assert(target_version == 3);
4991
4992    status_cb(bs, 0, 2, cb_opaque);
4993
4994    /*
4995     * In v2, snapshots do not need to have extra data.  v3 requires
4996     * the 64-bit VM state size and the virtual disk size to be
4997     * present.
4998     * qcow2_write_snapshots() will always write the list in the
4999     * v3-compliant format.
5000     */
5001    need_snapshot_update = false;
5002    for (i = 0; i < s->nb_snapshots; i++) {
5003        if (s->snapshots[i].extra_data_size <
5004            sizeof_field(QCowSnapshotExtraData, vm_state_size_large) +
5005            sizeof_field(QCowSnapshotExtraData, disk_size))
5006        {
5007            need_snapshot_update = true;
5008            break;
5009        }
5010    }
5011    if (need_snapshot_update) {
5012        ret = qcow2_write_snapshots(bs);
5013        if (ret < 0) {
5014            error_setg_errno(errp, -ret, "Failed to update the snapshot table");
5015            return ret;
5016        }
5017    }
5018    status_cb(bs, 1, 2, cb_opaque);
5019
5020    s->qcow_version = target_version;
5021    ret = qcow2_update_header(bs);
5022    if (ret < 0) {
5023        s->qcow_version = current_version;
5024        error_setg_errno(errp, -ret, "Failed to update the image header");
5025        return ret;
5026    }
5027    status_cb(bs, 2, 2, cb_opaque);
5028
5029    return 0;
5030}
5031
/*
 * Identifies the amend step that is currently reporting progress; used
 * for the current_operation and last_operation fields of
 * Qcow2AmendHelperCBInfo.
 */
typedef enum Qcow2AmendOperation {
    /* This is the value Qcow2AmendHelperCBInfo::last_operation will be
     * statically initialized to so that the helper CB can discern the first
     * invocation from an operation change */
    QCOW2_NO_OPERATION = 0,

    /* NOTE(review): presumably one value per amend step (version upgrade,
     * refcount width change, version downgrade) -- confirm against the
     * code that sets current_operation */
    QCOW2_UPGRADING,
    QCOW2_CHANGING_REFCOUNT_ORDER,
    QCOW2_DOWNGRADING,
} Qcow2AmendOperation;
5042
/*
 * State passed as the opaque pointer to qcow2_amend_helper_cb(), which
 * merges the progress of several consecutive amend operations into one
 * progress report for the original status callback.
 */
typedef struct Qcow2AmendHelperCBInfo {
    /* The code coordinating the amend operations should only modify
     * these four fields; the rest will be managed by the CB */
    BlockDriverAmendStatusCB *original_status_cb;
    void *original_cb_opaque;

    /* Operation whose progress is being reported right now */
    Qcow2AmendOperation current_operation;

    /* Total number of operations to perform (only set once) */
    int total_operations;

    /* The following fields are managed by the CB */

    /* Number of operations completed */
    int operations_completed;

    /* Cumulative offset of all completed operations */
    int64_t offset_completed;

    /* Operation and work size seen on the previous CB invocation; the CB
     * uses them to detect a switch to the next operation */
    Qcow2AmendOperation last_operation;
    int64_t last_work_size;
} Qcow2AmendHelperCBInfo;
5065
5066static void qcow2_amend_helper_cb(BlockDriverState *bs,
5067                                  int64_t operation_offset,
5068                                  int64_t operation_work_size, void *opaque)
5069{
5070    Qcow2AmendHelperCBInfo *info = opaque;
5071    int64_t current_work_size;
5072    int64_t projected_work_size;
5073
5074    if (info->current_operation != info->last_operation) {
5075        if (info->last_operation != QCOW2_NO_OPERATION) {
5076            info->offset_completed += info->last_work_size;
5077            info->operations_completed++;
5078        }
5079
5080        info->last_operation = info->current_operation;
5081    }
5082
5083    assert(info->total_operations > 0);
5084    assert(info->operations_completed < info->total_operations);
5085
5086    info->last_work_size = operation_work_size;
5087
5088    current_work_size = info->offset_completed + operation_work_size;
5089
5090    /* current_work_size is the total work size for (operations_completed + 1)
5091     * operations (which includes this one), so multiply it by the number of
5092     * operations not covered and divide it by the number of operations
5093     * covered to get a projection for the operations not covered */
5094    projected_work_size = current_work_size * (info->total_operations -
5095                                               info->operations_completed - 1)
5096                                            / (info->operations_completed + 1);
5097
5098    info->original_status_cb(bs, info->offset_completed + operation_offset,
5099                             current_work_size + projected_work_size,
5100                             info->original_cb_opaque);
5101}
5102
5103static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
5104                               BlockDriverAmendStatusCB *status_cb,
5105                               void *cb_opaque,
5106                               Error **errp)
5107{
5108    BDRVQcow2State *s = bs->opaque;
5109    int old_version = s->qcow_version, new_version = old_version;
5110    uint64_t new_size = 0;
5111    const char *backing_file = NULL, *backing_format = NULL, *data_file = NULL;
5112    bool lazy_refcounts = s->use_lazy_refcounts;
5113    bool data_file_raw = data_file_is_raw(bs);
5114    const char *compat = NULL;
5115    uint64_t cluster_size = s->cluster_size;
5116    bool encrypt;
5117    int encformat;
5118    int refcount_bits = s->refcount_bits;
5119    int ret;
5120    QemuOptDesc *desc = opts->list->desc;
5121    Qcow2AmendHelperCBInfo helper_cb_info;
5122
5123    while (desc && desc->name) {
5124        if (!qemu_opt_find(opts, desc->name)) {
5125            /* only change explicitly defined options */
5126            desc++;
5127            continue;
5128        }
5129
5130        if (!strcmp(desc->name, BLOCK_OPT_COMPAT_LEVEL)) {
5131            compat = qemu_opt_get(opts, BLOCK_OPT_COMPAT_LEVEL);
5132            if (!compat) {
5133                /* preserve default */
5134            } else if (!strcmp(compat, "0.10") || !strcmp(compat, "v2")) {
5135                new_version = 2;
5136            } else if (!strcmp(compat, "1.1") || !strcmp(compat, "v3")) {
5137                new_version = 3;
5138            } else {
5139                error_setg(errp, "Unknown compatibility level %s", compat);
5140                return -EINVAL;
5141            }
5142        } else if (!strcmp(desc->name, BLOCK_OPT_PREALLOC)) {
5143            error_setg(errp, "Cannot change preallocation mode");
5144            return -ENOTSUP;
5145        } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) {
5146            new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
5147        } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FILE)) {
5148            backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
5149        } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FMT)) {
5150            backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
5151        } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT)) {
5152            encrypt = qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT,
5153                                        !!s->crypto);
5154
5155            if (encrypt != !!s->crypto) {
5156                error_setg(errp,
5157                           "Changing the encryption flag is not supported");
5158                return -ENOTSUP;
5159            }
5160        } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT_FORMAT)) {
5161            encformat = qcow2_crypt_method_from_format(
5162                qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT));
5163
5164            if (encformat != s->crypt_method_header) {
5165                error_setg(errp,
5166                           "Changing the encryption format is not supported");
5167                return -ENOTSUP;
5168            }
5169        } else if (g_str_has_prefix(desc->name, "encrypt.")) {
5170            error_setg(errp,
5171                       "Changing the encryption parameters is not supported");
5172            return -ENOTSUP;
5173        } else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) {
5174            cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE,
5175                                             cluster_size);
5176            if (cluster_size != s->cluster_size) {
5177                error_setg(errp, "Changing the cluster size is not supported");
5178                return -ENOTSUP;
5179            }
5180        } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
5181            lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS,
5182                                               lazy_refcounts);
5183        } else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) {
5184            refcount_bits = qemu_opt_get_number(opts, BLOCK_OPT_REFCOUNT_BITS,
5185                                                refcount_bits);
5186
5187            if (refcount_bits <= 0 || refcount_bits > 64 ||
5188                !is_power_of_2(refcount_bits))
5189            {
5190                error_setg(errp, "Refcount width must be a power of two and "
5191                           "may not exceed 64 bits");
5192                return -EINVAL;
5193            }
5194        } else if (!strcmp(desc->name, BLOCK_OPT_DATA_FILE)) {
5195            data_file = qemu_opt_get(opts, BLOCK_OPT_DATA_FILE);
5196            if (data_file && !has_data_file(bs)) {
5197                error_setg(errp, "data-file can only be set for images that "
5198                                 "use an external data file");
5199                return -EINVAL;
5200            }
5201        } else if (!strcmp(desc->name, BLOCK_OPT_DATA_FILE_RAW)) {
5202            data_file_raw = qemu_opt_get_bool(opts, BLOCK_OPT_DATA_FILE_RAW,
5203                                              data_file_raw);
5204            if (data_file_raw && !data_file_is_raw(bs)) {
5205                error_setg(errp, "data-file-raw cannot be set on existing "
5206                                 "images");
5207                return -EINVAL;
5208            }
5209        } else {
5210            /* if this point is reached, this probably means a new option was
5211             * added without having it covered here */
5212            abort();
5213        }
5214
5215        desc++;
5216    }
5217
5218    helper_cb_info = (Qcow2AmendHelperCBInfo){
5219        .original_status_cb = status_cb,
5220        .original_cb_opaque = cb_opaque,
5221        .total_operations = (new_version != old_version)
5222                          + (s->refcount_bits != refcount_bits)
5223    };
5224
5225    /* Upgrade first (some features may require compat=1.1) */
5226    if (new_version > old_version) {
5227        helper_cb_info.current_operation = QCOW2_UPGRADING;
5228        ret = qcow2_upgrade(bs, new_version, &qcow2_amend_helper_cb,
5229                            &helper_cb_info, errp);
5230        if (ret < 0) {
5231            return ret;
5232        }
5233    }
5234
5235    if (s->refcount_bits != refcount_bits) {
5236        int refcount_order = ctz32(refcount_bits);
5237
5238        if (new_version < 3 && refcount_bits != 16) {
5239            error_setg(errp, "Refcount widths other than 16 bits require "
5240                       "compatibility level 1.1 or above (use compat=1.1 or "
5241                       "greater)");
5242            return -EINVAL;
5243        }
5244
5245        helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER;
5246        ret = qcow2_change_refcount_order(bs, refcount_order,
5247                                          &qcow2_amend_helper_cb,
5248                                          &helper_cb_info, errp);
5249        if (ret < 0) {
5250            return ret;
5251        }
5252    }
5253
5254    /* data-file-raw blocks backing files, so clear it first if requested */
5255    if (data_file_raw) {
5256        s->autoclear_features |= QCOW2_AUTOCLEAR_DATA_FILE_RAW;
5257    } else {
5258        s->autoclear_features &= ~QCOW2_AUTOCLEAR_DATA_FILE_RAW;
5259    }
5260
5261    if (data_file) {
5262        g_free(s->image_data_file);
5263        s->image_data_file = *data_file ? g_strdup(data_file) : NULL;
5264    }
5265
5266    ret = qcow2_update_header(bs);
5267    if (ret < 0) {
5268        error_setg_errno(errp, -ret, "Failed to update the image header");
5269        return ret;
5270    }
5271
5272    if (backing_file || backing_format) {
5273        ret = qcow2_change_backing_file(bs,
5274                    backing_file ?: s->image_backing_file,
5275                    backing_format ?: s->image_backing_format);
5276        if (ret < 0) {
5277            error_setg_errno(errp, -ret, "Failed to change the backing file");
5278            return ret;
5279        }
5280    }
5281
5282    if (s->use_lazy_refcounts != lazy_refcounts) {
5283        if (lazy_refcounts) {
5284            if (new_version < 3) {
5285                error_setg(errp, "Lazy refcounts only supported with "
5286                           "compatibility level 1.1 and above (use compat=1.1 "
5287                           "or greater)");
5288                return -EINVAL;
5289            }
5290            s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
5291            ret = qcow2_update_header(bs);
5292            if (ret < 0) {
5293                s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
5294                error_setg_errno(errp, -ret, "Failed to update the image header");
5295                return ret;
5296            }
5297            s->use_lazy_refcounts = true;
5298        } else {
5299            /* make image clean first */
5300            ret = qcow2_mark_clean(bs);
5301            if (ret < 0) {
5302                error_setg_errno(errp, -ret, "Failed to make the image clean");
5303                return ret;
5304            }
5305            /* now disallow lazy refcounts */
5306            s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
5307            ret = qcow2_update_header(bs);
5308            if (ret < 0) {
5309                s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
5310                error_setg_errno(errp, -ret, "Failed to update the image header");
5311                return ret;
5312            }
5313            s->use_lazy_refcounts = false;
5314        }
5315    }
5316
5317    if (new_size) {
5318        BlockBackend *blk = blk_new(bdrv_get_aio_context(bs),
5319                                    BLK_PERM_RESIZE, BLK_PERM_ALL);
5320        ret = blk_insert_bs(blk, bs, errp);
5321        if (ret < 0) {
5322            blk_unref(blk);
5323            return ret;
5324        }
5325
5326        /*
5327         * Amending image options should ensure that the image has
5328         * exactly the given new values, so pass exact=true here.
5329         */
5330        ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, errp);
5331        blk_unref(blk);
5332        if (ret < 0) {
5333            return ret;
5334        }
5335    }
5336
5337    /* Downgrade last (so unsupported features can be removed before) */
5338    if (new_version < old_version) {
5339        helper_cb_info.current_operation = QCOW2_DOWNGRADING;
5340        ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb,
5341                              &helper_cb_info, errp);
5342        if (ret < 0) {
5343            return ret;
5344        }
5345    }
5346
5347    return 0;
5348}
5349
5350/*
5351 * If offset or size are negative, respectively, they will not be included in
5352 * the BLOCK_IMAGE_CORRUPTED event emitted.
5353 * fatal will be ignored for read-only BDS; corruptions found there will always
5354 * be considered non-fatal.
5355 */
5356void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
5357                             int64_t size, const char *message_format, ...)
5358{
5359    BDRVQcow2State *s = bs->opaque;
5360    const char *node_name;
5361    char *message;
5362    va_list ap;
5363
5364    fatal = fatal && bdrv_is_writable(bs);
5365
5366    if (s->signaled_corruption &&
5367        (!fatal || (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT)))
5368    {
5369        return;
5370    }
5371
5372    va_start(ap, message_format);
5373    message = g_strdup_vprintf(message_format, ap);
5374    va_end(ap);
5375
5376    if (fatal) {
5377        fprintf(stderr, "qcow2: Marking image as corrupt: %s; further "
5378                "corruption events will be suppressed\n", message);
5379    } else {
5380        fprintf(stderr, "qcow2: Image is corrupt: %s; further non-fatal "
5381                "corruption events will be suppressed\n", message);
5382    }
5383
5384    node_name = bdrv_get_node_name(bs);
5385    qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs),
5386                                          *node_name != '\0', node_name,
5387                                          message, offset >= 0, offset,
5388                                          size >= 0, size,
5389                                          fatal);
5390    g_free(message);
5391
5392    if (fatal) {
5393        qcow2_mark_corrupt(bs);
5394        bs->drv = NULL; /* make BDS unusable */
5395    }
5396
5397    s->signaled_corruption = true;
5398}
5399
5400static QemuOptsList qcow2_create_opts = {
5401    .name = "qcow2-create-opts",
5402    .head = QTAILQ_HEAD_INITIALIZER(qcow2_create_opts.head),
5403    .desc = {
5404        {
5405            .name = BLOCK_OPT_SIZE,
5406            .type = QEMU_OPT_SIZE,
5407            .help = "Virtual disk size"
5408        },
5409        {
5410            .name = BLOCK_OPT_COMPAT_LEVEL,
5411            .type = QEMU_OPT_STRING,
5412            .help = "Compatibility level (v2 [0.10] or v3 [1.1])"
5413        },
5414        {
5415            .name = BLOCK_OPT_BACKING_FILE,
5416            .type = QEMU_OPT_STRING,
5417            .help = "File name of a base image"
5418        },
5419        {
5420            .name = BLOCK_OPT_BACKING_FMT,
5421            .type = QEMU_OPT_STRING,
5422            .help = "Image format of the base image"
5423        },
5424        {
5425            .name = BLOCK_OPT_DATA_FILE,
5426            .type = QEMU_OPT_STRING,
5427            .help = "File name of an external data file"
5428        },
5429        {
5430            .name = BLOCK_OPT_DATA_FILE_RAW,
5431            .type = QEMU_OPT_BOOL,
5432            .help = "The external data file must stay valid as a raw image"
5433        },
5434        {
5435            .name = BLOCK_OPT_ENCRYPT,
5436            .type = QEMU_OPT_BOOL,
5437            .help = "Encrypt the image with format 'aes'. (Deprecated "
5438                    "in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)",
5439        },
5440        {
5441            .name = BLOCK_OPT_ENCRYPT_FORMAT,
5442            .type = QEMU_OPT_STRING,
5443            .help = "Encrypt the image, format choices: 'aes', 'luks'",
5444        },
5445        BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.",
5446            "ID of secret providing qcow AES key or LUKS passphrase"),
5447        BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG("encrypt."),
5448        BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE("encrypt."),
5449        BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG("encrypt."),
5450        BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG("encrypt."),
5451        BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG("encrypt."),
5452        BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME("encrypt."),
5453        {
5454            .name = BLOCK_OPT_CLUSTER_SIZE,
5455            .type = QEMU_OPT_SIZE,
5456            .help = "qcow2 cluster size",
5457            .def_value_str = stringify(DEFAULT_CLUSTER_SIZE)
5458        },
5459        {
5460            .name = BLOCK_OPT_PREALLOC,
5461            .type = QEMU_OPT_STRING,
5462            .help = "Preallocation mode (allowed values: off, metadata, "
5463                    "falloc, full)"
5464        },
5465        {
5466            .name = BLOCK_OPT_LAZY_REFCOUNTS,
5467            .type = QEMU_OPT_BOOL,
5468            .help = "Postpone refcount updates",
5469            .def_value_str = "off"
5470        },
5471        {
5472            .name = BLOCK_OPT_REFCOUNT_BITS,
5473            .type = QEMU_OPT_NUMBER,
5474            .help = "Width of a reference count entry in bits",
5475            .def_value_str = "16"
5476        },
5477        { /* end of list */ }
5478    }
5479};
5480
5481static const char *const qcow2_strong_runtime_opts[] = {
5482    "encrypt." BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET,
5483
5484    NULL
5485};
5486
5487BlockDriver bdrv_qcow2 = {
5488    .format_name        = "qcow2",
5489    .instance_size      = sizeof(BDRVQcow2State),
5490    .bdrv_probe         = qcow2_probe,
5491    .bdrv_open          = qcow2_open,
5492    .bdrv_close         = qcow2_close,
5493    .bdrv_reopen_prepare  = qcow2_reopen_prepare,
5494    .bdrv_reopen_commit   = qcow2_reopen_commit,
5495    .bdrv_reopen_abort    = qcow2_reopen_abort,
5496    .bdrv_join_options    = qcow2_join_options,
5497    .bdrv_child_perm      = bdrv_format_default_perms,
5498    .bdrv_co_create_opts  = qcow2_co_create_opts,
5499    .bdrv_co_create       = qcow2_co_create,
5500    .bdrv_has_zero_init   = qcow2_has_zero_init,
5501    .bdrv_has_zero_init_truncate = bdrv_has_zero_init_1,
5502    .bdrv_co_block_status = qcow2_co_block_status,
5503
5504    .bdrv_co_preadv_part    = qcow2_co_preadv_part,
5505    .bdrv_co_pwritev_part   = qcow2_co_pwritev_part,
5506    .bdrv_co_flush_to_os    = qcow2_co_flush_to_os,
5507
5508    .bdrv_co_pwrite_zeroes  = qcow2_co_pwrite_zeroes,
5509    .bdrv_co_pdiscard       = qcow2_co_pdiscard,
5510    .bdrv_co_copy_range_from = qcow2_co_copy_range_from,
5511    .bdrv_co_copy_range_to  = qcow2_co_copy_range_to,
5512    .bdrv_co_truncate       = qcow2_co_truncate,
5513    .bdrv_co_pwritev_compressed_part = qcow2_co_pwritev_compressed_part,
5514    .bdrv_make_empty        = qcow2_make_empty,
5515
5516    .bdrv_snapshot_create   = qcow2_snapshot_create,
5517    .bdrv_snapshot_goto     = qcow2_snapshot_goto,
5518    .bdrv_snapshot_delete   = qcow2_snapshot_delete,
5519    .bdrv_snapshot_list     = qcow2_snapshot_list,
5520    .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp,
5521    .bdrv_measure           = qcow2_measure,
5522    .bdrv_get_info          = qcow2_get_info,
5523    .bdrv_get_specific_info = qcow2_get_specific_info,
5524
5525    .bdrv_save_vmstate    = qcow2_save_vmstate,
5526    .bdrv_load_vmstate    = qcow2_load_vmstate,
5527
5528    .supports_backing           = true,
5529    .bdrv_change_backing_file   = qcow2_change_backing_file,
5530
5531    .bdrv_refresh_limits        = qcow2_refresh_limits,
5532    .bdrv_co_invalidate_cache   = qcow2_co_invalidate_cache,
5533    .bdrv_inactivate            = qcow2_inactivate,
5534
5535    .create_opts         = &qcow2_create_opts,
5536    .strong_runtime_opts = qcow2_strong_runtime_opts,
5537    .mutable_opts        = mutable_opts,
5538    .bdrv_co_check       = qcow2_co_check,
5539    .bdrv_amend_options  = qcow2_amend_options,
5540
5541    .bdrv_detach_aio_context  = qcow2_detach_aio_context,
5542    .bdrv_attach_aio_context  = qcow2_attach_aio_context,
5543
5544    .bdrv_co_can_store_new_dirty_bitmap = qcow2_co_can_store_new_dirty_bitmap,
5545    .bdrv_co_remove_persistent_dirty_bitmap =
5546            qcow2_co_remove_persistent_dirty_bitmap,
5547};
5548
5549static void bdrv_qcow2_init(void)
5550{
5551    bdrv_register(&bdrv_qcow2);
5552}
5553
5554block_init(bdrv_qcow2_init);
5555