qemu/block/qcow2.c
<<
>>
Prefs
   1/*
   2 * Block driver for the QCOW version 2 format
   3 *
   4 * Copyright (c) 2004-2006 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24
  25#include "qemu/osdep.h"
  26
  27#include "block/qdict.h"
  28#include "sysemu/block-backend.h"
  29#include "qemu/main-loop.h"
  30#include "qemu/module.h"
  31#include "qcow2.h"
  32#include "qemu/error-report.h"
  33#include "qapi/error.h"
  34#include "qapi/qapi-events-block-core.h"
  35#include "qapi/qmp/qdict.h"
  36#include "qapi/qmp/qstring.h"
  37#include "trace.h"
  38#include "qemu/option_int.h"
  39#include "qemu/cutils.h"
  40#include "qemu/bswap.h"
  41#include "qapi/qobject-input-visitor.h"
  42#include "qapi/qapi-visit-block-core.h"
  43#include "crypto.h"
  44#include "block/aio_task.h"
  45
  46/*
  47  Differences with QCOW:
  48
  49  - Support for multiple incremental snapshots.
  50  - Memory management by reference counts.
  51  - Clusters which have a reference count of one have the bit
  52    QCOW_OFLAG_COPIED to optimize write performance.
  53  - Size of compressed clusters is stored in sectors to reduce bit usage
  54    in the cluster offsets.
  55  - Support for storing additional data (such as the VM state) in the
  56    snapshots.
  57  - If a backing store is used, the cluster size is not constrained
  58    (could be backported to QCOW).
   59  - L2 tables always have a size of one cluster.
  60*/
  61
  62
  63typedef struct {
  64    uint32_t magic;
  65    uint32_t len;
  66} QEMU_PACKED QCowExtension;
  67
  68#define  QCOW2_EXT_MAGIC_END 0
  69#define  QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
  70#define  QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857
  71#define  QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77
  72#define  QCOW2_EXT_MAGIC_BITMAPS 0x23852875
  73#define  QCOW2_EXT_MAGIC_DATA_FILE 0x44415441
  74
  75static int coroutine_fn
  76qcow2_co_preadv_compressed(BlockDriverState *bs,
  77                           uint64_t file_cluster_offset,
  78                           uint64_t offset,
  79                           uint64_t bytes,
  80                           QEMUIOVector *qiov,
  81                           size_t qiov_offset);
  82
  83static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
  84{
  85    const QCowHeader *cow_header = (const void *)buf;
  86
  87    if (buf_size >= sizeof(QCowHeader) &&
  88        be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
  89        be32_to_cpu(cow_header->version) >= 2)
  90        return 100;
  91    else
  92        return 0;
  93}
  94
  95
  96static ssize_t qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset,
  97                                          uint8_t *buf, size_t buflen,
  98                                          void *opaque, Error **errp)
  99{
 100    BlockDriverState *bs = opaque;
 101    BDRVQcow2State *s = bs->opaque;
 102    ssize_t ret;
 103
 104    if ((offset + buflen) > s->crypto_header.length) {
 105        error_setg(errp, "Request for data outside of extension header");
 106        return -1;
 107    }
 108
 109    ret = bdrv_pread(bs->file,
 110                     s->crypto_header.offset + offset, buf, buflen);
 111    if (ret < 0) {
 112        error_setg_errno(errp, -ret, "Could not read encryption header");
 113        return -1;
 114    }
 115    return ret;
 116}
 117
 118
 119static ssize_t qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen,
 120                                          void *opaque, Error **errp)
 121{
 122    BlockDriverState *bs = opaque;
 123    BDRVQcow2State *s = bs->opaque;
 124    int64_t ret;
 125    int64_t clusterlen;
 126
 127    ret = qcow2_alloc_clusters(bs, headerlen);
 128    if (ret < 0) {
 129        error_setg_errno(errp, -ret,
 130                         "Cannot allocate cluster for LUKS header size %zu",
 131                         headerlen);
 132        return -1;
 133    }
 134
 135    s->crypto_header.length = headerlen;
 136    s->crypto_header.offset = ret;
 137
 138    /*
 139     * Zero fill all space in cluster so it has predictable
 140     * content, as we may not initialize some regions of the
 141     * header (eg only 1 out of 8 key slots will be initialized)
 142     */
 143    clusterlen = size_to_clusters(s, headerlen) * s->cluster_size;
 144    assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen, false) == 0);
 145    ret = bdrv_pwrite_zeroes(bs->file,
 146                             ret,
 147                             clusterlen, 0);
 148    if (ret < 0) {
 149        error_setg_errno(errp, -ret, "Could not zero fill encryption header");
 150        return -1;
 151    }
 152
 153    return ret;
 154}
 155
 156
 157static ssize_t qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset,
 158                                           const uint8_t *buf, size_t buflen,
 159                                           void *opaque, Error **errp)
 160{
 161    BlockDriverState *bs = opaque;
 162    BDRVQcow2State *s = bs->opaque;
 163    ssize_t ret;
 164
 165    if ((offset + buflen) > s->crypto_header.length) {
 166        error_setg(errp, "Request for data outside of extension header");
 167        return -1;
 168    }
 169
 170    ret = bdrv_pwrite(bs->file,
 171                      s->crypto_header.offset + offset, buf, buflen);
 172    if (ret < 0) {
 173        error_setg_errno(errp, -ret, "Could not read encryption header");
 174        return -1;
 175    }
 176    return ret;
 177}
 178
 179
 180/*
 181 * read qcow2 extension and fill bs
 182 * start reading from start_offset
 183 * finish reading upon magic of value 0 or when end_offset reached
 184 * unknown magic is skipped (future extension this version knows nothing about)
 185 * return 0 upon success, non-0 otherwise
 186 */
 187static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
 188                                 uint64_t end_offset, void **p_feature_table,
 189                                 int flags, bool *need_update_header,
 190                                 Error **errp)
 191{
 192    BDRVQcow2State *s = bs->opaque;
 193    QCowExtension ext;
 194    uint64_t offset;
 195    int ret;
 196    Qcow2BitmapHeaderExt bitmaps_ext;
 197
 198    if (need_update_header != NULL) {
 199        *need_update_header = false;
 200    }
 201
 202#ifdef DEBUG_EXT
 203    printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
 204#endif
 205    offset = start_offset;
 206    while (offset < end_offset) {
 207
 208#ifdef DEBUG_EXT
 209        /* Sanity check */
 210        if (offset > s->cluster_size)
 211            printf("qcow2_read_extension: suspicious offset %lu\n", offset);
 212
 213        printf("attempting to read extended header in offset %lu\n", offset);
 214#endif
 215
 216        ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext));
 217        if (ret < 0) {
 218            error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: "
 219                             "pread fail from offset %" PRIu64, offset);
 220            return 1;
 221        }
 222        ext.magic = be32_to_cpu(ext.magic);
 223        ext.len = be32_to_cpu(ext.len);
 224        offset += sizeof(ext);
 225#ifdef DEBUG_EXT
 226        printf("ext.magic = 0x%x\n", ext.magic);
 227#endif
 228        if (offset > end_offset || ext.len > end_offset - offset) {
 229            error_setg(errp, "Header extension too large");
 230            return -EINVAL;
 231        }
 232
 233        switch (ext.magic) {
 234        case QCOW2_EXT_MAGIC_END:
 235            return 0;
 236
 237        case QCOW2_EXT_MAGIC_BACKING_FORMAT:
 238            if (ext.len >= sizeof(bs->backing_format)) {
 239                error_setg(errp, "ERROR: ext_backing_format: len=%" PRIu32
 240                           " too large (>=%zu)", ext.len,
 241                           sizeof(bs->backing_format));
 242                return 2;
 243            }
 244            ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len);
 245            if (ret < 0) {
 246                error_setg_errno(errp, -ret, "ERROR: ext_backing_format: "
 247                                 "Could not read format name");
 248                return 3;
 249            }
 250            bs->backing_format[ext.len] = '\0';
 251            s->image_backing_format = g_strdup(bs->backing_format);
 252#ifdef DEBUG_EXT
 253            printf("Qcow2: Got format extension %s\n", bs->backing_format);
 254#endif
 255            break;
 256
 257        case QCOW2_EXT_MAGIC_FEATURE_TABLE:
 258            if (p_feature_table != NULL) {
 259                void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
 260                ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
 261                if (ret < 0) {
 262                    error_setg_errno(errp, -ret, "ERROR: ext_feature_table: "
 263                                     "Could not read table");
 264                    return ret;
 265                }
 266
 267                *p_feature_table = feature_table;
 268            }
 269            break;
 270
 271        case QCOW2_EXT_MAGIC_CRYPTO_HEADER: {
 272            unsigned int cflags = 0;
 273            if (s->crypt_method_header != QCOW_CRYPT_LUKS) {
 274                error_setg(errp, "CRYPTO header extension only "
 275                           "expected with LUKS encryption method");
 276                return -EINVAL;
 277            }
 278            if (ext.len != sizeof(Qcow2CryptoHeaderExtension)) {
 279                error_setg(errp, "CRYPTO header extension size %u, "
 280                           "but expected size %zu", ext.len,
 281                           sizeof(Qcow2CryptoHeaderExtension));
 282                return -EINVAL;
 283            }
 284
 285            ret = bdrv_pread(bs->file, offset, &s->crypto_header, ext.len);
 286            if (ret < 0) {
 287                error_setg_errno(errp, -ret,
 288                                 "Unable to read CRYPTO header extension");
 289                return ret;
 290            }
 291            s->crypto_header.offset = be64_to_cpu(s->crypto_header.offset);
 292            s->crypto_header.length = be64_to_cpu(s->crypto_header.length);
 293
 294            if ((s->crypto_header.offset % s->cluster_size) != 0) {
 295                error_setg(errp, "Encryption header offset '%" PRIu64 "' is "
 296                           "not a multiple of cluster size '%u'",
 297                           s->crypto_header.offset, s->cluster_size);
 298                return -EINVAL;
 299            }
 300
 301            if (flags & BDRV_O_NO_IO) {
 302                cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
 303            }
 304            s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
 305                                           qcow2_crypto_hdr_read_func,
 306                                           bs, cflags, QCOW2_MAX_THREADS, errp);
 307            if (!s->crypto) {
 308                return -EINVAL;
 309            }
 310        }   break;
 311
 312        case QCOW2_EXT_MAGIC_BITMAPS:
 313            if (ext.len != sizeof(bitmaps_ext)) {
 314                error_setg_errno(errp, -ret, "bitmaps_ext: "
 315                                 "Invalid extension length");
 316                return -EINVAL;
 317            }
 318
 319            if (!(s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS)) {
 320                if (s->qcow_version < 3) {
 321                    /* Let's be a bit more specific */
 322                    warn_report("This qcow2 v2 image contains bitmaps, but "
 323                                "they may have been modified by a program "
 324                                "without persistent bitmap support; so now "
 325                                "they must all be considered inconsistent");
 326                } else {
 327                    warn_report("a program lacking bitmap support "
 328                                "modified this file, so all bitmaps are now "
 329                                "considered inconsistent");
 330                }
 331                error_printf("Some clusters may be leaked, "
 332                             "run 'qemu-img check -r' on the image "
 333                             "file to fix.");
 334                if (need_update_header != NULL) {
 335                    /* Updating is needed to drop invalid bitmap extension. */
 336                    *need_update_header = true;
 337                }
 338                break;
 339            }
 340
 341            ret = bdrv_pread(bs->file, offset, &bitmaps_ext, ext.len);
 342            if (ret < 0) {
 343                error_setg_errno(errp, -ret, "bitmaps_ext: "
 344                                 "Could not read ext header");
 345                return ret;
 346            }
 347
 348            if (bitmaps_ext.reserved32 != 0) {
 349                error_setg_errno(errp, -ret, "bitmaps_ext: "
 350                                 "Reserved field is not zero");
 351                return -EINVAL;
 352            }
 353
 354            bitmaps_ext.nb_bitmaps = be32_to_cpu(bitmaps_ext.nb_bitmaps);
 355            bitmaps_ext.bitmap_directory_size =
 356                be64_to_cpu(bitmaps_ext.bitmap_directory_size);
 357            bitmaps_ext.bitmap_directory_offset =
 358                be64_to_cpu(bitmaps_ext.bitmap_directory_offset);
 359
 360            if (bitmaps_ext.nb_bitmaps > QCOW2_MAX_BITMAPS) {
 361                error_setg(errp,
 362                           "bitmaps_ext: Image has %" PRIu32 " bitmaps, "
 363                           "exceeding the QEMU supported maximum of %d",
 364                           bitmaps_ext.nb_bitmaps, QCOW2_MAX_BITMAPS);
 365                return -EINVAL;
 366            }
 367
 368            if (bitmaps_ext.nb_bitmaps == 0) {
 369                error_setg(errp, "found bitmaps extension with zero bitmaps");
 370                return -EINVAL;
 371            }
 372
 373            if (offset_into_cluster(s, bitmaps_ext.bitmap_directory_offset)) {
 374                error_setg(errp, "bitmaps_ext: "
 375                                 "invalid bitmap directory offset");
 376                return -EINVAL;
 377            }
 378
 379            if (bitmaps_ext.bitmap_directory_size >
 380                QCOW2_MAX_BITMAP_DIRECTORY_SIZE) {
 381                error_setg(errp, "bitmaps_ext: "
 382                                 "bitmap directory size (%" PRIu64 ") exceeds "
 383                                 "the maximum supported size (%d)",
 384                                 bitmaps_ext.bitmap_directory_size,
 385                                 QCOW2_MAX_BITMAP_DIRECTORY_SIZE);
 386                return -EINVAL;
 387            }
 388
 389            s->nb_bitmaps = bitmaps_ext.nb_bitmaps;
 390            s->bitmap_directory_offset =
 391                    bitmaps_ext.bitmap_directory_offset;
 392            s->bitmap_directory_size =
 393                    bitmaps_ext.bitmap_directory_size;
 394
 395#ifdef DEBUG_EXT
 396            printf("Qcow2: Got bitmaps extension: "
 397                   "offset=%" PRIu64 " nb_bitmaps=%" PRIu32 "\n",
 398                   s->bitmap_directory_offset, s->nb_bitmaps);
 399#endif
 400            break;
 401
 402        case QCOW2_EXT_MAGIC_DATA_FILE:
 403        {
 404            s->image_data_file = g_malloc0(ext.len + 1);
 405            ret = bdrv_pread(bs->file, offset, s->image_data_file, ext.len);
 406            if (ret < 0) {
 407                error_setg_errno(errp, -ret,
 408                                 "ERROR: Could not read data file name");
 409                return ret;
 410            }
 411#ifdef DEBUG_EXT
 412            printf("Qcow2: Got external data file %s\n", s->image_data_file);
 413#endif
 414            break;
 415        }
 416
 417        default:
 418            /* unknown magic - save it in case we need to rewrite the header */
 419            /* If you add a new feature, make sure to also update the fast
 420             * path of qcow2_make_empty() to deal with it. */
 421            {
 422                Qcow2UnknownHeaderExtension *uext;
 423
 424                uext = g_malloc0(sizeof(*uext)  + ext.len);
 425                uext->magic = ext.magic;
 426                uext->len = ext.len;
 427                QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
 428
 429                ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
 430                if (ret < 0) {
 431                    error_setg_errno(errp, -ret, "ERROR: unknown extension: "
 432                                     "Could not read data");
 433                    return ret;
 434                }
 435            }
 436            break;
 437        }
 438
 439        offset += ((ext.len + 7) & ~7);
 440    }
 441
 442    return 0;
 443}
 444
 445static void cleanup_unknown_header_ext(BlockDriverState *bs)
 446{
 447    BDRVQcow2State *s = bs->opaque;
 448    Qcow2UnknownHeaderExtension *uext, *next;
 449
 450    QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) {
 451        QLIST_REMOVE(uext, next);
 452        g_free(uext);
 453    }
 454}
 455
 456static void report_unsupported_feature(Error **errp, Qcow2Feature *table,
 457                                       uint64_t mask)
 458{
 459    g_autoptr(GString) features = g_string_sized_new(60);
 460
 461    while (table && table->name[0] != '\0') {
 462        if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) {
 463            if (mask & (1ULL << table->bit)) {
 464                if (features->len > 0) {
 465                    g_string_append(features, ", ");
 466                }
 467                g_string_append_printf(features, "%.46s", table->name);
 468                mask &= ~(1ULL << table->bit);
 469            }
 470        }
 471        table++;
 472    }
 473
 474    if (mask) {
 475        if (features->len > 0) {
 476            g_string_append(features, ", ");
 477        }
 478        g_string_append_printf(features,
 479                               "Unknown incompatible feature: %" PRIx64, mask);
 480    }
 481
 482    error_setg(errp, "Unsupported qcow2 feature(s): %s", features->str);
 483}
 484
 485/*
 486 * Sets the dirty bit and flushes afterwards if necessary.
 487 *
 488 * The incompatible_features bit is only set if the image file header was
 489 * updated successfully.  Therefore it is not required to check the return
 490 * value of this function.
 491 */
 492int qcow2_mark_dirty(BlockDriverState *bs)
 493{
 494    BDRVQcow2State *s = bs->opaque;
 495    uint64_t val;
 496    int ret;
 497
 498    assert(s->qcow_version >= 3);
 499
 500    if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
 501        return 0; /* already dirty */
 502    }
 503
 504    val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
 505    ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
 506                      &val, sizeof(val));
 507    if (ret < 0) {
 508        return ret;
 509    }
 510    ret = bdrv_flush(bs->file->bs);
 511    if (ret < 0) {
 512        return ret;
 513    }
 514
 515    /* Only treat image as dirty if the header was updated successfully */
 516    s->incompatible_features |= QCOW2_INCOMPAT_DIRTY;
 517    return 0;
 518}
 519
 520/*
 521 * Clears the dirty bit and flushes before if necessary.  Only call this
 522 * function when there are no pending requests, it does not guard against
 523 * concurrent requests dirtying the image.
 524 */
 525static int qcow2_mark_clean(BlockDriverState *bs)
 526{
 527    BDRVQcow2State *s = bs->opaque;
 528
 529    if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
 530        int ret;
 531
 532        s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;
 533
 534        ret = qcow2_flush_caches(bs);
 535        if (ret < 0) {
 536            return ret;
 537        }
 538
 539        return qcow2_update_header(bs);
 540    }
 541    return 0;
 542}
 543
 544/*
 545 * Marks the image as corrupt.
 546 */
 547int qcow2_mark_corrupt(BlockDriverState *bs)
 548{
 549    BDRVQcow2State *s = bs->opaque;
 550
 551    s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT;
 552    return qcow2_update_header(bs);
 553}
 554
 555/*
 556 * Marks the image as consistent, i.e., unsets the corrupt bit, and flushes
 557 * before if necessary.
 558 */
 559int qcow2_mark_consistent(BlockDriverState *bs)
 560{
 561    BDRVQcow2State *s = bs->opaque;
 562
 563    if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
 564        int ret = qcow2_flush_caches(bs);
 565        if (ret < 0) {
 566            return ret;
 567        }
 568
 569        s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT;
 570        return qcow2_update_header(bs);
 571    }
 572    return 0;
 573}
 574
 575static void qcow2_add_check_result(BdrvCheckResult *out,
 576                                   const BdrvCheckResult *src,
 577                                   bool set_allocation_info)
 578{
 579    out->corruptions += src->corruptions;
 580    out->leaks += src->leaks;
 581    out->check_errors += src->check_errors;
 582    out->corruptions_fixed += src->corruptions_fixed;
 583    out->leaks_fixed += src->leaks_fixed;
 584
 585    if (set_allocation_info) {
 586        out->image_end_offset = src->image_end_offset;
 587        out->bfi = src->bfi;
 588    }
 589}
 590
 591static int coroutine_fn qcow2_co_check_locked(BlockDriverState *bs,
 592                                              BdrvCheckResult *result,
 593                                              BdrvCheckMode fix)
 594{
 595    BdrvCheckResult snapshot_res = {};
 596    BdrvCheckResult refcount_res = {};
 597    int ret;
 598
 599    memset(result, 0, sizeof(*result));
 600
 601    ret = qcow2_check_read_snapshot_table(bs, &snapshot_res, fix);
 602    if (ret < 0) {
 603        qcow2_add_check_result(result, &snapshot_res, false);
 604        return ret;
 605    }
 606
 607    ret = qcow2_check_refcounts(bs, &refcount_res, fix);
 608    qcow2_add_check_result(result, &refcount_res, true);
 609    if (ret < 0) {
 610        qcow2_add_check_result(result, &snapshot_res, false);
 611        return ret;
 612    }
 613
 614    ret = qcow2_check_fix_snapshot_table(bs, &snapshot_res, fix);
 615    qcow2_add_check_result(result, &snapshot_res, false);
 616    if (ret < 0) {
 617        return ret;
 618    }
 619
 620    if (fix && result->check_errors == 0 && result->corruptions == 0) {
 621        ret = qcow2_mark_clean(bs);
 622        if (ret < 0) {
 623            return ret;
 624        }
 625        return qcow2_mark_consistent(bs);
 626    }
 627    return ret;
 628}
 629
 630static int coroutine_fn qcow2_co_check(BlockDriverState *bs,
 631                                       BdrvCheckResult *result,
 632                                       BdrvCheckMode fix)
 633{
 634    BDRVQcow2State *s = bs->opaque;
 635    int ret;
 636
 637    qemu_co_mutex_lock(&s->lock);
 638    ret = qcow2_co_check_locked(bs, result, fix);
 639    qemu_co_mutex_unlock(&s->lock);
 640    return ret;
 641}
 642
 643int qcow2_validate_table(BlockDriverState *bs, uint64_t offset,
 644                         uint64_t entries, size_t entry_len,
 645                         int64_t max_size_bytes, const char *table_name,
 646                         Error **errp)
 647{
 648    BDRVQcow2State *s = bs->opaque;
 649
 650    if (entries > max_size_bytes / entry_len) {
 651        error_setg(errp, "%s too large", table_name);
 652        return -EFBIG;
 653    }
 654
 655    /* Use signed INT64_MAX as the maximum even for uint64_t header fields,
 656     * because values will be passed to qemu functions taking int64_t. */
 657    if ((INT64_MAX - entries * entry_len < offset) ||
 658        (offset_into_cluster(s, offset) != 0)) {
 659        error_setg(errp, "%s offset invalid", table_name);
 660        return -EINVAL;
 661    }
 662
 663    return 0;
 664}
 665
 666static const char *const mutable_opts[] = {
 667    QCOW2_OPT_LAZY_REFCOUNTS,
 668    QCOW2_OPT_DISCARD_REQUEST,
 669    QCOW2_OPT_DISCARD_SNAPSHOT,
 670    QCOW2_OPT_DISCARD_OTHER,
 671    QCOW2_OPT_OVERLAP,
 672    QCOW2_OPT_OVERLAP_TEMPLATE,
 673    QCOW2_OPT_OVERLAP_MAIN_HEADER,
 674    QCOW2_OPT_OVERLAP_ACTIVE_L1,
 675    QCOW2_OPT_OVERLAP_ACTIVE_L2,
 676    QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
 677    QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
 678    QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
 679    QCOW2_OPT_OVERLAP_INACTIVE_L1,
 680    QCOW2_OPT_OVERLAP_INACTIVE_L2,
 681    QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
 682    QCOW2_OPT_CACHE_SIZE,
 683    QCOW2_OPT_L2_CACHE_SIZE,
 684    QCOW2_OPT_L2_CACHE_ENTRY_SIZE,
 685    QCOW2_OPT_REFCOUNT_CACHE_SIZE,
 686    QCOW2_OPT_CACHE_CLEAN_INTERVAL,
 687    NULL
 688};
 689
 690static QemuOptsList qcow2_runtime_opts = {
 691    .name = "qcow2",
 692    .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
 693    .desc = {
 694        {
 695            .name = QCOW2_OPT_LAZY_REFCOUNTS,
 696            .type = QEMU_OPT_BOOL,
 697            .help = "Postpone refcount updates",
 698        },
 699        {
 700            .name = QCOW2_OPT_DISCARD_REQUEST,
 701            .type = QEMU_OPT_BOOL,
 702            .help = "Pass guest discard requests to the layer below",
 703        },
 704        {
 705            .name = QCOW2_OPT_DISCARD_SNAPSHOT,
 706            .type = QEMU_OPT_BOOL,
 707            .help = "Generate discard requests when snapshot related space "
 708                    "is freed",
 709        },
 710        {
 711            .name = QCOW2_OPT_DISCARD_OTHER,
 712            .type = QEMU_OPT_BOOL,
 713            .help = "Generate discard requests when other clusters are freed",
 714        },
 715        {
 716            .name = QCOW2_OPT_OVERLAP,
 717            .type = QEMU_OPT_STRING,
 718            .help = "Selects which overlap checks to perform from a range of "
 719                    "templates (none, constant, cached, all)",
 720        },
 721        {
 722            .name = QCOW2_OPT_OVERLAP_TEMPLATE,
 723            .type = QEMU_OPT_STRING,
 724            .help = "Selects which overlap checks to perform from a range of "
 725                    "templates (none, constant, cached, all)",
 726        },
 727        {
 728            .name = QCOW2_OPT_OVERLAP_MAIN_HEADER,
 729            .type = QEMU_OPT_BOOL,
 730            .help = "Check for unintended writes into the main qcow2 header",
 731        },
 732        {
 733            .name = QCOW2_OPT_OVERLAP_ACTIVE_L1,
 734            .type = QEMU_OPT_BOOL,
 735            .help = "Check for unintended writes into the active L1 table",
 736        },
 737        {
 738            .name = QCOW2_OPT_OVERLAP_ACTIVE_L2,
 739            .type = QEMU_OPT_BOOL,
 740            .help = "Check for unintended writes into an active L2 table",
 741        },
 742        {
 743            .name = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
 744            .type = QEMU_OPT_BOOL,
 745            .help = "Check for unintended writes into the refcount table",
 746        },
 747        {
 748            .name = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
 749            .type = QEMU_OPT_BOOL,
 750            .help = "Check for unintended writes into a refcount block",
 751        },
 752        {
 753            .name = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
 754            .type = QEMU_OPT_BOOL,
 755            .help = "Check for unintended writes into the snapshot table",
 756        },
 757        {
 758            .name = QCOW2_OPT_OVERLAP_INACTIVE_L1,
 759            .type = QEMU_OPT_BOOL,
 760            .help = "Check for unintended writes into an inactive L1 table",
 761        },
 762        {
 763            .name = QCOW2_OPT_OVERLAP_INACTIVE_L2,
 764            .type = QEMU_OPT_BOOL,
 765            .help = "Check for unintended writes into an inactive L2 table",
 766        },
 767        {
 768            .name = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
 769            .type = QEMU_OPT_BOOL,
 770            .help = "Check for unintended writes into the bitmap directory",
 771        },
 772        {
 773            .name = QCOW2_OPT_CACHE_SIZE,
 774            .type = QEMU_OPT_SIZE,
 775            .help = "Maximum combined metadata (L2 tables and refcount blocks) "
 776                    "cache size",
 777        },
 778        {
 779            .name = QCOW2_OPT_L2_CACHE_SIZE,
 780            .type = QEMU_OPT_SIZE,
 781            .help = "Maximum L2 table cache size",
 782        },
 783        {
 784            .name = QCOW2_OPT_L2_CACHE_ENTRY_SIZE,
 785            .type = QEMU_OPT_SIZE,
 786            .help = "Size of each entry in the L2 cache",
 787        },
 788        {
 789            .name = QCOW2_OPT_REFCOUNT_CACHE_SIZE,
 790            .type = QEMU_OPT_SIZE,
 791            .help = "Maximum refcount block cache size",
 792        },
 793        {
 794            .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL,
 795            .type = QEMU_OPT_NUMBER,
 796            .help = "Clean unused cache entries after this time (in seconds)",
 797        },
 798        BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.",
 799            "ID of secret providing qcow2 AES key or LUKS passphrase"),
 800        { /* end of list */ }
 801    },
 802};
 803
 804static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = {
 805    [QCOW2_OL_MAIN_HEADER_BITNR]      = QCOW2_OPT_OVERLAP_MAIN_HEADER,
 806    [QCOW2_OL_ACTIVE_L1_BITNR]        = QCOW2_OPT_OVERLAP_ACTIVE_L1,
 807    [QCOW2_OL_ACTIVE_L2_BITNR]        = QCOW2_OPT_OVERLAP_ACTIVE_L2,
 808    [QCOW2_OL_REFCOUNT_TABLE_BITNR]   = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
 809    [QCOW2_OL_REFCOUNT_BLOCK_BITNR]   = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
 810    [QCOW2_OL_SNAPSHOT_TABLE_BITNR]   = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
 811    [QCOW2_OL_INACTIVE_L1_BITNR]      = QCOW2_OPT_OVERLAP_INACTIVE_L1,
 812    [QCOW2_OL_INACTIVE_L2_BITNR]      = QCOW2_OPT_OVERLAP_INACTIVE_L2,
 813    [QCOW2_OL_BITMAP_DIRECTORY_BITNR] = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
 814};
 815
 816static void cache_clean_timer_cb(void *opaque)
 817{
 818    BlockDriverState *bs = opaque;
 819    BDRVQcow2State *s = bs->opaque;
 820    qcow2_cache_clean_unused(s->l2_table_cache);
 821    qcow2_cache_clean_unused(s->refcount_block_cache);
 822    timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
 823              (int64_t) s->cache_clean_interval * 1000);
 824}
 825
 826static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context)
 827{
 828    BDRVQcow2State *s = bs->opaque;
 829    if (s->cache_clean_interval > 0) {
 830        s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL,
 831                                             SCALE_MS, cache_clean_timer_cb,
 832                                             bs);
 833        timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
 834                  (int64_t) s->cache_clean_interval * 1000);
 835    }
 836}
 837
 838static void cache_clean_timer_del(BlockDriverState *bs)
 839{
 840    BDRVQcow2State *s = bs->opaque;
 841    if (s->cache_clean_timer) {
 842        timer_del(s->cache_clean_timer);
 843        timer_free(s->cache_clean_timer);
 844        s->cache_clean_timer = NULL;
 845    }
 846}
 847
/*
 * Tear down the cache-clean timer of @bs.  The timer is created for a
 * specific AioContext (see cache_clean_timer_init()), so it is removed
 * here; judging by the name this is the driver's AioContext detach hook.
 */
static void qcow2_detach_aio_context(BlockDriverState *bs)
{
    cache_clean_timer_del(bs);
}
 852
/*
 * Set up the cache-clean timer of @bs in @new_context.  No timer is
 * created when the cache-clean interval is zero (see
 * cache_clean_timer_init()).
 */
static void qcow2_attach_aio_context(BlockDriverState *bs,
                                     AioContext *new_context)
{
    cache_clean_timer_init(bs, new_context);
}
 858
 859static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
 860                             uint64_t *l2_cache_size,
 861                             uint64_t *l2_cache_entry_size,
 862                             uint64_t *refcount_cache_size, Error **errp)
 863{
 864    BDRVQcow2State *s = bs->opaque;
 865    uint64_t combined_cache_size, l2_cache_max_setting;
 866    bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set;
 867    bool l2_cache_entry_size_set;
 868    int min_refcount_cache = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size;
 869    uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
 870    uint64_t max_l2_entries = DIV_ROUND_UP(virtual_disk_size, s->cluster_size);
 871    /* An L2 table is always one cluster in size so the max cache size
 872     * should be a multiple of the cluster size. */
 873    uint64_t max_l2_cache = ROUND_UP(max_l2_entries * sizeof(uint64_t),
 874                                     s->cluster_size);
 875
 876    combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE);
 877    l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE);
 878    refcount_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
 879    l2_cache_entry_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE);
 880
 881    combined_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_CACHE_SIZE, 0);
 882    l2_cache_max_setting = qemu_opt_get_size(opts, QCOW2_OPT_L2_CACHE_SIZE,
 883                                             DEFAULT_L2_CACHE_MAX_SIZE);
 884    *refcount_cache_size = qemu_opt_get_size(opts,
 885                                             QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0);
 886
 887    *l2_cache_entry_size = qemu_opt_get_size(
 888        opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE, s->cluster_size);
 889
 890    *l2_cache_size = MIN(max_l2_cache, l2_cache_max_setting);
 891
 892    if (combined_cache_size_set) {
 893        if (l2_cache_size_set && refcount_cache_size_set) {
 894            error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE
 895                       " and " QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not be set "
 896                       "at the same time");
 897            return;
 898        } else if (l2_cache_size_set &&
 899                   (l2_cache_max_setting > combined_cache_size)) {
 900            error_setg(errp, QCOW2_OPT_L2_CACHE_SIZE " may not exceed "
 901                       QCOW2_OPT_CACHE_SIZE);
 902            return;
 903        } else if (*refcount_cache_size > combined_cache_size) {
 904            error_setg(errp, QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not exceed "
 905                       QCOW2_OPT_CACHE_SIZE);
 906            return;
 907        }
 908
 909        if (l2_cache_size_set) {
 910            *refcount_cache_size = combined_cache_size - *l2_cache_size;
 911        } else if (refcount_cache_size_set) {
 912            *l2_cache_size = combined_cache_size - *refcount_cache_size;
 913        } else {
 914            /* Assign as much memory as possible to the L2 cache, and
 915             * use the remainder for the refcount cache */
 916            if (combined_cache_size >= max_l2_cache + min_refcount_cache) {
 917                *l2_cache_size = max_l2_cache;
 918                *refcount_cache_size = combined_cache_size - *l2_cache_size;
 919            } else {
 920                *refcount_cache_size =
 921                    MIN(combined_cache_size, min_refcount_cache);
 922                *l2_cache_size = combined_cache_size - *refcount_cache_size;
 923            }
 924        }
 925    }
 926
 927    /*
 928     * If the L2 cache is not enough to cover the whole disk then
 929     * default to 4KB entries. Smaller entries reduce the cost of
 930     * loads and evictions and increase I/O performance.
 931     */
 932    if (*l2_cache_size < max_l2_cache && !l2_cache_entry_size_set) {
 933        *l2_cache_entry_size = MIN(s->cluster_size, 4096);
 934    }
 935
 936    /* l2_cache_size and refcount_cache_size are ensured to have at least
 937     * their minimum values in qcow2_update_options_prepare() */
 938
 939    if (*l2_cache_entry_size < (1 << MIN_CLUSTER_BITS) ||
 940        *l2_cache_entry_size > s->cluster_size ||
 941        !is_power_of_2(*l2_cache_entry_size)) {
 942        error_setg(errp, "L2 cache entry size must be a power of two "
 943                   "between %d and the cluster size (%d)",
 944                   1 << MIN_CLUSTER_BITS, s->cluster_size);
 945        return;
 946    }
 947}
 948
/*
 * New runtime settings computed by qcow2_update_options_prepare().
 * qcow2_update_options_commit() copies them into BDRVQcow2State;
 * qcow2_update_options_abort() releases the caches and crypto options
 * instead.
 */
typedef struct Qcow2ReopenState {
    /* Newly allocated metadata caches that replace the current ones */
    Qcow2Cache *l2_table_cache;
    Qcow2Cache *refcount_block_cache;
    int l2_slice_size; /* Number of entries in a slice of the L2 table */
    bool use_lazy_refcounts;
    /* Bitmask of enabled overlap checks, one bit per QCOW2_OL_*_BITNR */
    int overlap_check;
    /* Indexed by QCOW2_DISCARD_*: whether that kind of discard is passed on */
    bool discard_passthrough[QCOW2_DISCARD_MAX];
    /* Interval of the cache-clean timer in seconds; 0 means no timer */
    uint64_t cache_clean_interval;
    QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */
} Qcow2ReopenState;
 959
 960static int qcow2_update_options_prepare(BlockDriverState *bs,
 961                                        Qcow2ReopenState *r,
 962                                        QDict *options, int flags,
 963                                        Error **errp)
 964{
 965    BDRVQcow2State *s = bs->opaque;
 966    QemuOpts *opts = NULL;
 967    const char *opt_overlap_check, *opt_overlap_check_template;
 968    int overlap_check_template = 0;
 969    uint64_t l2_cache_size, l2_cache_entry_size, refcount_cache_size;
 970    int i;
 971    const char *encryptfmt;
 972    QDict *encryptopts = NULL;
 973    Error *local_err = NULL;
 974    int ret;
 975
 976    qdict_extract_subqdict(options, &encryptopts, "encrypt.");
 977    encryptfmt = qdict_get_try_str(encryptopts, "format");
 978
 979    opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort);
 980    qemu_opts_absorb_qdict(opts, options, &local_err);
 981    if (local_err) {
 982        error_propagate(errp, local_err);
 983        ret = -EINVAL;
 984        goto fail;
 985    }
 986
 987    /* get L2 table/refcount block cache size from command line options */
 988    read_cache_sizes(bs, opts, &l2_cache_size, &l2_cache_entry_size,
 989                     &refcount_cache_size, &local_err);
 990    if (local_err) {
 991        error_propagate(errp, local_err);
 992        ret = -EINVAL;
 993        goto fail;
 994    }
 995
 996    l2_cache_size /= l2_cache_entry_size;
 997    if (l2_cache_size < MIN_L2_CACHE_SIZE) {
 998        l2_cache_size = MIN_L2_CACHE_SIZE;
 999    }
1000    if (l2_cache_size > INT_MAX) {
1001        error_setg(errp, "L2 cache size too big");
1002        ret = -EINVAL;
1003        goto fail;
1004    }
1005
1006    refcount_cache_size /= s->cluster_size;
1007    if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) {
1008        refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE;
1009    }
1010    if (refcount_cache_size > INT_MAX) {
1011        error_setg(errp, "Refcount cache size too big");
1012        ret = -EINVAL;
1013        goto fail;
1014    }
1015
1016    /* alloc new L2 table/refcount block cache, flush old one */
1017    if (s->l2_table_cache) {
1018        ret = qcow2_cache_flush(bs, s->l2_table_cache);
1019        if (ret) {
1020            error_setg_errno(errp, -ret, "Failed to flush the L2 table cache");
1021            goto fail;
1022        }
1023    }
1024
1025    if (s->refcount_block_cache) {
1026        ret = qcow2_cache_flush(bs, s->refcount_block_cache);
1027        if (ret) {
1028            error_setg_errno(errp, -ret,
1029                             "Failed to flush the refcount block cache");
1030            goto fail;
1031        }
1032    }
1033
1034    r->l2_slice_size = l2_cache_entry_size / sizeof(uint64_t);
1035    r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size,
1036                                           l2_cache_entry_size);
1037    r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size,
1038                                                 s->cluster_size);
1039    if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) {
1040        error_setg(errp, "Could not allocate metadata caches");
1041        ret = -ENOMEM;
1042        goto fail;
1043    }
1044
1045    /* New interval for cache cleanup timer */
1046    r->cache_clean_interval =
1047        qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL,
1048                            DEFAULT_CACHE_CLEAN_INTERVAL);
1049#ifndef CONFIG_LINUX
1050    if (r->cache_clean_interval != 0) {
1051        error_setg(errp, QCOW2_OPT_CACHE_CLEAN_INTERVAL
1052                   " not supported on this host");
1053        ret = -EINVAL;
1054        goto fail;
1055    }
1056#endif
1057    if (r->cache_clean_interval > UINT_MAX) {
1058        error_setg(errp, "Cache clean interval too big");
1059        ret = -EINVAL;
1060        goto fail;
1061    }
1062
1063    /* lazy-refcounts; flush if going from enabled to disabled */
1064    r->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
1065        (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
1066    if (r->use_lazy_refcounts && s->qcow_version < 3) {
1067        error_setg(errp, "Lazy refcounts require a qcow2 image with at least "
1068                   "qemu 1.1 compatibility level");
1069        ret = -EINVAL;
1070        goto fail;
1071    }
1072
1073    if (s->use_lazy_refcounts && !r->use_lazy_refcounts) {
1074        ret = qcow2_mark_clean(bs);
1075        if (ret < 0) {
1076            error_setg_errno(errp, -ret, "Failed to disable lazy refcounts");
1077            goto fail;
1078        }
1079    }
1080
1081    /* Overlap check options */
1082    opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP);
1083    opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE);
1084    if (opt_overlap_check_template && opt_overlap_check &&
1085        strcmp(opt_overlap_check_template, opt_overlap_check))
1086    {
1087        error_setg(errp, "Conflicting values for qcow2 options '"
1088                   QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE
1089                   "' ('%s')", opt_overlap_check, opt_overlap_check_template);
1090        ret = -EINVAL;
1091        goto fail;
1092    }
1093    if (!opt_overlap_check) {
1094        opt_overlap_check = opt_overlap_check_template ?: "cached";
1095    }
1096
1097    if (!strcmp(opt_overlap_check, "none")) {
1098        overlap_check_template = 0;
1099    } else if (!strcmp(opt_overlap_check, "constant")) {
1100        overlap_check_template = QCOW2_OL_CONSTANT;
1101    } else if (!strcmp(opt_overlap_check, "cached")) {
1102        overlap_check_template = QCOW2_OL_CACHED;
1103    } else if (!strcmp(opt_overlap_check, "all")) {
1104        overlap_check_template = QCOW2_OL_ALL;
1105    } else {
1106        error_setg(errp, "Unsupported value '%s' for qcow2 option "
1107                   "'overlap-check'. Allowed are any of the following: "
1108                   "none, constant, cached, all", opt_overlap_check);
1109        ret = -EINVAL;
1110        goto fail;
1111    }
1112
1113    r->overlap_check = 0;
1114    for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) {
1115        /* overlap-check defines a template bitmask, but every flag may be
1116         * overwritten through the associated boolean option */
1117        r->overlap_check |=
1118            qemu_opt_get_bool(opts, overlap_bool_option_names[i],
1119                              overlap_check_template & (1 << i)) << i;
1120    }
1121
1122    r->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
1123    r->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
1124    r->discard_passthrough[QCOW2_DISCARD_REQUEST] =
1125        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
1126                          flags & BDRV_O_UNMAP);
1127    r->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
1128        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
1129    r->discard_passthrough[QCOW2_DISCARD_OTHER] =
1130        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
1131
1132    switch (s->crypt_method_header) {
1133    case QCOW_CRYPT_NONE:
1134        if (encryptfmt) {
1135            error_setg(errp, "No encryption in image header, but options "
1136                       "specified format '%s'", encryptfmt);
1137            ret = -EINVAL;
1138            goto fail;
1139        }
1140        break;
1141
1142    case QCOW_CRYPT_AES:
1143        if (encryptfmt && !g_str_equal(encryptfmt, "aes")) {
1144            error_setg(errp,
1145                       "Header reported 'aes' encryption format but "
1146                       "options specify '%s'", encryptfmt);
1147            ret = -EINVAL;
1148            goto fail;
1149        }
1150        qdict_put_str(encryptopts, "format", "qcow");
1151        r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp);
1152        break;
1153
1154    case QCOW_CRYPT_LUKS:
1155        if (encryptfmt && !g_str_equal(encryptfmt, "luks")) {
1156            error_setg(errp,
1157                       "Header reported 'luks' encryption format but "
1158                       "options specify '%s'", encryptfmt);
1159            ret = -EINVAL;
1160            goto fail;
1161        }
1162        qdict_put_str(encryptopts, "format", "luks");
1163        r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp);
1164        break;
1165
1166    default:
1167        error_setg(errp, "Unsupported encryption method %d",
1168                   s->crypt_method_header);
1169        break;
1170    }
1171    if (s->crypt_method_header != QCOW_CRYPT_NONE && !r->crypto_opts) {
1172        ret = -EINVAL;
1173        goto fail;
1174    }
1175
1176    ret = 0;
1177fail:
1178    qobject_unref(encryptopts);
1179    qemu_opts_del(opts);
1180    opts = NULL;
1181    return ret;
1182}
1183
1184static void qcow2_update_options_commit(BlockDriverState *bs,
1185                                        Qcow2ReopenState *r)
1186{
1187    BDRVQcow2State *s = bs->opaque;
1188    int i;
1189
1190    if (s->l2_table_cache) {
1191        qcow2_cache_destroy(s->l2_table_cache);
1192    }
1193    if (s->refcount_block_cache) {
1194        qcow2_cache_destroy(s->refcount_block_cache);
1195    }
1196    s->l2_table_cache = r->l2_table_cache;
1197    s->refcount_block_cache = r->refcount_block_cache;
1198    s->l2_slice_size = r->l2_slice_size;
1199
1200    s->overlap_check = r->overlap_check;
1201    s->use_lazy_refcounts = r->use_lazy_refcounts;
1202
1203    for (i = 0; i < QCOW2_DISCARD_MAX; i++) {
1204        s->discard_passthrough[i] = r->discard_passthrough[i];
1205    }
1206
1207    if (s->cache_clean_interval != r->cache_clean_interval) {
1208        cache_clean_timer_del(bs);
1209        s->cache_clean_interval = r->cache_clean_interval;
1210        cache_clean_timer_init(bs, bdrv_get_aio_context(bs));
1211    }
1212
1213    qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
1214    s->crypto_opts = r->crypto_opts;
1215}
1216
1217static void qcow2_update_options_abort(BlockDriverState *bs,
1218                                       Qcow2ReopenState *r)
1219{
1220    if (r->l2_table_cache) {
1221        qcow2_cache_destroy(r->l2_table_cache);
1222    }
1223    if (r->refcount_block_cache) {
1224        qcow2_cache_destroy(r->refcount_block_cache);
1225    }
1226    qapi_free_QCryptoBlockOpenOptions(r->crypto_opts);
1227}
1228
1229static int qcow2_update_options(BlockDriverState *bs, QDict *options,
1230                                int flags, Error **errp)
1231{
1232    Qcow2ReopenState r = {};
1233    int ret;
1234
1235    ret = qcow2_update_options_prepare(bs, &r, options, flags, errp);
1236    if (ret >= 0) {
1237        qcow2_update_options_commit(bs, &r);
1238    } else {
1239        qcow2_update_options_abort(bs, &r);
1240    }
1241
1242    return ret;
1243}
1244
1245/* Called with s->lock held.  */
1246static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
1247                                      int flags, Error **errp)
1248{
1249    BDRVQcow2State *s = bs->opaque;
1250    unsigned int len, i;
1251    int ret = 0;
1252    QCowHeader header;
1253    Error *local_err = NULL;
1254    uint64_t ext_end;
1255    uint64_t l1_vm_state_index;
1256    bool update_header = false;
1257
1258    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
1259    if (ret < 0) {
1260        error_setg_errno(errp, -ret, "Could not read qcow2 header");
1261        goto fail;
1262    }
1263    header.magic = be32_to_cpu(header.magic);
1264    header.version = be32_to_cpu(header.version);
1265    header.backing_file_offset = be64_to_cpu(header.backing_file_offset);
1266    header.backing_file_size = be32_to_cpu(header.backing_file_size);
1267    header.size = be64_to_cpu(header.size);
1268    header.cluster_bits = be32_to_cpu(header.cluster_bits);
1269    header.crypt_method = be32_to_cpu(header.crypt_method);
1270    header.l1_table_offset = be64_to_cpu(header.l1_table_offset);
1271    header.l1_size = be32_to_cpu(header.l1_size);
1272    header.refcount_table_offset = be64_to_cpu(header.refcount_table_offset);
1273    header.refcount_table_clusters =
1274        be32_to_cpu(header.refcount_table_clusters);
1275    header.snapshots_offset = be64_to_cpu(header.snapshots_offset);
1276    header.nb_snapshots = be32_to_cpu(header.nb_snapshots);
1277
1278    if (header.magic != QCOW_MAGIC) {
1279        error_setg(errp, "Image is not in qcow2 format");
1280        ret = -EINVAL;
1281        goto fail;
1282    }
1283    if (header.version < 2 || header.version > 3) {
1284        error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version);
1285        ret = -ENOTSUP;
1286        goto fail;
1287    }
1288
1289    s->qcow_version = header.version;
1290
1291    /* Initialise cluster size */
1292    if (header.cluster_bits < MIN_CLUSTER_BITS ||
1293        header.cluster_bits > MAX_CLUSTER_BITS) {
1294        error_setg(errp, "Unsupported cluster size: 2^%" PRIu32,
1295                   header.cluster_bits);
1296        ret = -EINVAL;
1297        goto fail;
1298    }
1299
1300    s->cluster_bits = header.cluster_bits;
1301    s->cluster_size = 1 << s->cluster_bits;
1302
1303    /* Initialise version 3 header fields */
1304    if (header.version == 2) {
1305        header.incompatible_features    = 0;
1306        header.compatible_features      = 0;
1307        header.autoclear_features       = 0;
1308        header.refcount_order           = 4;
1309        header.header_length            = 72;
1310    } else {
1311        header.incompatible_features =
1312            be64_to_cpu(header.incompatible_features);
1313        header.compatible_features = be64_to_cpu(header.compatible_features);
1314        header.autoclear_features = be64_to_cpu(header.autoclear_features);
1315        header.refcount_order = be32_to_cpu(header.refcount_order);
1316        header.header_length = be32_to_cpu(header.header_length);
1317
1318        if (header.header_length < 104) {
1319            error_setg(errp, "qcow2 header too short");
1320            ret = -EINVAL;
1321            goto fail;
1322        }
1323    }
1324
1325    if (header.header_length > s->cluster_size) {
1326        error_setg(errp, "qcow2 header exceeds cluster size");
1327        ret = -EINVAL;
1328        goto fail;
1329    }
1330
1331    if (header.header_length > sizeof(header)) {
1332        s->unknown_header_fields_size = header.header_length - sizeof(header);
1333        s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
1334        ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
1335                         s->unknown_header_fields_size);
1336        if (ret < 0) {
1337            error_setg_errno(errp, -ret, "Could not read unknown qcow2 header "
1338                             "fields");
1339            goto fail;
1340        }
1341    }
1342
1343    if (header.backing_file_offset > s->cluster_size) {
1344        error_setg(errp, "Invalid backing file offset");
1345        ret = -EINVAL;
1346        goto fail;
1347    }
1348
1349    if (header.backing_file_offset) {
1350        ext_end = header.backing_file_offset;
1351    } else {
1352        ext_end = 1 << header.cluster_bits;
1353    }
1354
1355    /* Handle feature bits */
1356    s->incompatible_features    = header.incompatible_features;
1357    s->compatible_features      = header.compatible_features;
1358    s->autoclear_features       = header.autoclear_features;
1359
1360    if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) {
1361        void *feature_table = NULL;
1362        qcow2_read_extensions(bs, header.header_length, ext_end,
1363                              &feature_table, flags, NULL, NULL);
1364        report_unsupported_feature(errp, feature_table,
1365                                   s->incompatible_features &
1366                                   ~QCOW2_INCOMPAT_MASK);
1367        ret = -ENOTSUP;
1368        g_free(feature_table);
1369        goto fail;
1370    }
1371
1372    if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
1373        /* Corrupt images may not be written to unless they are being repaired
1374         */
1375        if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
1376            error_setg(errp, "qcow2: Image is corrupt; cannot be opened "
1377                       "read/write");
1378            ret = -EACCES;
1379            goto fail;
1380        }
1381    }
1382
1383    /* Check support for various header values */
1384    if (header.refcount_order > 6) {
1385        error_setg(errp, "Reference count entry width too large; may not "
1386                   "exceed 64 bits");
1387        ret = -EINVAL;
1388        goto fail;
1389    }
1390    s->refcount_order = header.refcount_order;
1391    s->refcount_bits = 1 << s->refcount_order;
1392    s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
1393    s->refcount_max += s->refcount_max - 1;
1394
1395    s->crypt_method_header = header.crypt_method;
1396    if (s->crypt_method_header) {
1397        if (bdrv_uses_whitelist() &&
1398            s->crypt_method_header == QCOW_CRYPT_AES) {
1399            error_setg(errp,
1400                       "Use of AES-CBC encrypted qcow2 images is no longer "
1401                       "supported in system emulators");
1402            error_append_hint(errp,
1403                              "You can use 'qemu-img convert' to convert your "
1404                              "image to an alternative supported format, such "
1405                              "as unencrypted qcow2, or raw with the LUKS "
1406                              "format instead.\n");
1407            ret = -ENOSYS;
1408            goto fail;
1409        }
1410
1411        if (s->crypt_method_header == QCOW_CRYPT_AES) {
1412            s->crypt_physical_offset = false;
1413        } else {
1414            /* Assuming LUKS and any future crypt methods we
1415             * add will all use physical offsets, due to the
1416             * fact that the alternative is insecure...  */
1417            s->crypt_physical_offset = true;
1418        }
1419
1420        bs->encrypted = true;
1421    }
1422
1423    s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
1424    s->l2_size = 1 << s->l2_bits;
1425    /* 2^(s->refcount_order - 3) is the refcount width in bytes */
1426    s->refcount_block_bits = s->cluster_bits - (s->refcount_order - 3);
1427    s->refcount_block_size = 1 << s->refcount_block_bits;
1428    bs->total_sectors = header.size / BDRV_SECTOR_SIZE;
1429    s->csize_shift = (62 - (s->cluster_bits - 8));
1430    s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
1431    s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
1432
1433    s->refcount_table_offset = header.refcount_table_offset;
1434    s->refcount_table_size =
1435        header.refcount_table_clusters << (s->cluster_bits - 3);
1436
1437    if (header.refcount_table_clusters == 0 && !(flags & BDRV_O_CHECK)) {
1438        error_setg(errp, "Image does not contain a reference count table");
1439        ret = -EINVAL;
1440        goto fail;
1441    }
1442
1443    ret = qcow2_validate_table(bs, s->refcount_table_offset,
1444                               header.refcount_table_clusters,
1445                               s->cluster_size, QCOW_MAX_REFTABLE_SIZE,
1446                               "Reference count table", errp);
1447    if (ret < 0) {
1448        goto fail;
1449    }
1450
1451    if (!(flags & BDRV_O_CHECK)) {
1452        /*
1453         * The total size in bytes of the snapshot table is checked in
1454         * qcow2_read_snapshots() because the size of each snapshot is
1455         * variable and we don't know it yet.
1456         * Here we only check the offset and number of snapshots.
1457         */
1458        ret = qcow2_validate_table(bs, header.snapshots_offset,
1459                                   header.nb_snapshots,
1460                                   sizeof(QCowSnapshotHeader),
1461                                   sizeof(QCowSnapshotHeader) *
1462                                       QCOW_MAX_SNAPSHOTS,
1463                                   "Snapshot table", errp);
1464        if (ret < 0) {
1465            goto fail;
1466        }
1467    }
1468
1469    /* read the level 1 table */
1470    ret = qcow2_validate_table(bs, header.l1_table_offset,
1471                               header.l1_size, sizeof(uint64_t),
1472                               QCOW_MAX_L1_SIZE, "Active L1 table", errp);
1473    if (ret < 0) {
1474        goto fail;
1475    }
1476    s->l1_size = header.l1_size;
1477    s->l1_table_offset = header.l1_table_offset;
1478
1479    l1_vm_state_index = size_to_l1(s, header.size);
1480    if (l1_vm_state_index > INT_MAX) {
1481        error_setg(errp, "Image is too big");
1482        ret = -EFBIG;
1483        goto fail;
1484    }
1485    s->l1_vm_state_index = l1_vm_state_index;
1486
1487    /* the L1 table must contain at least enough entries to put
1488       header.size bytes */
1489    if (s->l1_size < s->l1_vm_state_index) {
1490        error_setg(errp, "L1 table is too small");
1491        ret = -EINVAL;
1492        goto fail;
1493    }
1494
1495    if (s->l1_size > 0) {
1496        s->l1_table = qemu_try_blockalign(bs->file->bs,
1497                                          s->l1_size * sizeof(uint64_t));
1498        if (s->l1_table == NULL) {
1499            error_setg(errp, "Could not allocate L1 table");
1500            ret = -ENOMEM;
1501            goto fail;
1502        }
1503        ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
1504                         s->l1_size * sizeof(uint64_t));
1505        if (ret < 0) {
1506            error_setg_errno(errp, -ret, "Could not read L1 table");
1507            goto fail;
1508        }
1509        for(i = 0;i < s->l1_size; i++) {
1510            s->l1_table[i] = be64_to_cpu(s->l1_table[i]);
1511        }
1512    }
1513
1514    /* Parse driver-specific options */
1515    ret = qcow2_update_options(bs, options, flags, errp);
1516    if (ret < 0) {
1517        goto fail;
1518    }
1519
1520    s->flags = flags;
1521
1522    ret = qcow2_refcount_init(bs);
1523    if (ret != 0) {
1524        error_setg_errno(errp, -ret, "Could not initialize refcount handling");
1525        goto fail;
1526    }
1527
1528    QLIST_INIT(&s->cluster_allocs);
1529    QTAILQ_INIT(&s->discards);
1530
1531    /* read qcow2 extensions */
1532    if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL,
1533                              flags, &update_header, &local_err)) {
1534        error_propagate(errp, local_err);
1535        ret = -EINVAL;
1536        goto fail;
1537    }
1538
1539    /* Open external data file */
1540    s->data_file = bdrv_open_child(NULL, options, "data-file", bs, &child_file,
1541                                   true, &local_err);
1542    if (local_err) {
1543        error_propagate(errp, local_err);
1544        ret = -EINVAL;
1545        goto fail;
1546    }
1547
1548    if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) {
1549        if (!s->data_file && s->image_data_file) {
1550            s->data_file = bdrv_open_child(s->image_data_file, options,
1551                                           "data-file", bs, &child_file,
1552                                           false, errp);
1553            if (!s->data_file) {
1554                ret = -EINVAL;
1555                goto fail;
1556            }
1557        }
1558        if (!s->data_file) {
1559            error_setg(errp, "'data-file' is required for this image");
1560            ret = -EINVAL;
1561            goto fail;
1562        }
1563    } else {
1564        if (s->data_file) {
1565            error_setg(errp, "'data-file' can only be set for images with an "
1566                             "external data file");
1567            ret = -EINVAL;
1568            goto fail;
1569        }
1570
1571        s->data_file = bs->file;
1572
1573        if (data_file_is_raw(bs)) {
1574            error_setg(errp, "data-file-raw requires a data file");
1575            ret = -EINVAL;
1576            goto fail;
1577        }
1578    }
1579
1580    /* qcow2_read_extension may have set up the crypto context
1581     * if the crypt method needs a header region, some methods
1582     * don't need header extensions, so must check here
1583     */
1584    if (s->crypt_method_header && !s->crypto) {
1585        if (s->crypt_method_header == QCOW_CRYPT_AES) {
1586            unsigned int cflags = 0;
1587            if (flags & BDRV_O_NO_IO) {
1588                cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
1589            }
1590            s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
1591                                           NULL, NULL, cflags,
1592                                           QCOW2_MAX_THREADS, errp);
1593            if (!s->crypto) {
1594                ret = -EINVAL;
1595                goto fail;
1596            }
1597        } else if (!(flags & BDRV_O_NO_IO)) {
1598            error_setg(errp, "Missing CRYPTO header for crypt method %d",
1599                       s->crypt_method_header);
1600            ret = -EINVAL;
1601            goto fail;
1602        }
1603    }
1604
1605    /* read the backing file name */
1606    if (header.backing_file_offset != 0) {
1607        len = header.backing_file_size;
1608        if (len > MIN(1023, s->cluster_size - header.backing_file_offset) ||
1609            len >= sizeof(bs->backing_file)) {
1610            error_setg(errp, "Backing file name too long");
1611            ret = -EINVAL;
1612            goto fail;
1613        }
1614        ret = bdrv_pread(bs->file, header.backing_file_offset,
1615                         bs->auto_backing_file, len);
1616        if (ret < 0) {
1617            error_setg_errno(errp, -ret, "Could not read backing file name");
1618            goto fail;
1619        }
1620        bs->auto_backing_file[len] = '\0';
1621        pstrcpy(bs->backing_file, sizeof(bs->backing_file),
1622                bs->auto_backing_file);
1623        s->image_backing_file = g_strdup(bs->auto_backing_file);
1624    }
1625
1626    /*
1627     * Internal snapshots; skip reading them in check mode, because
1628     * we do not need them then, and we do not want to abort because
1629     * of a broken table.
1630     */
1631    if (!(flags & BDRV_O_CHECK)) {
1632        s->snapshots_offset = header.snapshots_offset;
1633        s->nb_snapshots = header.nb_snapshots;
1634
1635        ret = qcow2_read_snapshots(bs, errp);
1636        if (ret < 0) {
1637            goto fail;
1638        }
1639    }
1640
1641    /* Clear unknown autoclear feature bits */
1642    update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK;
1643    update_header =
1644        update_header && !bs->read_only && !(flags & BDRV_O_INACTIVE);
1645    if (update_header) {
1646        s->autoclear_features &= QCOW2_AUTOCLEAR_MASK;
1647    }
1648
1649    /* == Handle persistent dirty bitmaps ==
1650     *
1651     * We want load dirty bitmaps in three cases:
1652     *
1653     * 1. Normal open of the disk in active mode, not related to invalidation
1654     *    after migration.
1655     *
1656     * 2. Invalidation of the target vm after pre-copy phase of migration, if
1657     *    bitmaps are _not_ migrating through migration channel, i.e.
1658     *    'dirty-bitmaps' capability is disabled.
1659     *
1660     * 3. Invalidation of source vm after failed or canceled migration.
1661     *    This is a very interesting case. There are two possible types of
1662     *    bitmaps:
1663     *
1664     *    A. Stored on inactivation and removed. They should be loaded from the
1665     *       image.
1666     *
1667     *    B. Not stored: not-persistent bitmaps and bitmaps, migrated through
1668     *       the migration channel (with dirty-bitmaps capability).
1669     *
1670     *    On the other hand, there are two possible sub-cases:
1671     *
1672     *    3.1 disk was changed by somebody else while were inactive. In this
1673     *        case all in-RAM dirty bitmaps (both persistent and not) are
1674     *        definitely invalid. And we don't have any method to determine
1675     *        this.
1676     *
1677     *        Simple and safe thing is to just drop all the bitmaps of type B on
1678     *        inactivation. But in this case we lose bitmaps in valid 4.2 case.
1679     *
1680     *        On the other hand, resuming source vm, if disk was already changed
1681     *        is a bad thing anyway: not only bitmaps, the whole vm state is
1682     *        out of sync with disk.
1683     *
1684     *        This means, that user or management tool, who for some reason
1685     *        decided to resume source vm, after disk was already changed by
1686     *        target vm, should at least drop all dirty bitmaps by hand.
1687     *
1688     *        So, we can ignore this case for now, but TODO: "generation"
1689     *        extension for qcow2, to determine, that image was changed after
1690     *        last inactivation. And if it is changed, we will drop (or at least
1691     *        mark as 'invalid' all the bitmaps of type B, both persistent
1692     *        and not).
1693     *
1694     *    3.2 disk was _not_ changed while were inactive. Bitmaps may be saved
1695     *        to disk ('dirty-bitmaps' capability disabled), or not saved
1696     *        ('dirty-bitmaps' capability enabled), but we don't need to care
1697     *        of: let's load bitmaps as always: stored bitmaps will be loaded,
1698     *        and not stored has flag IN_USE=1 in the image and will be skipped
1699     *        on loading.
1700     *
1701     * One remaining possible case when we don't want load bitmaps:
1702     *
1703     * 4. Open disk in inactive mode in target vm (bitmaps are migrating or
1704     *    will be loaded on invalidation, no needs try loading them before)
1705     */
1706
1707    if (!(bdrv_get_flags(bs) & BDRV_O_INACTIVE)) {
1708        /* It's case 1, 2 or 3.2. Or 3.1 which is BUG in management layer. */
1709        bool header_updated = qcow2_load_dirty_bitmaps(bs, &local_err);
1710        if (local_err != NULL) {
1711            error_propagate(errp, local_err);
1712            ret = -EINVAL;
1713            goto fail;
1714        }
1715
1716        update_header = update_header && !header_updated;
1717    }
1718
1719    if (update_header) {
1720        ret = qcow2_update_header(bs);
1721        if (ret < 0) {
1722            error_setg_errno(errp, -ret, "Could not update qcow2 header");
1723            goto fail;
1724        }
1725    }
1726
1727    bs->supported_zero_flags = header.version >= 3 ?
1728                               BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK : 0;
1729
1730    /* Repair image if dirty */
1731    if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only &&
1732        (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
1733        BdrvCheckResult result = {0};
1734
1735        ret = qcow2_co_check_locked(bs, &result,
1736                                    BDRV_FIX_ERRORS | BDRV_FIX_LEAKS);
1737        if (ret < 0 || result.check_errors) {
1738            if (ret >= 0) {
1739                ret = -EIO;
1740            }
1741            error_setg_errno(errp, -ret, "Could not repair dirty image");
1742            goto fail;
1743        }
1744    }
1745
1746#ifdef DEBUG_ALLOC
1747    {
1748        BdrvCheckResult result = {0};
1749        qcow2_check_refcounts(bs, &result, 0);
1750    }
1751#endif
1752
1753    qemu_co_queue_init(&s->thread_task_queue);
1754
1755    return ret;
1756
1757 fail:
1758    g_free(s->image_data_file);
1759    if (has_data_file(bs)) {
1760        bdrv_unref_child(bs, s->data_file);
1761        s->data_file = NULL;
1762    }
1763    g_free(s->unknown_header_fields);
1764    cleanup_unknown_header_ext(bs);
1765    qcow2_free_snapshots(bs);
1766    qcow2_refcount_close(bs);
1767    qemu_vfree(s->l1_table);
1768    /* else pre-write overlap checks in cache_destroy may crash */
1769    s->l1_table = NULL;
1770    cache_clean_timer_del(bs);
1771    if (s->l2_table_cache) {
1772        qcow2_cache_destroy(s->l2_table_cache);
1773    }
1774    if (s->refcount_block_cache) {
1775        qcow2_cache_destroy(s->refcount_block_cache);
1776    }
1777    qcrypto_block_free(s->crypto);
1778    qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
1779    return ret;
1780}
1781
/*
 * Arguments and result slot handed to qcow2_open_entry() so that
 * qcow2_do_open() can be run from a coroutine entry point.
 */
typedef struct QCow2OpenCo {
    BlockDriverState *bs;   /* node being opened */
    QDict *options;         /* passed through to qcow2_do_open() */
    int flags;              /* passed through to qcow2_do_open() */
    Error **errp;           /* passed through to qcow2_do_open() */
    int ret;                /* result of qcow2_do_open(); qcow2_open() presets
                             * it to -EINPROGRESS and polls on that value */
} QCow2OpenCo;
1789
1790static void coroutine_fn qcow2_open_entry(void *opaque)
1791{
1792    QCow2OpenCo *qoc = opaque;
1793    BDRVQcow2State *s = qoc->bs->opaque;
1794
1795    qemu_co_mutex_lock(&s->lock);
1796    qoc->ret = qcow2_do_open(qoc->bs, qoc->options, qoc->flags, qoc->errp);
1797    qemu_co_mutex_unlock(&s->lock);
1798}
1799
1800static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
1801                      Error **errp)
1802{
1803    BDRVQcow2State *s = bs->opaque;
1804    QCow2OpenCo qoc = {
1805        .bs = bs,
1806        .options = options,
1807        .flags = flags,
1808        .errp = errp,
1809        .ret = -EINPROGRESS
1810    };
1811
1812    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
1813                               false, errp);
1814    if (!bs->file) {
1815        return -EINVAL;
1816    }
1817
1818    /* Initialise locks */
1819    qemu_co_mutex_init(&s->lock);
1820
1821    if (qemu_in_coroutine()) {
1822        /* From bdrv_co_create.  */
1823        qcow2_open_entry(&qoc);
1824    } else {
1825        assert(qemu_get_current_aio_context() == qemu_get_aio_context());
1826        qemu_coroutine_enter(qemu_coroutine_create(qcow2_open_entry, &qoc));
1827        BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS);
1828    }
1829    return qoc.ret;
1830}
1831
1832static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
1833{
1834    BDRVQcow2State *s = bs->opaque;
1835
1836    if (bs->encrypted) {
1837        /* Encryption works on a sector granularity */
1838        bs->bl.request_alignment = qcrypto_block_get_sector_size(s->crypto);
1839    }
1840    bs->bl.pwrite_zeroes_alignment = s->cluster_size;
1841    bs->bl.pdiscard_alignment = s->cluster_size;
1842}
1843
1844static int qcow2_reopen_prepare(BDRVReopenState *state,
1845                                BlockReopenQueue *queue, Error **errp)
1846{
1847    Qcow2ReopenState *r;
1848    int ret;
1849
1850    r = g_new0(Qcow2ReopenState, 1);
1851    state->opaque = r;
1852
1853    ret = qcow2_update_options_prepare(state->bs, r, state->options,
1854                                       state->flags, errp);
1855    if (ret < 0) {
1856        goto fail;
1857    }
1858
1859    /* We need to write out any unwritten data if we reopen read-only. */
1860    if ((state->flags & BDRV_O_RDWR) == 0) {
1861        ret = qcow2_reopen_bitmaps_ro(state->bs, errp);
1862        if (ret < 0) {
1863            goto fail;
1864        }
1865
1866        ret = bdrv_flush(state->bs);
1867        if (ret < 0) {
1868            goto fail;
1869        }
1870
1871        ret = qcow2_mark_clean(state->bs);
1872        if (ret < 0) {
1873            goto fail;
1874        }
1875    }
1876
1877    return 0;
1878
1879fail:
1880    qcow2_update_options_abort(state->bs, r);
1881    g_free(r);
1882    return ret;
1883}
1884
1885static void qcow2_reopen_commit(BDRVReopenState *state)
1886{
1887    qcow2_update_options_commit(state->bs, state->opaque);
1888    g_free(state->opaque);
1889}
1890
1891static void qcow2_reopen_commit_post(BDRVReopenState *state)
1892{
1893    if (state->flags & BDRV_O_RDWR) {
1894        Error *local_err = NULL;
1895
1896        if (qcow2_reopen_bitmaps_rw(state->bs, &local_err) < 0) {
1897            /*
1898             * This is not fatal, bitmaps just left read-only, so all following
1899             * writes will fail. User can remove read-only bitmaps to unblock
1900             * writes or retry reopen.
1901             */
1902            error_reportf_err(local_err,
1903                              "%s: Failed to make dirty bitmaps writable: ",
1904                              bdrv_get_node_name(state->bs));
1905        }
1906    }
1907}
1908
1909static void qcow2_reopen_abort(BDRVReopenState *state)
1910{
1911    qcow2_update_options_abort(state->bs, state->opaque);
1912    g_free(state->opaque);
1913}
1914
1915static void qcow2_join_options(QDict *options, QDict *old_options)
1916{
1917    bool has_new_overlap_template =
1918        qdict_haskey(options, QCOW2_OPT_OVERLAP) ||
1919        qdict_haskey(options, QCOW2_OPT_OVERLAP_TEMPLATE);
1920    bool has_new_total_cache_size =
1921        qdict_haskey(options, QCOW2_OPT_CACHE_SIZE);
1922    bool has_all_cache_options;
1923
1924    /* New overlap template overrides all old overlap options */
1925    if (has_new_overlap_template) {
1926        qdict_del(old_options, QCOW2_OPT_OVERLAP);
1927        qdict_del(old_options, QCOW2_OPT_OVERLAP_TEMPLATE);
1928        qdict_del(old_options, QCOW2_OPT_OVERLAP_MAIN_HEADER);
1929        qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L1);
1930        qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L2);
1931        qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_TABLE);
1932        qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK);
1933        qdict_del(old_options, QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE);
1934        qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L1);
1935        qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L2);
1936    }
1937
1938    /* New total cache size overrides all old options */
1939    if (qdict_haskey(options, QCOW2_OPT_CACHE_SIZE)) {
1940        qdict_del(old_options, QCOW2_OPT_L2_CACHE_SIZE);
1941        qdict_del(old_options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
1942    }
1943
1944    qdict_join(options, old_options, false);
1945
1946    /*
1947     * If after merging all cache size options are set, an old total size is
1948     * overwritten. Do keep all options, however, if all three are new. The
1949     * resulting error message is what we want to happen.
1950     */
1951    has_all_cache_options =
1952        qdict_haskey(options, QCOW2_OPT_CACHE_SIZE) ||
1953        qdict_haskey(options, QCOW2_OPT_L2_CACHE_SIZE) ||
1954        qdict_haskey(options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
1955
1956    if (has_all_cache_options && !has_new_total_cache_size) {
1957        qdict_del(options, QCOW2_OPT_CACHE_SIZE);
1958    }
1959}
1960
1961static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs,
1962                                              bool want_zero,
1963                                              int64_t offset, int64_t count,
1964                                              int64_t *pnum, int64_t *map,
1965                                              BlockDriverState **file)
1966{
1967    BDRVQcow2State *s = bs->opaque;
1968    uint64_t cluster_offset;
1969    unsigned int bytes;
1970    int ret, status = 0;
1971
1972    qemu_co_mutex_lock(&s->lock);
1973
1974    if (!s->metadata_preallocation_checked) {
1975        ret = qcow2_detect_metadata_preallocation(bs);
1976        s->metadata_preallocation = (ret == 1);
1977        s->metadata_preallocation_checked = true;
1978    }
1979
1980    bytes = MIN(INT_MAX, count);
1981    ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset);
1982    qemu_co_mutex_unlock(&s->lock);
1983    if (ret < 0) {
1984        return ret;
1985    }
1986
1987    *pnum = bytes;
1988
1989    if ((ret == QCOW2_CLUSTER_NORMAL || ret == QCOW2_CLUSTER_ZERO_ALLOC) &&
1990        !s->crypto) {
1991        *map = cluster_offset | offset_into_cluster(s, offset);
1992        *file = s->data_file->bs;
1993        status |= BDRV_BLOCK_OFFSET_VALID;
1994    }
1995    if (ret == QCOW2_CLUSTER_ZERO_PLAIN || ret == QCOW2_CLUSTER_ZERO_ALLOC) {
1996        status |= BDRV_BLOCK_ZERO;
1997    } else if (ret != QCOW2_CLUSTER_UNALLOCATED) {
1998        status |= BDRV_BLOCK_DATA;
1999    }
2000    if (s->metadata_preallocation && (status & BDRV_BLOCK_DATA) &&
2001        (status & BDRV_BLOCK_OFFSET_VALID))
2002    {
2003        status |= BDRV_BLOCK_RECURSE;
2004    }
2005    return status;
2006}
2007
2008static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs,
2009                                            QCowL2Meta **pl2meta,
2010                                            bool link_l2)
2011{
2012    int ret = 0;
2013    QCowL2Meta *l2meta = *pl2meta;
2014
2015    while (l2meta != NULL) {
2016        QCowL2Meta *next;
2017
2018        if (link_l2) {
2019            ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
2020            if (ret) {
2021                goto out;
2022            }
2023        } else {
2024            qcow2_alloc_cluster_abort(bs, l2meta);
2025        }
2026
2027        /* Take the request off the list of running requests */
2028        if (l2meta->nb_clusters != 0) {
2029            QLIST_REMOVE(l2meta, next_in_flight);
2030        }
2031
2032        qemu_co_queue_restart_all(&l2meta->dependent_requests);
2033
2034        next = l2meta->next;
2035        g_free(l2meta);
2036        l2meta = next;
2037    }
2038out:
2039    *pl2meta = l2meta;
2040    return ret;
2041}
2042
2043static coroutine_fn int
2044qcow2_co_preadv_encrypted(BlockDriverState *bs,
2045                           uint64_t file_cluster_offset,
2046                           uint64_t offset,
2047                           uint64_t bytes,
2048                           QEMUIOVector *qiov,
2049                           uint64_t qiov_offset)
2050{
2051    int ret;
2052    BDRVQcow2State *s = bs->opaque;
2053    uint8_t *buf;
2054
2055    assert(bs->encrypted && s->crypto);
2056    assert(bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
2057
2058    /*
2059     * For encrypted images, read everything into a temporary
2060     * contiguous buffer on which the AES functions can work.
2061     * Also, decryption in a separate buffer is better as it
2062     * prevents the guest from learning information about the
2063     * encrypted nature of the virtual disk.
2064     */
2065
2066    buf = qemu_try_blockalign(s->data_file->bs, bytes);
2067    if (buf == NULL) {
2068        return -ENOMEM;
2069    }
2070
2071    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
2072    ret = bdrv_co_pread(s->data_file,
2073                        file_cluster_offset + offset_into_cluster(s, offset),
2074                        bytes, buf, 0);
2075    if (ret < 0) {
2076        goto fail;
2077    }
2078
2079    if (qcow2_co_decrypt(bs,
2080                         file_cluster_offset + offset_into_cluster(s, offset),
2081                         offset, buf, bytes) < 0)
2082    {
2083        ret = -EIO;
2084        goto fail;
2085    }
2086    qemu_iovec_from_buf(qiov, qiov_offset, buf, bytes);
2087
2088fail:
2089    qemu_vfree(buf);
2090
2091    return ret;
2092}
2093
/*
 * One unit of read or write work created by qcow2_add_task().  The embedded
 * AioTask lets the task entry functions recover this structure with
 * container_of().
 */
typedef struct Qcow2AioTask {
    AioTask task; /* task.func is the entry function to run */

    BlockDriverState *bs;
    QCow2ClusterType cluster_type; /* only for read */
    uint64_t file_cluster_offset; /* cluster offset passed on to the worker */
    uint64_t offset; /* guest offset of the request part */
    uint64_t bytes; /* length of the request part */
    QEMUIOVector *qiov; /* I/O vector of the whole request */
    uint64_t qiov_offset; /* where this part starts within @qiov */
    QCowL2Meta *l2meta; /* only for write */
} Qcow2AioTask;
2106
2107static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task);
2108static coroutine_fn int qcow2_add_task(BlockDriverState *bs,
2109                                       AioTaskPool *pool,
2110                                       AioTaskFunc func,
2111                                       QCow2ClusterType cluster_type,
2112                                       uint64_t file_cluster_offset,
2113                                       uint64_t offset,
2114                                       uint64_t bytes,
2115                                       QEMUIOVector *qiov,
2116                                       size_t qiov_offset,
2117                                       QCowL2Meta *l2meta)
2118{
2119    Qcow2AioTask local_task;
2120    Qcow2AioTask *task = pool ? g_new(Qcow2AioTask, 1) : &local_task;
2121
2122    *task = (Qcow2AioTask) {
2123        .task.func = func,
2124        .bs = bs,
2125        .cluster_type = cluster_type,
2126        .qiov = qiov,
2127        .file_cluster_offset = file_cluster_offset,
2128        .offset = offset,
2129        .bytes = bytes,
2130        .qiov_offset = qiov_offset,
2131        .l2meta = l2meta,
2132    };
2133
2134    trace_qcow2_add_task(qemu_coroutine_self(), bs, pool,
2135                         func == qcow2_co_preadv_task_entry ? "read" : "write",
2136                         cluster_type, file_cluster_offset, offset, bytes,
2137                         qiov, qiov_offset);
2138
2139    if (!pool) {
2140        return func(&task->task);
2141    }
2142
2143    aio_task_pool_start_task(pool, &task->task);
2144
2145    return 0;
2146}
2147
2148static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs,
2149                                             QCow2ClusterType cluster_type,
2150                                             uint64_t file_cluster_offset,
2151                                             uint64_t offset, uint64_t bytes,
2152                                             QEMUIOVector *qiov,
2153                                             size_t qiov_offset)
2154{
2155    BDRVQcow2State *s = bs->opaque;
2156    int offset_in_cluster = offset_into_cluster(s, offset);
2157
2158    switch (cluster_type) {
2159    case QCOW2_CLUSTER_ZERO_PLAIN:
2160    case QCOW2_CLUSTER_ZERO_ALLOC:
2161        /* Both zero types are handled in qcow2_co_preadv_part */
2162        g_assert_not_reached();
2163
2164    case QCOW2_CLUSTER_UNALLOCATED:
2165        assert(bs->backing); /* otherwise handled in qcow2_co_preadv_part */
2166
2167        BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
2168        return bdrv_co_preadv_part(bs->backing, offset, bytes,
2169                                   qiov, qiov_offset, 0);
2170
2171    case QCOW2_CLUSTER_COMPRESSED:
2172        return qcow2_co_preadv_compressed(bs, file_cluster_offset,
2173                                          offset, bytes, qiov, qiov_offset);
2174
2175    case QCOW2_CLUSTER_NORMAL:
2176        assert(offset_into_cluster(s, file_cluster_offset) == 0);
2177        if (bs->encrypted) {
2178            return qcow2_co_preadv_encrypted(bs, file_cluster_offset,
2179                                             offset, bytes, qiov, qiov_offset);
2180        }
2181
2182        BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
2183        return bdrv_co_preadv_part(s->data_file,
2184                                   file_cluster_offset + offset_in_cluster,
2185                                   bytes, qiov, qiov_offset, 0);
2186
2187    default:
2188        g_assert_not_reached();
2189    }
2190
2191    g_assert_not_reached();
2192}
2193
2194static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task)
2195{
2196    Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
2197
2198    assert(!t->l2meta);
2199
2200    return qcow2_co_preadv_task(t->bs, t->cluster_type, t->file_cluster_offset,
2201                                t->offset, t->bytes, t->qiov, t->qiov_offset);
2202}
2203
2204static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs,
2205                                             uint64_t offset, uint64_t bytes,
2206                                             QEMUIOVector *qiov,
2207                                             size_t qiov_offset, int flags)
2208{
2209    BDRVQcow2State *s = bs->opaque;
2210    int ret = 0;
2211    unsigned int cur_bytes; /* number of bytes in current iteration */
2212    uint64_t cluster_offset = 0;
2213    AioTaskPool *aio = NULL;
2214
2215    while (bytes != 0 && aio_task_pool_status(aio) == 0) {
2216        /* prepare next request */
2217        cur_bytes = MIN(bytes, INT_MAX);
2218        if (s->crypto) {
2219            cur_bytes = MIN(cur_bytes,
2220                            QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
2221        }
2222
2223        qemu_co_mutex_lock(&s->lock);
2224        ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset);
2225        qemu_co_mutex_unlock(&s->lock);
2226        if (ret < 0) {
2227            goto out;
2228        }
2229
2230        if (ret == QCOW2_CLUSTER_ZERO_PLAIN ||
2231            ret == QCOW2_CLUSTER_ZERO_ALLOC ||
2232            (ret == QCOW2_CLUSTER_UNALLOCATED && !bs->backing))
2233        {
2234            qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes);
2235        } else {
2236            if (!aio && cur_bytes != bytes) {
2237                aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
2238            }
2239            ret = qcow2_add_task(bs, aio, qcow2_co_preadv_task_entry, ret,
2240                                 cluster_offset, offset, cur_bytes,
2241                                 qiov, qiov_offset, NULL);
2242            if (ret < 0) {
2243                goto out;
2244            }
2245        }
2246
2247        bytes -= cur_bytes;
2248        offset += cur_bytes;
2249        qiov_offset += cur_bytes;
2250    }
2251
2252out:
2253    if (aio) {
2254        aio_task_pool_wait_all(aio);
2255        if (ret == 0) {
2256            ret = aio_task_pool_status(aio);
2257        }
2258        g_free(aio);
2259    }
2260
2261    return ret;
2262}
2263
2264/* Check if it's possible to merge a write request with the writing of
2265 * the data from the COW regions */
2266static bool merge_cow(uint64_t offset, unsigned bytes,
2267                      QEMUIOVector *qiov, size_t qiov_offset,
2268                      QCowL2Meta *l2meta)
2269{
2270    QCowL2Meta *m;
2271
2272    for (m = l2meta; m != NULL; m = m->next) {
2273        /* If both COW regions are empty then there's nothing to merge */
2274        if (m->cow_start.nb_bytes == 0 && m->cow_end.nb_bytes == 0) {
2275            continue;
2276        }
2277
2278        /* If COW regions are handled already, skip this too */
2279        if (m->skip_cow) {
2280            continue;
2281        }
2282
2283        /* The data (middle) region must be immediately after the
2284         * start region */
2285        if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) {
2286            continue;
2287        }
2288
2289        /* The end region must be immediately after the data (middle)
2290         * region */
2291        if (m->offset + m->cow_end.offset != offset + bytes) {
2292            continue;
2293        }
2294
2295        /* Make sure that adding both COW regions to the QEMUIOVector
2296         * does not exceed IOV_MAX */
2297        if (qemu_iovec_subvec_niov(qiov, qiov_offset, bytes) > IOV_MAX - 2) {
2298            continue;
2299        }
2300
2301        m->data_qiov = qiov;
2302        m->data_qiov_offset = qiov_offset;
2303        return true;
2304    }
2305
2306    return false;
2307}
2308
2309static bool is_unallocated(BlockDriverState *bs, int64_t offset, int64_t bytes)
2310{
2311    int64_t nr;
2312    return !bytes ||
2313        (!bdrv_is_allocated_above(bs, NULL, false, offset, bytes, &nr) &&
2314         nr == bytes);
2315}
2316
2317static bool is_zero_cow(BlockDriverState *bs, QCowL2Meta *m)
2318{
2319    /*
2320     * This check is designed for optimization shortcut so it must be
2321     * efficient.
2322     * Instead of is_zero(), use is_unallocated() as it is faster (but not
2323     * as accurate and can result in false negatives).
2324     */
2325    return is_unallocated(bs, m->offset + m->cow_start.offset,
2326                          m->cow_start.nb_bytes) &&
2327           is_unallocated(bs, m->offset + m->cow_end.offset,
2328                          m->cow_end.nb_bytes);
2329}
2330
2331static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta)
2332{
2333    BDRVQcow2State *s = bs->opaque;
2334    QCowL2Meta *m;
2335
2336    if (!(s->data_file->bs->supported_zero_flags & BDRV_REQ_NO_FALLBACK)) {
2337        return 0;
2338    }
2339
2340    if (bs->encrypted) {
2341        return 0;
2342    }
2343
2344    for (m = l2meta; m != NULL; m = m->next) {
2345        int ret;
2346
2347        if (!m->cow_start.nb_bytes && !m->cow_end.nb_bytes) {
2348            continue;
2349        }
2350
2351        if (!is_zero_cow(bs, m)) {
2352            continue;
2353        }
2354
2355        /*
2356         * instead of writing zero COW buffers,
2357         * efficiently zero out the whole clusters
2358         */
2359
2360        ret = qcow2_pre_write_overlap_check(bs, 0, m->alloc_offset,
2361                                            m->nb_clusters * s->cluster_size,
2362                                            true);
2363        if (ret < 0) {
2364            return ret;
2365        }
2366
2367        BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_SPACE);
2368        ret = bdrv_co_pwrite_zeroes(s->data_file, m->alloc_offset,
2369                                    m->nb_clusters * s->cluster_size,
2370                                    BDRV_REQ_NO_FALLBACK);
2371        if (ret < 0) {
2372            if (ret != -ENOTSUP && ret != -EAGAIN) {
2373                return ret;
2374            }
2375            continue;
2376        }
2377
2378        trace_qcow2_skip_cow(qemu_coroutine_self(), m->offset, m->nb_clusters);
2379        m->skip_cow = true;
2380    }
2381    return 0;
2382}
2383
2384/*
2385 * qcow2_co_pwritev_task
2386 * Called with s->lock unlocked
2387 * l2meta  - if not NULL, qcow2_co_pwritev_task() will consume it. Caller must
2388 *           not use it somehow after qcow2_co_pwritev_task() call
2389 */
2390static coroutine_fn int qcow2_co_pwritev_task(BlockDriverState *bs,
2391                                              uint64_t file_cluster_offset,
2392                                              uint64_t offset, uint64_t bytes,
2393                                              QEMUIOVector *qiov,
2394                                              uint64_t qiov_offset,
2395                                              QCowL2Meta *l2meta)
2396{
2397    int ret;
2398    BDRVQcow2State *s = bs->opaque;
2399    void *crypt_buf = NULL;
2400    int offset_in_cluster = offset_into_cluster(s, offset);
2401    QEMUIOVector encrypted_qiov;
2402
2403    if (bs->encrypted) {
2404        assert(s->crypto);
2405        assert(bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
2406        crypt_buf = qemu_try_blockalign(bs->file->bs, bytes);
2407        if (crypt_buf == NULL) {
2408            ret = -ENOMEM;
2409            goto out_unlocked;
2410        }
2411        qemu_iovec_to_buf(qiov, qiov_offset, crypt_buf, bytes);
2412
2413        if (qcow2_co_encrypt(bs, file_cluster_offset + offset_in_cluster,
2414                             offset, crypt_buf, bytes) < 0)
2415        {
2416            ret = -EIO;
2417            goto out_unlocked;
2418        }
2419
2420        qemu_iovec_init_buf(&encrypted_qiov, crypt_buf, bytes);
2421        qiov = &encrypted_qiov;
2422        qiov_offset = 0;
2423    }
2424
2425    /* Try to efficiently initialize the physical space with zeroes */
2426    ret = handle_alloc_space(bs, l2meta);
2427    if (ret < 0) {
2428        goto out_unlocked;
2429    }
2430
2431    /*
2432     * If we need to do COW, check if it's possible to merge the
2433     * writing of the guest data together with that of the COW regions.
2434     * If it's not possible (or not necessary) then write the
2435     * guest data now.
2436     */
2437    if (!merge_cow(offset, bytes, qiov, qiov_offset, l2meta)) {
2438        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
2439        trace_qcow2_writev_data(qemu_coroutine_self(),
2440                                file_cluster_offset + offset_in_cluster);
2441        ret = bdrv_co_pwritev_part(s->data_file,
2442                                   file_cluster_offset + offset_in_cluster,
2443                                   bytes, qiov, qiov_offset, 0);
2444        if (ret < 0) {
2445            goto out_unlocked;
2446        }
2447    }
2448
2449    qemu_co_mutex_lock(&s->lock);
2450
2451    ret = qcow2_handle_l2meta(bs, &l2meta, true);
2452    goto out_locked;
2453
2454out_unlocked:
2455    qemu_co_mutex_lock(&s->lock);
2456
2457out_locked:
2458    qcow2_handle_l2meta(bs, &l2meta, false);
2459    qemu_co_mutex_unlock(&s->lock);
2460
2461    qemu_vfree(crypt_buf);
2462
2463    return ret;
2464}
2465
2466static coroutine_fn int qcow2_co_pwritev_task_entry(AioTask *task)
2467{
2468    Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
2469
2470    assert(!t->cluster_type);
2471
2472    return qcow2_co_pwritev_task(t->bs, t->file_cluster_offset,
2473                                 t->offset, t->bytes, t->qiov, t->qiov_offset,
2474                                 t->l2meta);
2475}
2476
/*
 * Write @bytes from @qiov (starting at @qiov_offset) to guest offset @offset.
 *
 * Each loop iteration allocates clusters for the next part of the request
 * under s->lock, runs the pre-write overlap check, and then hands the part
 * to qcow2_add_task() with the lock released.  A task pool is created as soon
 * as a part does not cover the whole remaining request.  @flags is not used
 * in this function.
 *
 * Returns 0 on success or a negative errno; a failure recorded in the task
 * pool is reported if no other error occurred.
 */
static coroutine_fn int qcow2_co_pwritev_part(
        BlockDriverState *bs, uint64_t offset, uint64_t bytes,
        QEMUIOVector *qiov, size_t qiov_offset, int flags)
{
    BDRVQcow2State *s = bs->opaque;
    int offset_in_cluster;
    int ret;
    unsigned int cur_bytes; /* number of bytes in current iteration */
    uint64_t cluster_offset;
    QCowL2Meta *l2meta = NULL;
    AioTaskPool *aio = NULL;

    trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);

    /* Stop issuing new parts once the task pool reports a failure */
    while (bytes != 0 && aio_task_pool_status(aio) == 0) {

        l2meta = NULL;

        trace_qcow2_writev_start_part(qemu_coroutine_self());
        offset_in_cluster = offset_into_cluster(s, offset);
        cur_bytes = MIN(bytes, INT_MAX);
        if (bs->encrypted) {
            /* Keep each part within the size limit asserted by the task */
            cur_bytes = MIN(cur_bytes,
                            QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
                            - offset_in_cluster);
        }

        qemu_co_mutex_lock(&s->lock);

        ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
                                         &cluster_offset, &l2meta);
        if (ret < 0) {
            goto out_locked;
        }

        assert(offset_into_cluster(s, cluster_offset) == 0);

        ret = qcow2_pre_write_overlap_check(bs, 0,
                                            cluster_offset + offset_in_cluster,
                                            cur_bytes, true);
        if (ret < 0) {
            goto out_locked;
        }

        qemu_co_mutex_unlock(&s->lock);

        if (!aio && cur_bytes != bytes) {
            aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
        }
        ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_task_entry, 0,
                             cluster_offset, offset, cur_bytes,
                             qiov, qiov_offset, l2meta);
        l2meta = NULL; /* l2meta is consumed by qcow2_co_pwritev_task() */
        if (ret < 0) {
            goto fail_nometa;
        }

        bytes -= cur_bytes;
        offset += cur_bytes;
        qiov_offset += cur_bytes;
        trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes);
    }
    ret = 0;

    qemu_co_mutex_lock(&s->lock);

out_locked:
    /* Abort any allocation that was not handed over to a task */
    qcow2_handle_l2meta(bs, &l2meta, false);

    qemu_co_mutex_unlock(&s->lock);

fail_nometa:
    if (aio) {
        aio_task_pool_wait_all(aio);
        /* Pick up a failure from any worker unless we already have one */
        if (ret == 0) {
            ret = aio_task_pool_status(aio);
        }
        g_free(aio);
    }

    trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);

    return ret;
}
2561
2562static int qcow2_inactivate(BlockDriverState *bs)
2563{
2564    BDRVQcow2State *s = bs->opaque;
2565    int ret, result = 0;
2566    Error *local_err = NULL;
2567
2568    qcow2_store_persistent_dirty_bitmaps(bs, true, &local_err);
2569    if (local_err != NULL) {
2570        result = -EINVAL;
2571        error_reportf_err(local_err, "Lost persistent bitmaps during "
2572                          "inactivation of node '%s': ",
2573                          bdrv_get_device_or_node_name(bs));
2574    }
2575
2576    ret = qcow2_cache_flush(bs, s->l2_table_cache);
2577    if (ret) {
2578        result = ret;
2579        error_report("Failed to flush the L2 table cache: %s",
2580                     strerror(-ret));
2581    }
2582
2583    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
2584    if (ret) {
2585        result = ret;
2586        error_report("Failed to flush the refcount block cache: %s",
2587                     strerror(-ret));
2588    }
2589
2590    if (result == 0) {
2591        qcow2_mark_clean(bs);
2592    }
2593
2594    return result;
2595}
2596
2597static void qcow2_close(BlockDriverState *bs)
2598{
2599    BDRVQcow2State *s = bs->opaque;
2600    qemu_vfree(s->l1_table);
2601    /* else pre-write overlap checks in cache_destroy may crash */
2602    s->l1_table = NULL;
2603
2604    if (!(s->flags & BDRV_O_INACTIVE)) {
2605        qcow2_inactivate(bs);
2606    }
2607
2608    cache_clean_timer_del(bs);
2609    qcow2_cache_destroy(s->l2_table_cache);
2610    qcow2_cache_destroy(s->refcount_block_cache);
2611
2612    qcrypto_block_free(s->crypto);
2613    s->crypto = NULL;
2614    qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
2615
2616    g_free(s->unknown_header_fields);
2617    cleanup_unknown_header_ext(bs);
2618
2619    g_free(s->image_data_file);
2620    g_free(s->image_backing_file);
2621    g_free(s->image_backing_format);
2622
2623    if (has_data_file(bs)) {
2624        bdrv_unref_child(bs, s->data_file);
2625        s->data_file = NULL;
2626    }
2627
2628    qcow2_refcount_close(bs);
2629    qcow2_free_snapshots(bs);
2630}
2631
2632static void coroutine_fn qcow2_co_invalidate_cache(BlockDriverState *bs,
2633                                                   Error **errp)
2634{
2635    BDRVQcow2State *s = bs->opaque;
2636    int flags = s->flags;
2637    QCryptoBlock *crypto = NULL;
2638    QDict *options;
2639    Error *local_err = NULL;
2640    int ret;
2641
2642    /*
2643     * Backing files are read-only which makes all of their metadata immutable,
2644     * that means we don't have to worry about reopening them here.
2645     */
2646
2647    crypto = s->crypto;
2648    s->crypto = NULL;
2649
2650    qcow2_close(bs);
2651
2652    memset(s, 0, sizeof(BDRVQcow2State));
2653    options = qdict_clone_shallow(bs->options);
2654
2655    flags &= ~BDRV_O_INACTIVE;
2656    qemu_co_mutex_lock(&s->lock);
2657    ret = qcow2_do_open(bs, options, flags, &local_err);
2658    qemu_co_mutex_unlock(&s->lock);
2659    qobject_unref(options);
2660    if (local_err) {
2661        error_propagate_prepend(errp, local_err,
2662                                "Could not reopen qcow2 layer: ");
2663        bs->drv = NULL;
2664        return;
2665    } else if (ret < 0) {
2666        error_setg_errno(errp, -ret, "Could not reopen qcow2 layer");
2667        bs->drv = NULL;
2668        return;
2669    }
2670
2671    s->crypto = crypto;
2672}
2673
2674static size_t header_ext_add(char *buf, uint32_t magic, const void *s,
2675    size_t len, size_t buflen)
2676{
2677    QCowExtension *ext_backing_fmt = (QCowExtension*) buf;
2678    size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7);
2679
2680    if (buflen < ext_len) {
2681        return -ENOSPC;
2682    }
2683
2684    *ext_backing_fmt = (QCowExtension) {
2685        .magic  = cpu_to_be32(magic),
2686        .len    = cpu_to_be32(len),
2687    };
2688
2689    if (len) {
2690        memcpy(buf + sizeof(QCowExtension), s, len);
2691    }
2692
2693    return ext_len;
2694}
2695
2696/*
2697 * Updates the qcow2 header, including the variable length parts of it, i.e.
2698 * the backing file name and all extensions. qcow2 was not designed to allow
2699 * such changes, so if we run out of space (we can only use the first cluster)
2700 * this function may fail.
2701 *
2702 * Returns 0 on success, -errno in error cases.
2703 */
2704int qcow2_update_header(BlockDriverState *bs)
2705{
2706    BDRVQcow2State *s = bs->opaque;
2707    QCowHeader *header;
2708    char *buf;
2709    size_t buflen = s->cluster_size;
2710    int ret;
2711    uint64_t total_size;
2712    uint32_t refcount_table_clusters;
2713    size_t header_length;
2714    Qcow2UnknownHeaderExtension *uext;
2715
2716    buf = qemu_blockalign(bs, buflen);
2717
2718    /* Header structure */
2719    header = (QCowHeader*) buf;
2720
2721    if (buflen < sizeof(*header)) {
2722        ret = -ENOSPC;
2723        goto fail;
2724    }
2725
2726    header_length = sizeof(*header) + s->unknown_header_fields_size;
2727    total_size = bs->total_sectors * BDRV_SECTOR_SIZE;
2728    refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
2729
2730    *header = (QCowHeader) {
2731        /* Version 2 fields */
2732        .magic                  = cpu_to_be32(QCOW_MAGIC),
2733        .version                = cpu_to_be32(s->qcow_version),
2734        .backing_file_offset    = 0,
2735        .backing_file_size      = 0,
2736        .cluster_bits           = cpu_to_be32(s->cluster_bits),
2737        .size                   = cpu_to_be64(total_size),
2738        .crypt_method           = cpu_to_be32(s->crypt_method_header),
2739        .l1_size                = cpu_to_be32(s->l1_size),
2740        .l1_table_offset        = cpu_to_be64(s->l1_table_offset),
2741        .refcount_table_offset  = cpu_to_be64(s->refcount_table_offset),
2742        .refcount_table_clusters = cpu_to_be32(refcount_table_clusters),
2743        .nb_snapshots           = cpu_to_be32(s->nb_snapshots),
2744        .snapshots_offset       = cpu_to_be64(s->snapshots_offset),
2745
2746        /* Version 3 fields */
2747        .incompatible_features  = cpu_to_be64(s->incompatible_features),
2748        .compatible_features    = cpu_to_be64(s->compatible_features),
2749        .autoclear_features     = cpu_to_be64(s->autoclear_features),
2750        .refcount_order         = cpu_to_be32(s->refcount_order),
2751        .header_length          = cpu_to_be32(header_length),
2752    };
2753
2754    /* For older versions, write a shorter header */
2755    switch (s->qcow_version) {
2756    case 2:
2757        ret = offsetof(QCowHeader, incompatible_features);
2758        break;
2759    case 3:
2760        ret = sizeof(*header);
2761        break;
2762    default:
2763        ret = -EINVAL;
2764        goto fail;
2765    }
2766
2767    buf += ret;
2768    buflen -= ret;
2769    memset(buf, 0, buflen);
2770
2771    /* Preserve any unknown field in the header */
2772    if (s->unknown_header_fields_size) {
2773        if (buflen < s->unknown_header_fields_size) {
2774            ret = -ENOSPC;
2775            goto fail;
2776        }
2777
2778        memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size);
2779        buf += s->unknown_header_fields_size;
2780        buflen -= s->unknown_header_fields_size;
2781    }
2782
2783    /* Backing file format header extension */
2784    if (s->image_backing_format) {
2785        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
2786                             s->image_backing_format,
2787                             strlen(s->image_backing_format),
2788                             buflen);
2789        if (ret < 0) {
2790            goto fail;
2791        }
2792
2793        buf += ret;
2794        buflen -= ret;
2795    }
2796
2797    /* External data file header extension */
2798    if (has_data_file(bs) && s->image_data_file) {
2799        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_DATA_FILE,
2800                             s->image_data_file, strlen(s->image_data_file),
2801                             buflen);
2802        if (ret < 0) {
2803            goto fail;
2804        }
2805
2806        buf += ret;
2807        buflen -= ret;
2808    }
2809
2810    /* Full disk encryption header pointer extension */
2811    if (s->crypto_header.offset != 0) {
2812        s->crypto_header.offset = cpu_to_be64(s->crypto_header.offset);
2813        s->crypto_header.length = cpu_to_be64(s->crypto_header.length);
2814        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_CRYPTO_HEADER,
2815                             &s->crypto_header, sizeof(s->crypto_header),
2816                             buflen);
2817        s->crypto_header.offset = be64_to_cpu(s->crypto_header.offset);
2818        s->crypto_header.length = be64_to_cpu(s->crypto_header.length);
2819        if (ret < 0) {
2820            goto fail;
2821        }
2822        buf += ret;
2823        buflen -= ret;
2824    }
2825
2826    /*
2827     * Feature table.  A mere 8 feature names occupies 392 bytes, and
2828     * when coupled with the v3 minimum header of 104 bytes plus the
2829     * 8-byte end-of-extension marker, that would leave only 8 bytes
2830     * for a backing file name in an image with 512-byte clusters.
2831     * Thus, we choose to omit this header for cluster sizes 4k and
2832     * smaller.
2833     */
2834    if (s->qcow_version >= 3 && s->cluster_size > 4096) {
2835        static const Qcow2Feature features[] = {
2836            {
2837                .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
2838                .bit  = QCOW2_INCOMPAT_DIRTY_BITNR,
2839                .name = "dirty bit",
2840            },
2841            {
2842                .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
2843                .bit  = QCOW2_INCOMPAT_CORRUPT_BITNR,
2844                .name = "corrupt bit",
2845            },
2846            {
2847                .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
2848                .bit  = QCOW2_INCOMPAT_DATA_FILE_BITNR,
2849                .name = "external data file",
2850            },
2851            {
2852                .type = QCOW2_FEAT_TYPE_COMPATIBLE,
2853                .bit  = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
2854                .name = "lazy refcounts",
2855            },
2856            {
2857                .type = QCOW2_FEAT_TYPE_AUTOCLEAR,
2858                .bit  = QCOW2_AUTOCLEAR_BITMAPS_BITNR,
2859                .name = "bitmaps",
2860            },
2861            {
2862                .type = QCOW2_FEAT_TYPE_AUTOCLEAR,
2863                .bit  = QCOW2_AUTOCLEAR_DATA_FILE_RAW_BITNR,
2864                .name = "raw external data",
2865            },
2866        };
2867
2868        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
2869                             features, sizeof(features), buflen);
2870        if (ret < 0) {
2871            goto fail;
2872        }
2873        buf += ret;
2874        buflen -= ret;
2875    }
2876
2877    /* Bitmap extension */
2878    if (s->nb_bitmaps > 0) {
2879        Qcow2BitmapHeaderExt bitmaps_header = {
2880            .nb_bitmaps = cpu_to_be32(s->nb_bitmaps),
2881            .bitmap_directory_size =
2882                    cpu_to_be64(s->bitmap_directory_size),
2883            .bitmap_directory_offset =
2884                    cpu_to_be64(s->bitmap_directory_offset)
2885        };
2886        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BITMAPS,
2887                             &bitmaps_header, sizeof(bitmaps_header),
2888                             buflen);
2889        if (ret < 0) {
2890            goto fail;
2891        }
2892        buf += ret;
2893        buflen -= ret;
2894    }
2895
2896    /* Keep unknown header extensions */
2897    QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
2898        ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
2899        if (ret < 0) {
2900            goto fail;
2901        }
2902
2903        buf += ret;
2904        buflen -= ret;
2905    }
2906
2907    /* End of header extensions */
2908    ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen);
2909    if (ret < 0) {
2910        goto fail;
2911    }
2912
2913    buf += ret;
2914    buflen -= ret;
2915
2916    /* Backing file name */
2917    if (s->image_backing_file) {
2918        size_t backing_file_len = strlen(s->image_backing_file);
2919
2920        if (buflen < backing_file_len) {
2921            ret = -ENOSPC;
2922            goto fail;
2923        }
2924
2925        /* Using strncpy is ok here, since buf is not NUL-terminated. */
2926        strncpy(buf, s->image_backing_file, buflen);
2927
2928        header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
2929        header->backing_file_size   = cpu_to_be32(backing_file_len);
2930    }
2931
2932    /* Write the new header */
2933    ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
2934    if (ret < 0) {
2935        goto fail;
2936    }
2937
2938    ret = 0;
2939fail:
2940    qemu_vfree(header);
2941    return ret;
2942}
2943
2944static int qcow2_change_backing_file(BlockDriverState *bs,
2945    const char *backing_file, const char *backing_fmt)
2946{
2947    BDRVQcow2State *s = bs->opaque;
2948
2949    /* Adding a backing file means that the external data file alone won't be
2950     * enough to make sense of the content */
2951    if (backing_file && data_file_is_raw(bs)) {
2952        return -EINVAL;
2953    }
2954
2955    if (backing_file && strlen(backing_file) > 1023) {
2956        return -EINVAL;
2957    }
2958
2959    pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
2960            backing_file ?: "");
2961    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2962    pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2963
2964    g_free(s->image_backing_file);
2965    g_free(s->image_backing_format);
2966
2967    s->image_backing_file = backing_file ? g_strdup(bs->backing_file) : NULL;
2968    s->image_backing_format = backing_fmt ? g_strdup(bs->backing_format) : NULL;
2969
2970    return qcow2_update_header(bs);
2971}
2972
2973static int qcow2_crypt_method_from_format(const char *encryptfmt)
2974{
2975    if (g_str_equal(encryptfmt, "luks")) {
2976        return QCOW_CRYPT_LUKS;
2977    } else if (g_str_equal(encryptfmt, "aes")) {
2978        return QCOW_CRYPT_AES;
2979    } else {
2980        return -EINVAL;
2981    }
2982}
2983
2984static int qcow2_set_up_encryption(BlockDriverState *bs,
2985                                   QCryptoBlockCreateOptions *cryptoopts,
2986                                   Error **errp)
2987{
2988    BDRVQcow2State *s = bs->opaque;
2989    QCryptoBlock *crypto = NULL;
2990    int fmt, ret;
2991
2992    switch (cryptoopts->format) {
2993    case Q_CRYPTO_BLOCK_FORMAT_LUKS:
2994        fmt = QCOW_CRYPT_LUKS;
2995        break;
2996    case Q_CRYPTO_BLOCK_FORMAT_QCOW:
2997        fmt = QCOW_CRYPT_AES;
2998        break;
2999    default:
3000        error_setg(errp, "Crypto format not supported in qcow2");
3001        return -EINVAL;
3002    }
3003
3004    s->crypt_method_header = fmt;
3005
3006    crypto = qcrypto_block_create(cryptoopts, "encrypt.",
3007                                  qcow2_crypto_hdr_init_func,
3008                                  qcow2_crypto_hdr_write_func,
3009                                  bs, errp);
3010    if (!crypto) {
3011        return -EINVAL;
3012    }
3013
3014    ret = qcow2_update_header(bs);
3015    if (ret < 0) {
3016        error_setg_errno(errp, -ret, "Could not write encryption header");
3017        goto out;
3018    }
3019
3020    ret = 0;
3021 out:
3022    qcrypto_block_free(crypto);
3023    return ret;
3024}
3025
3026/**
3027 * Preallocates metadata structures for data clusters between @offset (in the
3028 * guest disk) and @new_length (which is thus generally the new guest disk
3029 * size).
3030 *
3031 * Returns: 0 on success, -errno on failure.
3032 */
3033static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset,
3034                                       uint64_t new_length, PreallocMode mode,
3035                                       Error **errp)
3036{
3037    BDRVQcow2State *s = bs->opaque;
3038    uint64_t bytes;
3039    uint64_t host_offset = 0;
3040    int64_t file_length;
3041    unsigned int cur_bytes;
3042    int ret;
3043    QCowL2Meta *meta;
3044
3045    assert(offset <= new_length);
3046    bytes = new_length - offset;
3047
3048    while (bytes) {
3049        cur_bytes = MIN(bytes, QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size));
3050        ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
3051                                         &host_offset, &meta);
3052        if (ret < 0) {
3053            error_setg_errno(errp, -ret, "Allocating clusters failed");
3054            return ret;
3055        }
3056
3057        while (meta) {
3058            QCowL2Meta *next = meta->next;
3059
3060            ret = qcow2_alloc_cluster_link_l2(bs, meta);
3061            if (ret < 0) {
3062                error_setg_errno(errp, -ret, "Mapping clusters failed");
3063                qcow2_free_any_clusters(bs, meta->alloc_offset,
3064                                        meta->nb_clusters, QCOW2_DISCARD_NEVER);
3065                return ret;
3066            }
3067
3068            /* There are no dependent requests, but we need to remove our
3069             * request from the list of in-flight requests */
3070            QLIST_REMOVE(meta, next_in_flight);
3071
3072            g_free(meta);
3073            meta = next;
3074        }
3075
3076        /* TODO Preallocate data if requested */
3077
3078        bytes -= cur_bytes;
3079        offset += cur_bytes;
3080    }
3081
3082    /*
3083     * It is expected that the image file is large enough to actually contain
3084     * all of the allocated clusters (otherwise we get failing reads after
3085     * EOF). Extend the image to the last allocated sector.
3086     */
3087    file_length = bdrv_getlength(s->data_file->bs);
3088    if (file_length < 0) {
3089        error_setg_errno(errp, -file_length, "Could not get file size");
3090        return file_length;
3091    }
3092
3093    if (host_offset + cur_bytes > file_length) {
3094        if (mode == PREALLOC_MODE_METADATA) {
3095            mode = PREALLOC_MODE_OFF;
3096        }
3097        ret = bdrv_co_truncate(s->data_file, host_offset + cur_bytes, false,
3098                               mode, errp);
3099        if (ret < 0) {
3100            return ret;
3101        }
3102    }
3103
3104    return 0;
3105}
3106
/* qcow2_refcount_metadata_size:
 * @clusters: number of clusters to refcount (including data and L1/L2 tables)
 * @cluster_size: size of a cluster, in bytes
 * @refcount_order: refcount bits power-of-2 exponent
 * @generous_increase: allow for the refcount table to be 1.5x as large as it
 *                     needs to be
 * @refblock_count: if not NULL, receives the number of refcount block
 *                  clusters
 *
 * Returns: Number of bytes required for refcount blocks and table metadata.
 */
int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size,
                                     int refcount_order, bool generous_increase,
                                     uint64_t *refblock_count)
{
    /*
     * Refcount metadata is itself reference-counted, so its size depends on
     * itself.  Rather than deriving a closed formula, iterate until adding
     * the refcount blocks and table clusters to the total no longer changes
     * how many of them are needed.
     */
    int64_t entries_per_table_cluster = cluster_size / sizeof(uint64_t);
    int64_t entries_per_block = cluster_size * 8 / (1 << refcount_order);
    int64_t table = 0;  /* number of refcount table clusters */
    int64_t blocks = 0; /* number of refcount block clusters */
    int64_t total = 0;  /* clusters + blocks + table of the latest round */
    int64_t previous;

    for (;;) {
        previous = total;

        /* Both divisions round up */
        blocks = (clusters + table + blocks + entries_per_block - 1)
                 / entries_per_block;
        table = (blocks + entries_per_table_cluster - 1)
                / entries_per_table_cluster;
        total = clusters + blocks + table;

        if (total == previous && generous_increase) {
            /* Converged once: add half a table's worth and start over */
            clusters += (table + 2 - 1) / 2;
            total = 0; /* force another round */
            generous_increase = false;
        }

        if (total == previous) {
            break;
        }
    }

    if (refblock_count != NULL) {
        *refblock_count = blocks;
    }

    return (blocks + table) * cluster_size;
}
3155
/**
 * qcow2_calc_prealloc_size:
 * @total_size: virtual disk size in bytes
 * @cluster_size: cluster size in bytes
 * @refcount_order: refcount bits power-of-2 exponent
 *
 * The estimate adds up one header cluster, the L2 tables, the L1 table, the
 * refcount metadata for all of those plus the data, and the guest data
 * itself rounded up to whole clusters.
 *
 * Returns: Total number of bytes required for the fully allocated image
 * (including metadata).
 */
static int64_t qcow2_calc_prealloc_size(int64_t total_size,
                                        size_t cluster_size,
                                        int refcount_order)
{
    int64_t data_size = ROUND_UP(total_size, cluster_size);
    int64_t meta_size;
    uint64_t l2_entries;
    uint64_t l1_entries;

    /* header: 1 cluster */
    meta_size = cluster_size;

    /* L2 tables: one entry per data cluster, in whole tables */
    l2_entries = data_size / cluster_size;
    l2_entries = ROUND_UP(l2_entries, cluster_size / sizeof(uint64_t));
    meta_size += l2_entries * sizeof(uint64_t);

    /* L1 table: one entry per L2 table, rounded like the L2 entries */
    l1_entries = l2_entries * sizeof(uint64_t) / cluster_size;
    l1_entries = ROUND_UP(l1_entries, cluster_size / sizeof(uint64_t));
    meta_size += l1_entries * sizeof(uint64_t);

    /* refcount table and blocks covering everything counted so far */
    meta_size += qcow2_refcount_metadata_size(
            (meta_size + data_size) / cluster_size,
            cluster_size, refcount_order, false, NULL);

    return meta_size + data_size;
}
3193
3194static bool validate_cluster_size(size_t cluster_size, Error **errp)
3195{
3196    int cluster_bits = ctz32(cluster_size);
3197    if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS ||
3198        (1 << cluster_bits) != cluster_size)
3199    {
3200        error_setg(errp, "Cluster size must be a power of two between %d and "
3201                   "%dk", 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10));
3202        return false;
3203    }
3204    return true;
3205}
3206
3207static size_t qcow2_opt_get_cluster_size_del(QemuOpts *opts, Error **errp)
3208{
3209    size_t cluster_size;
3210
3211    cluster_size = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE,
3212                                         DEFAULT_CLUSTER_SIZE);
3213    if (!validate_cluster_size(cluster_size, errp)) {
3214        return 0;
3215    }
3216    return cluster_size;
3217}
3218
3219static int qcow2_opt_get_version_del(QemuOpts *opts, Error **errp)
3220{
3221    char *buf;
3222    int ret;
3223
3224    buf = qemu_opt_get_del(opts, BLOCK_OPT_COMPAT_LEVEL);
3225    if (!buf) {
3226        ret = 3; /* default */
3227    } else if (!strcmp(buf, "0.10")) {
3228        ret = 2;
3229    } else if (!strcmp(buf, "1.1")) {
3230        ret = 3;
3231    } else {
3232        error_setg(errp, "Invalid compatibility level: '%s'", buf);
3233        ret = -EINVAL;
3234    }
3235    g_free(buf);
3236    return ret;
3237}
3238
3239static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version,
3240                                                Error **errp)
3241{
3242    uint64_t refcount_bits;
3243
3244    refcount_bits = qemu_opt_get_number_del(opts, BLOCK_OPT_REFCOUNT_BITS, 16);
3245    if (refcount_bits > 64 || !is_power_of_2(refcount_bits)) {
3246        error_setg(errp, "Refcount width must be a power of two and may not "
3247                   "exceed 64 bits");
3248        return 0;
3249    }
3250
3251    if (version < 3 && refcount_bits != 16) {
3252        error_setg(errp, "Different refcount widths than 16 bits require "
3253                   "compatibility level 1.1 or above (use compat=1.1 or "
3254                   "greater)");
3255        return 0;
3256    }
3257
3258    return refcount_bits;
3259}
3260
3261static int coroutine_fn
3262qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
3263{
3264    BlockdevCreateOptionsQcow2 *qcow2_opts;
3265    QDict *options;
3266
3267    /*
3268     * Open the image file and write a minimal qcow2 header.
3269     *
3270     * We keep things simple and start with a zero-sized image. We also
3271     * do without refcount blocks or a L1 table for now. We'll fix the
3272     * inconsistency later.
3273     *
3274     * We do need a refcount table because growing the refcount table means
3275     * allocating two new refcount blocks - the second of which would be at
3276     * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file
3277     * size for any qcow2 image.
3278     */
3279    BlockBackend *blk = NULL;
3280    BlockDriverState *bs = NULL;
3281    BlockDriverState *data_bs = NULL;
3282    QCowHeader *header;
3283    size_t cluster_size;
3284    int version;
3285    int refcount_order;
3286    uint64_t* refcount_table;
3287    Error *local_err = NULL;
3288    int ret;
3289
3290    assert(create_options->driver == BLOCKDEV_DRIVER_QCOW2);
3291    qcow2_opts = &create_options->u.qcow2;
3292
3293    bs = bdrv_open_blockdev_ref(qcow2_opts->file, errp);
3294    if (bs == NULL) {
3295        return -EIO;
3296    }
3297
3298    /* Validate options and set default values */
3299    if (!QEMU_IS_ALIGNED(qcow2_opts->size, BDRV_SECTOR_SIZE)) {
3300        error_setg(errp, "Image size must be a multiple of %u bytes",
3301                   (unsigned) BDRV_SECTOR_SIZE);
3302        ret = -EINVAL;
3303        goto out;
3304    }
3305
3306    if (qcow2_opts->has_version) {
3307        switch (qcow2_opts->version) {
3308        case BLOCKDEV_QCOW2_VERSION_V2:
3309            version = 2;
3310            break;
3311        case BLOCKDEV_QCOW2_VERSION_V3:
3312            version = 3;
3313            break;
3314        default:
3315            g_assert_not_reached();
3316        }
3317    } else {
3318        version = 3;
3319    }
3320
3321    if (qcow2_opts->has_cluster_size) {
3322        cluster_size = qcow2_opts->cluster_size;
3323    } else {
3324        cluster_size = DEFAULT_CLUSTER_SIZE;
3325    }
3326
3327    if (!validate_cluster_size(cluster_size, errp)) {
3328        ret = -EINVAL;
3329        goto out;
3330    }
3331
3332    if (!qcow2_opts->has_preallocation) {
3333        qcow2_opts->preallocation = PREALLOC_MODE_OFF;
3334    }
3335    if (qcow2_opts->has_backing_file &&
3336        qcow2_opts->preallocation != PREALLOC_MODE_OFF)
3337    {
3338        error_setg(errp, "Backing file and preallocation cannot be used at "
3339                   "the same time");
3340        ret = -EINVAL;
3341        goto out;
3342    }
3343    if (qcow2_opts->has_backing_fmt && !qcow2_opts->has_backing_file) {
3344        error_setg(errp, "Backing format cannot be used without backing file");
3345        ret = -EINVAL;
3346        goto out;
3347    }
3348
3349    if (!qcow2_opts->has_lazy_refcounts) {
3350        qcow2_opts->lazy_refcounts = false;
3351    }
3352    if (version < 3 && qcow2_opts->lazy_refcounts) {
3353        error_setg(errp, "Lazy refcounts only supported with compatibility "
3354                   "level 1.1 and above (use version=v3 or greater)");
3355        ret = -EINVAL;
3356        goto out;
3357    }
3358
3359    if (!qcow2_opts->has_refcount_bits) {
3360        qcow2_opts->refcount_bits = 16;
3361    }
3362    if (qcow2_opts->refcount_bits > 64 ||
3363        !is_power_of_2(qcow2_opts->refcount_bits))
3364    {
3365        error_setg(errp, "Refcount width must be a power of two and may not "
3366                   "exceed 64 bits");
3367        ret = -EINVAL;
3368        goto out;
3369    }
3370    if (version < 3 && qcow2_opts->refcount_bits != 16) {
3371        error_setg(errp, "Different refcount widths than 16 bits require "
3372                   "compatibility level 1.1 or above (use version=v3 or "
3373                   "greater)");
3374        ret = -EINVAL;
3375        goto out;
3376    }
3377    refcount_order = ctz32(qcow2_opts->refcount_bits);
3378
3379    if (qcow2_opts->data_file_raw && !qcow2_opts->data_file) {
3380        error_setg(errp, "data-file-raw requires data-file");
3381        ret = -EINVAL;
3382        goto out;
3383    }
3384    if (qcow2_opts->data_file_raw && qcow2_opts->has_backing_file) {
3385        error_setg(errp, "Backing file and data-file-raw cannot be used at "
3386                   "the same time");
3387        ret = -EINVAL;
3388        goto out;
3389    }
3390
3391    if (qcow2_opts->data_file) {
3392        if (version < 3) {
3393            error_setg(errp, "External data files are only supported with "
3394                       "compatibility level 1.1 and above (use version=v3 or "
3395                       "greater)");
3396            ret = -EINVAL;
3397            goto out;
3398        }
3399        data_bs = bdrv_open_blockdev_ref(qcow2_opts->data_file, errp);
3400        if (data_bs == NULL) {
3401            ret = -EIO;
3402            goto out;
3403        }
3404    }
3405
3406    /* Create BlockBackend to write to the image */
3407    blk = blk_new(bdrv_get_aio_context(bs),
3408                  BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
3409    ret = blk_insert_bs(blk, bs, errp);
3410    if (ret < 0) {
3411        goto out;
3412    }
3413    blk_set_allow_write_beyond_eof(blk, true);
3414
3415    /* Write the header */
3416    QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header));
3417    header = g_malloc0(cluster_size);
3418    *header = (QCowHeader) {
3419        .magic                      = cpu_to_be32(QCOW_MAGIC),
3420        .version                    = cpu_to_be32(version),
3421        .cluster_bits               = cpu_to_be32(ctz32(cluster_size)),
3422        .size                       = cpu_to_be64(0),
3423        .l1_table_offset            = cpu_to_be64(0),
3424        .l1_size                    = cpu_to_be32(0),
3425        .refcount_table_offset      = cpu_to_be64(cluster_size),
3426        .refcount_table_clusters    = cpu_to_be32(1),
3427        .refcount_order             = cpu_to_be32(refcount_order),
3428        .header_length              = cpu_to_be32(sizeof(*header)),
3429    };
3430
3431    /* We'll update this to correct value later */
3432    header->crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
3433
3434    if (qcow2_opts->lazy_refcounts) {
3435        header->compatible_features |=
3436            cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
3437    }
3438    if (data_bs) {
3439        header->incompatible_features |=
3440            cpu_to_be64(QCOW2_INCOMPAT_DATA_FILE);
3441    }
3442    if (qcow2_opts->data_file_raw) {
3443        header->autoclear_features |=
3444            cpu_to_be64(QCOW2_AUTOCLEAR_DATA_FILE_RAW);
3445    }
3446
3447    ret = blk_pwrite(blk, 0, header, cluster_size, 0);
3448    g_free(header);
3449    if (ret < 0) {
3450        error_setg_errno(errp, -ret, "Could not write qcow2 header");
3451        goto out;
3452    }
3453
3454    /* Write a refcount table with one refcount block */
3455    refcount_table = g_malloc0(2 * cluster_size);
3456    refcount_table[0] = cpu_to_be64(2 * cluster_size);
3457    ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size, 0);
3458    g_free(refcount_table);
3459
3460    if (ret < 0) {
3461        error_setg_errno(errp, -ret, "Could not write refcount table");
3462        goto out;
3463    }
3464
3465    blk_unref(blk);
3466    blk = NULL;
3467
3468    /*
3469     * And now open the image and make it consistent first (i.e. increase the
3470     * refcount of the cluster that is occupied by the header and the refcount
3471     * table)
3472     */
3473    options = qdict_new();
3474    qdict_put_str(options, "driver", "qcow2");
3475    qdict_put_str(options, "file", bs->node_name);
3476    if (data_bs) {
3477        qdict_put_str(options, "data-file", data_bs->node_name);
3478    }
3479    blk = blk_new_open(NULL, NULL, options,
3480                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
3481                       &local_err);
3482    if (blk == NULL) {
3483        error_propagate(errp, local_err);
3484        ret = -EIO;
3485        goto out;
3486    }
3487
3488    ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size);
3489    if (ret < 0) {
3490        error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
3491                         "header and refcount table");
3492        goto out;
3493
3494    } else if (ret != 0) {
3495        error_report("Huh, first cluster in empty image is already in use?");
3496        abort();
3497    }
3498
3499    /* Set the external data file if necessary */
3500    if (data_bs) {
3501        BDRVQcow2State *s = blk_bs(blk)->opaque;
3502        s->image_data_file = g_strdup(data_bs->filename);
3503    }
3504
3505    /* Create a full header (including things like feature table) */
3506    ret = qcow2_update_header(blk_bs(blk));
3507    if (ret < 0) {
3508        error_setg_errno(errp, -ret, "Could not update qcow2 header");
3509        goto out;
3510    }
3511
3512    /* Okay, now that we have a valid image, let's give it the right size */
3513    ret = blk_truncate(blk, qcow2_opts->size, false, qcow2_opts->preallocation,
3514                       errp);
3515    if (ret < 0) {
3516        error_prepend(errp, "Could not resize image: ");
3517        goto out;
3518    }
3519
3520    /* Want a backing file? There you go. */
3521    if (qcow2_opts->has_backing_file) {
3522        const char *backing_format = NULL;
3523
3524        if (qcow2_opts->has_backing_fmt) {
3525            backing_format = BlockdevDriver_str(qcow2_opts->backing_fmt);
3526        }
3527
3528        ret = bdrv_change_backing_file(blk_bs(blk), qcow2_opts->backing_file,
3529                                       backing_format);
3530        if (ret < 0) {
3531            error_setg_errno(errp, -ret, "Could not assign backing file '%s' "
3532                             "with format '%s'", qcow2_opts->backing_file,
3533                             backing_format);
3534            goto out;
3535        }
3536    }
3537
3538    /* Want encryption? There you go. */
3539    if (qcow2_opts->has_encrypt) {
3540        ret = qcow2_set_up_encryption(blk_bs(blk), qcow2_opts->encrypt, errp);
3541        if (ret < 0) {
3542            goto out;
3543        }
3544    }
3545
3546    blk_unref(blk);
3547    blk = NULL;
3548
3549    /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning.
3550     * Using BDRV_O_NO_IO, since encryption is now setup we don't want to
3551     * have to setup decryption context. We're not doing any I/O on the top
3552     * level BlockDriverState, only lower layers, where BDRV_O_NO_IO does
3553     * not have effect.
3554     */
3555    options = qdict_new();
3556    qdict_put_str(options, "driver", "qcow2");
3557    qdict_put_str(options, "file", bs->node_name);
3558    if (data_bs) {
3559        qdict_put_str(options, "data-file", data_bs->node_name);
3560    }
3561    blk = blk_new_open(NULL, NULL, options,
3562                       BDRV_O_RDWR | BDRV_O_NO_BACKING | BDRV_O_NO_IO,
3563                       &local_err);
3564    if (blk == NULL) {
3565        error_propagate(errp, local_err);
3566        ret = -EIO;
3567        goto out;
3568    }
3569
3570    ret = 0;
3571out:
3572    blk_unref(blk);
3573    bdrv_unref(bs);
3574    bdrv_unref(data_bs);
3575    return ret;
3576}
3577
3578static int coroutine_fn qcow2_co_create_opts(BlockDriver *drv,
3579                                             const char *filename,
3580                                             QemuOpts *opts,
3581                                             Error **errp)
3582{
3583    BlockdevCreateOptions *create_options = NULL;
3584    QDict *qdict;
3585    Visitor *v;
3586    BlockDriverState *bs = NULL;
3587    BlockDriverState *data_bs = NULL;
3588    Error *local_err = NULL;
3589    const char *val;
3590    int ret;
3591
3592    /* Only the keyval visitor supports the dotted syntax needed for
3593     * encryption, so go through a QDict before getting a QAPI type. Ignore
3594     * options meant for the protocol layer so that the visitor doesn't
3595     * complain. */
3596    qdict = qemu_opts_to_qdict_filtered(opts, NULL, bdrv_qcow2.create_opts,
3597                                        true);
3598
3599    /* Handle encryption options */
3600    val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT);
3601    if (val && !strcmp(val, "on")) {
3602        qdict_put_str(qdict, BLOCK_OPT_ENCRYPT, "qcow");
3603    } else if (val && !strcmp(val, "off")) {
3604        qdict_del(qdict, BLOCK_OPT_ENCRYPT);
3605    }
3606
3607    val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT);
3608    if (val && !strcmp(val, "aes")) {
3609        qdict_put_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT, "qcow");
3610    }
3611
3612    /* Convert compat=0.10/1.1 into compat=v2/v3, to be renamed into
3613     * version=v2/v3 below. */
3614    val = qdict_get_try_str(qdict, BLOCK_OPT_COMPAT_LEVEL);
3615    if (val && !strcmp(val, "0.10")) {
3616        qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v2");
3617    } else if (val && !strcmp(val, "1.1")) {
3618        qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v3");
3619    }
3620
3621    /* Change legacy command line options into QMP ones */
3622    static const QDictRenames opt_renames[] = {
3623        { BLOCK_OPT_BACKING_FILE,       "backing-file" },
3624        { BLOCK_OPT_BACKING_FMT,        "backing-fmt" },
3625        { BLOCK_OPT_CLUSTER_SIZE,       "cluster-size" },
3626        { BLOCK_OPT_LAZY_REFCOUNTS,     "lazy-refcounts" },
3627        { BLOCK_OPT_REFCOUNT_BITS,      "refcount-bits" },
3628        { BLOCK_OPT_ENCRYPT,            BLOCK_OPT_ENCRYPT_FORMAT },
3629        { BLOCK_OPT_COMPAT_LEVEL,       "version" },
3630        { BLOCK_OPT_DATA_FILE_RAW,      "data-file-raw" },
3631        { NULL, NULL },
3632    };
3633
3634    if (!qdict_rename_keys(qdict, opt_renames, errp)) {
3635        ret = -EINVAL;
3636        goto finish;
3637    }
3638
3639    /* Create and open the file (protocol layer) */
3640    ret = bdrv_create_file(filename, opts, errp);
3641    if (ret < 0) {
3642        goto finish;
3643    }
3644
3645    bs = bdrv_open(filename, NULL, NULL,
3646                   BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
3647    if (bs == NULL) {
3648        ret = -EIO;
3649        goto finish;
3650    }
3651
3652    /* Create and open an external data file (protocol layer) */
3653    val = qdict_get_try_str(qdict, BLOCK_OPT_DATA_FILE);
3654    if (val) {
3655        ret = bdrv_create_file(val, opts, errp);
3656        if (ret < 0) {
3657            goto finish;
3658        }
3659
3660        data_bs = bdrv_open(val, NULL, NULL,
3661                            BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
3662                            errp);
3663        if (data_bs == NULL) {
3664            ret = -EIO;
3665            goto finish;
3666        }
3667
3668        qdict_del(qdict, BLOCK_OPT_DATA_FILE);
3669        qdict_put_str(qdict, "data-file", data_bs->node_name);
3670    }
3671
3672    /* Set 'driver' and 'node' options */
3673    qdict_put_str(qdict, "driver", "qcow2");
3674    qdict_put_str(qdict, "file", bs->node_name);
3675
3676    /* Now get the QAPI type BlockdevCreateOptions */
3677    v = qobject_input_visitor_new_flat_confused(qdict, errp);
3678    if (!v) {
3679        ret = -EINVAL;
3680        goto finish;
3681    }
3682
3683    visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err);
3684    visit_free(v);
3685
3686    if (local_err) {
3687        error_propagate(errp, local_err);
3688        ret = -EINVAL;
3689        goto finish;
3690    }
3691
3692    /* Silently round up size */
3693    create_options->u.qcow2.size = ROUND_UP(create_options->u.qcow2.size,
3694                                            BDRV_SECTOR_SIZE);
3695
3696    /* Create the qcow2 image (format layer) */
3697    ret = qcow2_co_create(create_options, errp);
3698    if (ret < 0) {
3699        goto finish;
3700    }
3701
3702    ret = 0;
3703finish:
3704    qobject_unref(qdict);
3705    bdrv_unref(bs);
3706    bdrv_unref(data_bs);
3707    qapi_free_BlockdevCreateOptions(create_options);
3708    return ret;
3709}
3710
3711
3712static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes)
3713{
3714    int64_t nr;
3715    int res;
3716
3717    /* Clamp to image length, before checking status of underlying sectors */
3718    if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) {
3719        bytes = bs->total_sectors * BDRV_SECTOR_SIZE - offset;
3720    }
3721
3722    if (!bytes) {
3723        return true;
3724    }
3725    res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL);
3726    return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == bytes;
3727}
3728
3729static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
3730    int64_t offset, int bytes, BdrvRequestFlags flags)
3731{
3732    int ret;
3733    BDRVQcow2State *s = bs->opaque;
3734
3735    uint32_t head = offset % s->cluster_size;
3736    uint32_t tail = (offset + bytes) % s->cluster_size;
3737
3738    trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, bytes);
3739    if (offset + bytes == bs->total_sectors * BDRV_SECTOR_SIZE) {
3740        tail = 0;
3741    }
3742
3743    if (head || tail) {
3744        uint64_t off;
3745        unsigned int nr;
3746
3747        assert(head + bytes <= s->cluster_size);
3748
3749        /* check whether remainder of cluster already reads as zero */
3750        if (!(is_zero(bs, offset - head, head) &&
3751              is_zero(bs, offset + bytes,
3752                      tail ? s->cluster_size - tail : 0))) {
3753            return -ENOTSUP;
3754        }
3755
3756        qemu_co_mutex_lock(&s->lock);
3757        /* We can have new write after previous check */
3758        offset = QEMU_ALIGN_DOWN(offset, s->cluster_size);
3759        bytes = s->cluster_size;
3760        nr = s->cluster_size;
3761        ret = qcow2_get_cluster_offset(bs, offset, &nr, &off);
3762        if (ret != QCOW2_CLUSTER_UNALLOCATED &&
3763            ret != QCOW2_CLUSTER_ZERO_PLAIN &&
3764            ret != QCOW2_CLUSTER_ZERO_ALLOC) {
3765            qemu_co_mutex_unlock(&s->lock);
3766            return -ENOTSUP;
3767        }
3768    } else {
3769        qemu_co_mutex_lock(&s->lock);
3770    }
3771
3772    trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, bytes);
3773
3774    /* Whatever is left can use real zero clusters */
3775    ret = qcow2_cluster_zeroize(bs, offset, bytes, flags);
3776    qemu_co_mutex_unlock(&s->lock);
3777
3778    return ret;
3779}
3780
3781static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
3782                                          int64_t offset, int bytes)
3783{
3784    int ret;
3785    BDRVQcow2State *s = bs->opaque;
3786
3787    /* If the image does not support QCOW_OFLAG_ZERO then discarding
3788     * clusters could expose stale data from the backing file. */
3789    if (s->qcow_version < 3 && bs->backing) {
3790        return -ENOTSUP;
3791    }
3792
3793    if (!QEMU_IS_ALIGNED(offset | bytes, s->cluster_size)) {
3794        assert(bytes < s->cluster_size);
3795        /* Ignore partial clusters, except for the special case of the
3796         * complete partial cluster at the end of an unaligned file */
3797        if (!QEMU_IS_ALIGNED(offset, s->cluster_size) ||
3798            offset + bytes != bs->total_sectors * BDRV_SECTOR_SIZE) {
3799            return -ENOTSUP;
3800        }
3801    }
3802
3803    qemu_co_mutex_lock(&s->lock);
3804    ret = qcow2_cluster_discard(bs, offset, bytes, QCOW2_DISCARD_REQUEST,
3805                                false);
3806    qemu_co_mutex_unlock(&s->lock);
3807    return ret;
3808}
3809
3810static int coroutine_fn
3811qcow2_co_copy_range_from(BlockDriverState *bs,
3812                         BdrvChild *src, uint64_t src_offset,
3813                         BdrvChild *dst, uint64_t dst_offset,
3814                         uint64_t bytes, BdrvRequestFlags read_flags,
3815                         BdrvRequestFlags write_flags)
3816{
3817    BDRVQcow2State *s = bs->opaque;
3818    int ret;
3819    unsigned int cur_bytes; /* number of bytes in current iteration */
3820    BdrvChild *child = NULL;
3821    BdrvRequestFlags cur_write_flags;
3822
3823    assert(!bs->encrypted);
3824    qemu_co_mutex_lock(&s->lock);
3825
3826    while (bytes != 0) {
3827        uint64_t copy_offset = 0;
3828        /* prepare next request */
3829        cur_bytes = MIN(bytes, INT_MAX);
3830        cur_write_flags = write_flags;
3831
3832        ret = qcow2_get_cluster_offset(bs, src_offset, &cur_bytes, &copy_offset);
3833        if (ret < 0) {
3834            goto out;
3835        }
3836
3837        switch (ret) {
3838        case QCOW2_CLUSTER_UNALLOCATED:
3839            if (bs->backing && bs->backing->bs) {
3840                int64_t backing_length = bdrv_getlength(bs->backing->bs);
3841                if (src_offset >= backing_length) {
3842                    cur_write_flags |= BDRV_REQ_ZERO_WRITE;
3843                } else {
3844                    child = bs->backing;
3845                    cur_bytes = MIN(cur_bytes, backing_length - src_offset);
3846                    copy_offset = src_offset;
3847                }
3848            } else {
3849                cur_write_flags |= BDRV_REQ_ZERO_WRITE;
3850            }
3851            break;
3852
3853        case QCOW2_CLUSTER_ZERO_PLAIN:
3854        case QCOW2_CLUSTER_ZERO_ALLOC:
3855            cur_write_flags |= BDRV_REQ_ZERO_WRITE;
3856            break;
3857
3858        case QCOW2_CLUSTER_COMPRESSED:
3859            ret = -ENOTSUP;
3860            goto out;
3861
3862        case QCOW2_CLUSTER_NORMAL:
3863            child = s->data_file;
3864            copy_offset += offset_into_cluster(s, src_offset);
3865            break;
3866
3867        default:
3868            abort();
3869        }
3870        qemu_co_mutex_unlock(&s->lock);
3871        ret = bdrv_co_copy_range_from(child,
3872                                      copy_offset,
3873                                      dst, dst_offset,
3874                                      cur_bytes, read_flags, cur_write_flags);
3875        qemu_co_mutex_lock(&s->lock);
3876        if (ret < 0) {
3877            goto out;
3878        }
3879
3880        bytes -= cur_bytes;
3881        src_offset += cur_bytes;
3882        dst_offset += cur_bytes;
3883    }
3884    ret = 0;
3885
3886out:
3887    qemu_co_mutex_unlock(&s->lock);
3888    return ret;
3889}
3890
3891static int coroutine_fn
3892qcow2_co_copy_range_to(BlockDriverState *bs,
3893                       BdrvChild *src, uint64_t src_offset,
3894                       BdrvChild *dst, uint64_t dst_offset,
3895                       uint64_t bytes, BdrvRequestFlags read_flags,
3896                       BdrvRequestFlags write_flags)
3897{
3898    BDRVQcow2State *s = bs->opaque;
3899    int offset_in_cluster;
3900    int ret;
3901    unsigned int cur_bytes; /* number of sectors in current iteration */
3902    uint64_t cluster_offset;
3903    QCowL2Meta *l2meta = NULL;
3904
3905    assert(!bs->encrypted);
3906
3907    qemu_co_mutex_lock(&s->lock);
3908
3909    while (bytes != 0) {
3910
3911        l2meta = NULL;
3912
3913        offset_in_cluster = offset_into_cluster(s, dst_offset);
3914        cur_bytes = MIN(bytes, INT_MAX);
3915
3916        /* TODO:
3917         * If src->bs == dst->bs, we could simply copy by incrementing
3918         * the refcnt, without copying user data.
3919         * Or if src->bs == dst->bs->backing->bs, we could copy by discarding. */
3920        ret = qcow2_alloc_cluster_offset(bs, dst_offset, &cur_bytes,
3921                                         &cluster_offset, &l2meta);
3922        if (ret < 0) {
3923            goto fail;
3924        }
3925
3926        assert(offset_into_cluster(s, cluster_offset) == 0);
3927
3928        ret = qcow2_pre_write_overlap_check(bs, 0,
3929                cluster_offset + offset_in_cluster, cur_bytes, true);
3930        if (ret < 0) {
3931            goto fail;
3932        }
3933
3934        qemu_co_mutex_unlock(&s->lock);
3935        ret = bdrv_co_copy_range_to(src, src_offset,
3936                                    s->data_file,
3937                                    cluster_offset + offset_in_cluster,
3938                                    cur_bytes, read_flags, write_flags);
3939        qemu_co_mutex_lock(&s->lock);
3940        if (ret < 0) {
3941            goto fail;
3942        }
3943
3944        ret = qcow2_handle_l2meta(bs, &l2meta, true);
3945        if (ret) {
3946            goto fail;
3947        }
3948
3949        bytes -= cur_bytes;
3950        src_offset += cur_bytes;
3951        dst_offset += cur_bytes;
3952    }
3953    ret = 0;
3954
3955fail:
3956    qcow2_handle_l2meta(bs, &l2meta, false);
3957
3958    qemu_co_mutex_unlock(&s->lock);
3959
3960    trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
3961
3962    return ret;
3963}
3964
3965static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
3966                                          bool exact, PreallocMode prealloc,
3967                                          Error **errp)
3968{
3969    BDRVQcow2State *s = bs->opaque;
3970    uint64_t old_length;
3971    int64_t new_l1_size;
3972    int ret;
3973    QDict *options;
3974
3975    if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_METADATA &&
3976        prealloc != PREALLOC_MODE_FALLOC && prealloc != PREALLOC_MODE_FULL)
3977    {
3978        error_setg(errp, "Unsupported preallocation mode '%s'",
3979                   PreallocMode_str(prealloc));
3980        return -ENOTSUP;
3981    }
3982
3983    if (!QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)) {
3984        error_setg(errp, "The new size must be a multiple of %u",
3985                   (unsigned) BDRV_SECTOR_SIZE);
3986        return -EINVAL;
3987    }
3988
3989    qemu_co_mutex_lock(&s->lock);
3990
3991    /* cannot proceed if image has snapshots */
3992    if (s->nb_snapshots) {
3993        error_setg(errp, "Can't resize an image which has snapshots");
3994        ret = -ENOTSUP;
3995        goto fail;
3996    }
3997
3998    /* cannot proceed if image has bitmaps */
3999    if (qcow2_truncate_bitmaps_check(bs, errp)) {
4000        ret = -ENOTSUP;
4001        goto fail;
4002    }
4003
4004    old_length = bs->total_sectors * BDRV_SECTOR_SIZE;
4005    new_l1_size = size_to_l1(s, offset);
4006
4007    if (offset < old_length) {
4008        int64_t last_cluster, old_file_size;
4009        if (prealloc != PREALLOC_MODE_OFF) {
4010            error_setg(errp,
4011                       "Preallocation can't be used for shrinking an image");
4012            ret = -EINVAL;
4013            goto fail;
4014        }
4015
4016        ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size),
4017                                    old_length - ROUND_UP(offset,
4018                                                          s->cluster_size),
4019                                    QCOW2_DISCARD_ALWAYS, true);
4020        if (ret < 0) {
4021            error_setg_errno(errp, -ret, "Failed to discard cropped clusters");
4022            goto fail;
4023        }
4024
4025        ret = qcow2_shrink_l1_table(bs, new_l1_size);
4026        if (ret < 0) {
4027            error_setg_errno(errp, -ret,
4028                             "Failed to reduce the number of L2 tables");
4029            goto fail;
4030        }
4031
4032        ret = qcow2_shrink_reftable(bs);
4033        if (ret < 0) {
4034            error_setg_errno(errp, -ret,
4035                             "Failed to discard unused refblocks");
4036            goto fail;
4037        }
4038
4039        old_file_size = bdrv_getlength(bs->file->bs);
4040        if (old_file_size < 0) {
4041            error_setg_errno(errp, -old_file_size,
4042                             "Failed to inquire current file length");
4043            ret = old_file_size;
4044            goto fail;
4045        }
4046        last_cluster = qcow2_get_last_cluster(bs, old_file_size);
4047        if (last_cluster < 0) {
4048            error_setg_errno(errp, -last_cluster,
4049                             "Failed to find the last cluster");
4050            ret = last_cluster;
4051            goto fail;
4052        }
4053        if ((last_cluster + 1) * s->cluster_size < old_file_size) {
4054            Error *local_err = NULL;
4055
4056            /*
4057             * Do not pass @exact here: It will not help the user if
4058             * we get an error here just because they wanted to shrink
4059             * their qcow2 image (on a block device) with qemu-img.
4060             * (And on the qcow2 layer, the @exact requirement is
4061             * always fulfilled, so there is no need to pass it on.)
4062             */
4063            bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size,
4064                             false, PREALLOC_MODE_OFF, &local_err);
4065            if (local_err) {
4066                warn_reportf_err(local_err,
4067                                 "Failed to truncate the tail of the image: ");
4068            }
4069        }
4070    } else {
4071        ret = qcow2_grow_l1_table(bs, new_l1_size, true);
4072        if (ret < 0) {
4073            error_setg_errno(errp, -ret, "Failed to grow the L1 table");
4074            goto fail;
4075        }
4076    }
4077
4078    switch (prealloc) {
4079    case PREALLOC_MODE_OFF:
4080        if (has_data_file(bs)) {
4081            /*
4082             * If the caller wants an exact resize, the external data
4083             * file should be resized to the exact target size, too,
4084             * so we pass @exact here.
4085             */
4086            ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, errp);
4087            if (ret < 0) {
4088                goto fail;
4089            }
4090        }
4091        break;
4092
4093    case PREALLOC_MODE_METADATA:
4094        ret = preallocate_co(bs, old_length, offset, prealloc, errp);
4095        if (ret < 0) {
4096            goto fail;
4097        }
4098        break;
4099
4100    case PREALLOC_MODE_FALLOC:
4101    case PREALLOC_MODE_FULL:
4102    {
4103        int64_t allocation_start, host_offset, guest_offset;
4104        int64_t clusters_allocated;
4105        int64_t old_file_size, new_file_size;
4106        uint64_t nb_new_data_clusters, nb_new_l2_tables;
4107
4108        /* With a data file, preallocation means just allocating the metadata
4109         * and forwarding the truncate request to the data file */
4110        if (has_data_file(bs)) {
4111            ret = preallocate_co(bs, old_length, offset, prealloc, errp);
4112            if (ret < 0) {
4113                goto fail;
4114            }
4115            break;
4116        }
4117
4118        old_file_size = bdrv_getlength(bs->file->bs);
4119        if (old_file_size < 0) {
4120            error_setg_errno(errp, -old_file_size,
4121                             "Failed to inquire current file length");
4122            ret = old_file_size;
4123            goto fail;
4124        }
4125        old_file_size = ROUND_UP(old_file_size, s->cluster_size);
4126
4127        nb_new_data_clusters = DIV_ROUND_UP(offset - old_length,
4128                                            s->cluster_size);
4129
4130        /* This is an overestimation; we will not actually allocate space for
4131         * these in the file but just make sure the new refcount structures are
4132         * able to cover them so we will not have to allocate new refblocks
4133         * while entering the data blocks in the potentially new L2 tables.
4134         * (We do not actually care where the L2 tables are placed. Maybe they
4135         *  are already allocated or they can be placed somewhere before
4136         *  @old_file_size. It does not matter because they will be fully
4137         *  allocated automatically, so they do not need to be covered by the
4138         *  preallocation. All that matters is that we will not have to allocate
4139         *  new refcount structures for them.) */
4140        nb_new_l2_tables = DIV_ROUND_UP(nb_new_data_clusters,
4141                                        s->cluster_size / sizeof(uint64_t));
4142        /* The cluster range may not be aligned to L2 boundaries, so add one L2
4143         * table for a potential head/tail */
4144        nb_new_l2_tables++;
4145
4146        allocation_start = qcow2_refcount_area(bs, old_file_size,
4147                                               nb_new_data_clusters +
4148                                               nb_new_l2_tables,
4149                                               true, 0, 0);
4150        if (allocation_start < 0) {
4151            error_setg_errno(errp, -allocation_start,
4152                             "Failed to resize refcount structures");
4153            ret = allocation_start;
4154            goto fail;
4155        }
4156
4157        clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start,
4158                                                     nb_new_data_clusters);
4159        if (clusters_allocated < 0) {
4160            error_setg_errno(errp, -clusters_allocated,
4161                             "Failed to allocate data clusters");
4162            ret = clusters_allocated;
4163            goto fail;
4164        }
4165
4166        assert(clusters_allocated == nb_new_data_clusters);
4167
4168        /* Allocate the data area */
4169        new_file_size = allocation_start +
4170                        nb_new_data_clusters * s->cluster_size;
4171        /* Image file grows, so @exact does not matter */
4172        ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, errp);
4173        if (ret < 0) {
4174            error_prepend(errp, "Failed to resize underlying file: ");
4175            qcow2_free_clusters(bs, allocation_start,
4176                                nb_new_data_clusters * s->cluster_size,
4177                                QCOW2_DISCARD_OTHER);
4178            goto fail;
4179        }
4180
4181        /* Create the necessary L2 entries */
4182        host_offset = allocation_start;
4183        guest_offset = old_length;
4184        while (nb_new_data_clusters) {
4185            int64_t nb_clusters = MIN(
4186                nb_new_data_clusters,
4187                s->l2_slice_size - offset_to_l2_slice_index(s, guest_offset));
4188            QCowL2Meta allocation = {
4189                .offset       = guest_offset,
4190                .alloc_offset = host_offset,
4191                .nb_clusters  = nb_clusters,
4192            };
4193            qemu_co_queue_init(&allocation.dependent_requests);
4194
4195            ret = qcow2_alloc_cluster_link_l2(bs, &allocation);
4196            if (ret < 0) {
4197                error_setg_errno(errp, -ret, "Failed to update L2 tables");
4198                qcow2_free_clusters(bs, host_offset,
4199                                    nb_new_data_clusters * s->cluster_size,
4200                                    QCOW2_DISCARD_OTHER);
4201                goto fail;
4202            }
4203
4204            guest_offset += nb_clusters * s->cluster_size;
4205            host_offset += nb_clusters * s->cluster_size;
4206            nb_new_data_clusters -= nb_clusters;
4207        }
4208        break;
4209    }
4210
4211    default:
4212        g_assert_not_reached();
4213    }
4214
4215    if (prealloc != PREALLOC_MODE_OFF) {
4216        /* Flush metadata before actually changing the image size */
4217        ret = qcow2_write_caches(bs);
4218        if (ret < 0) {
4219            error_setg_errno(errp, -ret,
4220                             "Failed to flush the preallocated area to disk");
4221            goto fail;
4222        }
4223    }
4224
4225    bs->total_sectors = offset / BDRV_SECTOR_SIZE;
4226
4227    /* write updated header.size */
4228    offset = cpu_to_be64(offset);
4229    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
4230                           &offset, sizeof(uint64_t));
4231    if (ret < 0) {
4232        error_setg_errno(errp, -ret, "Failed to update the image size");
4233        goto fail;
4234    }
4235
4236    s->l1_vm_state_index = new_l1_size;
4237
4238    /* Update cache sizes */
4239    options = qdict_clone_shallow(bs->options);
4240    ret = qcow2_update_options(bs, options, s->flags, errp);
4241    qobject_unref(options);
4242    if (ret < 0) {
4243        goto fail;
4244    }
4245    ret = 0;
4246fail:
4247    qemu_co_mutex_unlock(&s->lock);
4248    return ret;
4249}
4250
4251static coroutine_fn int
4252qcow2_co_pwritev_compressed_task(BlockDriverState *bs,
4253                                 uint64_t offset, uint64_t bytes,
4254                                 QEMUIOVector *qiov, size_t qiov_offset)
4255{
4256    BDRVQcow2State *s = bs->opaque;
4257    int ret;
4258    ssize_t out_len;
4259    uint8_t *buf, *out_buf;
4260    uint64_t cluster_offset;
4261
4262    assert(bytes == s->cluster_size || (bytes < s->cluster_size &&
4263           (offset + bytes == bs->total_sectors << BDRV_SECTOR_BITS)));
4264
4265    buf = qemu_blockalign(bs, s->cluster_size);
4266    if (bytes < s->cluster_size) {
4267        /* Zero-pad last write if image size is not cluster aligned */
4268        memset(buf + bytes, 0, s->cluster_size - bytes);
4269    }
4270    qemu_iovec_to_buf(qiov, qiov_offset, buf, bytes);
4271
4272    out_buf = g_malloc(s->cluster_size);
4273
4274    out_len = qcow2_co_compress(bs, out_buf, s->cluster_size - 1,
4275                                buf, s->cluster_size);
4276    if (out_len == -ENOMEM) {
4277        /* could not compress: write normal cluster */
4278        ret = qcow2_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset, 0);
4279        if (ret < 0) {
4280            goto fail;
4281        }
4282        goto success;
4283    } else if (out_len < 0) {
4284        ret = -EINVAL;
4285        goto fail;
4286    }
4287
4288    qemu_co_mutex_lock(&s->lock);
4289    ret = qcow2_alloc_compressed_cluster_offset(bs, offset, out_len,
4290                                                &cluster_offset);
4291    if (ret < 0) {
4292        qemu_co_mutex_unlock(&s->lock);
4293        goto fail;
4294    }
4295
4296    ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len, true);
4297    qemu_co_mutex_unlock(&s->lock);
4298    if (ret < 0) {
4299        goto fail;
4300    }
4301
4302    BLKDBG_EVENT(s->data_file, BLKDBG_WRITE_COMPRESSED);
4303    ret = bdrv_co_pwrite(s->data_file, cluster_offset, out_len, out_buf, 0);
4304    if (ret < 0) {
4305        goto fail;
4306    }
4307success:
4308    ret = 0;
4309fail:
4310    qemu_vfree(buf);
4311    g_free(out_buf);
4312    return ret;
4313}
4314
4315static coroutine_fn int qcow2_co_pwritev_compressed_task_entry(AioTask *task)
4316{
4317    Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
4318
4319    assert(!t->cluster_type && !t->l2meta);
4320
4321    return qcow2_co_pwritev_compressed_task(t->bs, t->offset, t->bytes, t->qiov,
4322                                            t->qiov_offset);
4323}
4324
4325/*
4326 * XXX: put compressed sectors first, then all the cluster aligned
4327 * tables to avoid losing bytes in alignment
4328 */
4329static coroutine_fn int
4330qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
4331                                 uint64_t offset, uint64_t bytes,
4332                                 QEMUIOVector *qiov, size_t qiov_offset)
4333{
4334    BDRVQcow2State *s = bs->opaque;
4335    AioTaskPool *aio = NULL;
4336    int ret = 0;
4337
4338    if (has_data_file(bs)) {
4339        return -ENOTSUP;
4340    }
4341
4342    if (bytes == 0) {
4343        /*
4344         * align end of file to a sector boundary to ease reading with
4345         * sector based I/Os
4346         */
4347        int64_t len = bdrv_getlength(bs->file->bs);
4348        if (len < 0) {
4349            return len;
4350        }
4351        return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, NULL);
4352    }
4353
4354    if (offset_into_cluster(s, offset)) {
4355        return -EINVAL;
4356    }
4357
4358    if (offset_into_cluster(s, bytes) &&
4359        (offset + bytes) != (bs->total_sectors << BDRV_SECTOR_BITS)) {
4360        return -EINVAL;
4361    }
4362
4363    while (bytes && aio_task_pool_status(aio) == 0) {
4364        uint64_t chunk_size = MIN(bytes, s->cluster_size);
4365
4366        if (!aio && chunk_size != bytes) {
4367            aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
4368        }
4369
4370        ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_compressed_task_entry,
4371                             0, 0, offset, chunk_size, qiov, qiov_offset, NULL);
4372        if (ret < 0) {
4373            break;
4374        }
4375        qiov_offset += chunk_size;
4376        offset += chunk_size;
4377        bytes -= chunk_size;
4378    }
4379
4380    if (aio) {
4381        aio_task_pool_wait_all(aio);
4382        if (ret == 0) {
4383            ret = aio_task_pool_status(aio);
4384        }
4385        g_free(aio);
4386    }
4387
4388    return ret;
4389}
4390
4391static int coroutine_fn
4392qcow2_co_preadv_compressed(BlockDriverState *bs,
4393                           uint64_t file_cluster_offset,
4394                           uint64_t offset,
4395                           uint64_t bytes,
4396                           QEMUIOVector *qiov,
4397                           size_t qiov_offset)
4398{
4399    BDRVQcow2State *s = bs->opaque;
4400    int ret = 0, csize, nb_csectors;
4401    uint64_t coffset;
4402    uint8_t *buf, *out_buf;
4403    int offset_in_cluster = offset_into_cluster(s, offset);
4404
4405    coffset = file_cluster_offset & s->cluster_offset_mask;
4406    nb_csectors = ((file_cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
4407    csize = nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE -
4408        (coffset & ~QCOW2_COMPRESSED_SECTOR_MASK);
4409
4410    buf = g_try_malloc(csize);
4411    if (!buf) {
4412        return -ENOMEM;
4413    }
4414
4415    out_buf = qemu_blockalign(bs, s->cluster_size);
4416
4417    BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
4418    ret = bdrv_co_pread(bs->file, coffset, csize, buf, 0);
4419    if (ret < 0) {
4420        goto fail;
4421    }
4422
4423    if (qcow2_co_decompress(bs, out_buf, s->cluster_size, buf, csize) < 0) {
4424        ret = -EIO;
4425        goto fail;
4426    }
4427
4428    qemu_iovec_from_buf(qiov, qiov_offset, out_buf + offset_in_cluster, bytes);
4429
4430fail:
4431    qemu_vfree(out_buf);
4432    g_free(buf);
4433
4434    return ret;
4435}
4436
4437static int make_completely_empty(BlockDriverState *bs)
4438{
4439    BDRVQcow2State *s = bs->opaque;
4440    Error *local_err = NULL;
4441    int ret, l1_clusters;
4442    int64_t offset;
4443    uint64_t *new_reftable = NULL;
4444    uint64_t rt_entry, l1_size2;
4445    struct {
4446        uint64_t l1_offset;
4447        uint64_t reftable_offset;
4448        uint32_t reftable_clusters;
4449    } QEMU_PACKED l1_ofs_rt_ofs_cls;
4450
4451    ret = qcow2_cache_empty(bs, s->l2_table_cache);
4452    if (ret < 0) {
4453        goto fail;
4454    }
4455
4456    ret = qcow2_cache_empty(bs, s->refcount_block_cache);
4457    if (ret < 0) {
4458        goto fail;
4459    }
4460
4461    /* Refcounts will be broken utterly */
4462    ret = qcow2_mark_dirty(bs);
4463    if (ret < 0) {
4464        goto fail;
4465    }
4466
4467    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
4468
4469    l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
4470    l1_size2 = (uint64_t)s->l1_size * sizeof(uint64_t);
4471
4472    /* After this call, neither the in-memory nor the on-disk refcount
4473     * information accurately describe the actual references */
4474
4475    ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset,
4476                             l1_clusters * s->cluster_size, 0);
4477    if (ret < 0) {
4478        goto fail_broken_refcounts;
4479    }
4480    memset(s->l1_table, 0, l1_size2);
4481
4482    BLKDBG_EVENT(bs->file, BLKDBG_EMPTY_IMAGE_PREPARE);
4483
4484    /* Overwrite enough clusters at the beginning of the sectors to place
4485     * the refcount table, a refcount block and the L1 table in; this may
4486     * overwrite parts of the existing refcount and L1 table, which is not
4487     * an issue because the dirty flag is set, complete data loss is in fact
4488     * desired and partial data loss is consequently fine as well */
4489    ret = bdrv_pwrite_zeroes(bs->file, s->cluster_size,
4490                             (2 + l1_clusters) * s->cluster_size, 0);
4491    /* This call (even if it failed overall) may have overwritten on-disk
4492     * refcount structures; in that case, the in-memory refcount information
4493     * will probably differ from the on-disk information which makes the BDS
4494     * unusable */
4495    if (ret < 0) {
4496        goto fail_broken_refcounts;
4497    }
4498
4499    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
4500    BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE);
4501
4502    /* "Create" an empty reftable (one cluster) directly after the image
4503     * header and an empty L1 table three clusters after the image header;
4504     * the cluster between those two will be used as the first refblock */
4505    l1_ofs_rt_ofs_cls.l1_offset = cpu_to_be64(3 * s->cluster_size);
4506    l1_ofs_rt_ofs_cls.reftable_offset = cpu_to_be64(s->cluster_size);
4507    l1_ofs_rt_ofs_cls.reftable_clusters = cpu_to_be32(1);
4508    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset),
4509                           &l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls));
4510    if (ret < 0) {
4511        goto fail_broken_refcounts;
4512    }
4513
4514    s->l1_table_offset = 3 * s->cluster_size;
4515
4516    new_reftable = g_try_new0(uint64_t, s->cluster_size / sizeof(uint64_t));
4517    if (!new_reftable) {
4518        ret = -ENOMEM;
4519        goto fail_broken_refcounts;
4520    }
4521
4522    s->refcount_table_offset = s->cluster_size;
4523    s->refcount_table_size   = s->cluster_size / sizeof(uint64_t);
4524    s->max_refcount_table_index = 0;
4525
4526    g_free(s->refcount_table);
4527    s->refcount_table = new_reftable;
4528    new_reftable = NULL;
4529
4530    /* Now the in-memory refcount information again corresponds to the on-disk
4531     * information (reftable is empty and no refblocks (the refblock cache is
4532     * empty)); however, this means some clusters (e.g. the image header) are
4533     * referenced, but not refcounted, but the normal qcow2 code assumes that
4534     * the in-memory information is always correct */
4535
4536    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);
4537
4538    /* Enter the first refblock into the reftable */
4539    rt_entry = cpu_to_be64(2 * s->cluster_size);
4540    ret = bdrv_pwrite_sync(bs->file, s->cluster_size,
4541                           &rt_entry, sizeof(rt_entry));
4542    if (ret < 0) {
4543        goto fail_broken_refcounts;
4544    }
4545    s->refcount_table[0] = 2 * s->cluster_size;
4546
4547    s->free_cluster_index = 0;
4548    assert(3 + l1_clusters <= s->refcount_block_size);
4549    offset = qcow2_alloc_clusters(bs, 3 * s->cluster_size + l1_size2);
4550    if (offset < 0) {
4551        ret = offset;
4552        goto fail_broken_refcounts;
4553    } else if (offset > 0) {
4554        error_report("First cluster in emptied image is in use");
4555        abort();
4556    }
4557
4558    /* Now finally the in-memory information corresponds to the on-disk
4559     * structures and is correct */
4560    ret = qcow2_mark_clean(bs);
4561    if (ret < 0) {
4562        goto fail;
4563    }
4564
4565    ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size, false,
4566                        PREALLOC_MODE_OFF, &local_err);
4567    if (ret < 0) {
4568        error_report_err(local_err);
4569        goto fail;
4570    }
4571
4572    return 0;
4573
4574fail_broken_refcounts:
4575    /* The BDS is unusable at this point. If we wanted to make it usable, we
4576     * would have to call qcow2_refcount_close(), qcow2_refcount_init(),
4577     * qcow2_check_refcounts(), qcow2_refcount_close() and qcow2_refcount_init()
4578     * again. However, because the functions which could have caused this error
4579     * path to be taken are used by those functions as well, it's very likely
4580     * that that sequence will fail as well. Therefore, just eject the BDS. */
4581    bs->drv = NULL;
4582
4583fail:
4584    g_free(new_reftable);
4585    return ret;
4586}
4587
4588static int qcow2_make_empty(BlockDriverState *bs)
4589{
4590    BDRVQcow2State *s = bs->opaque;
4591    uint64_t offset, end_offset;
4592    int step = QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size);
4593    int l1_clusters, ret = 0;
4594
4595    l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
4596
4597    if (s->qcow_version >= 3 && !s->snapshots && !s->nb_bitmaps &&
4598        3 + l1_clusters <= s->refcount_block_size &&
4599        s->crypt_method_header != QCOW_CRYPT_LUKS &&
4600        !has_data_file(bs)) {
4601        /* The following function only works for qcow2 v3 images (it
4602         * requires the dirty flag) and only as long as there are no
4603         * features that reserve extra clusters (such as snapshots,
4604         * LUKS header, or persistent bitmaps), because it completely
4605         * empties the image.  Furthermore, the L1 table and three
4606         * additional clusters (image header, refcount table, one
4607         * refcount block) have to fit inside one refcount block. It
4608         * only resets the image file, i.e. does not work with an
4609         * external data file. */
4610        return make_completely_empty(bs);
4611    }
4612
4613    /* This fallback code simply discards every active cluster; this is slow,
4614     * but works in all cases */
4615    end_offset = bs->total_sectors * BDRV_SECTOR_SIZE;
4616    for (offset = 0; offset < end_offset; offset += step) {
4617        /* As this function is generally used after committing an external
4618         * snapshot, QCOW2_DISCARD_SNAPSHOT seems appropriate. Also, the
4619         * default action for this kind of discard is to pass the discard,
4620         * which will ideally result in an actually smaller image file, as
4621         * is probably desired. */
4622        ret = qcow2_cluster_discard(bs, offset, MIN(step, end_offset - offset),
4623                                    QCOW2_DISCARD_SNAPSHOT, true);
4624        if (ret < 0) {
4625            break;
4626        }
4627    }
4628
4629    return ret;
4630}
4631
4632static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
4633{
4634    BDRVQcow2State *s = bs->opaque;
4635    int ret;
4636
4637    qemu_co_mutex_lock(&s->lock);
4638    ret = qcow2_write_caches(bs);
4639    qemu_co_mutex_unlock(&s->lock);
4640
4641    return ret;
4642}
4643
4644static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
4645                                       Error **errp)
4646{
4647    Error *local_err = NULL;
4648    BlockMeasureInfo *info;
4649    uint64_t required = 0; /* bytes that contribute to required size */
4650    uint64_t virtual_size; /* disk size as seen by guest */
4651    uint64_t refcount_bits;
4652    uint64_t l2_tables;
4653    uint64_t luks_payload_size = 0;
4654    size_t cluster_size;
4655    int version;
4656    char *optstr;
4657    PreallocMode prealloc;
4658    bool has_backing_file;
4659    bool has_luks;
4660
4661    /* Parse image creation options */
4662    cluster_size = qcow2_opt_get_cluster_size_del(opts, &local_err);
4663    if (local_err) {
4664        goto err;
4665    }
4666
4667    version = qcow2_opt_get_version_del(opts, &local_err);
4668    if (local_err) {
4669        goto err;
4670    }
4671
4672    refcount_bits = qcow2_opt_get_refcount_bits_del(opts, version, &local_err);
4673    if (local_err) {
4674        goto err;
4675    }
4676
4677    optstr = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
4678    prealloc = qapi_enum_parse(&PreallocMode_lookup, optstr,
4679                               PREALLOC_MODE_OFF, &local_err);
4680    g_free(optstr);
4681    if (local_err) {
4682        goto err;
4683    }
4684
4685    optstr = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
4686    has_backing_file = !!optstr;
4687    g_free(optstr);
4688
4689    optstr = qemu_opt_get_del(opts, BLOCK_OPT_ENCRYPT_FORMAT);
4690    has_luks = optstr && strcmp(optstr, "luks") == 0;
4691    g_free(optstr);
4692
4693    if (has_luks) {
4694        g_autoptr(QCryptoBlockCreateOptions) create_opts = NULL;
4695        QDict *opts_qdict;
4696        QDict *cryptoopts;
4697        size_t headerlen;
4698
4699        opts_qdict = qemu_opts_to_qdict(opts, NULL);
4700        qdict_extract_subqdict(opts_qdict, &cryptoopts, "encrypt.");
4701        qobject_unref(opts_qdict);
4702
4703        qdict_put_str(cryptoopts, "format", "luks");
4704
4705        create_opts = block_crypto_create_opts_init(cryptoopts, errp);
4706        qobject_unref(cryptoopts);
4707        if (!create_opts) {
4708            goto err;
4709        }
4710
4711        if (!qcrypto_block_calculate_payload_offset(create_opts,
4712                                                    "encrypt.",
4713                                                    &headerlen,
4714                                                    &local_err)) {
4715            goto err;
4716        }
4717
4718        luks_payload_size = ROUND_UP(headerlen, cluster_size);
4719    }
4720
4721    virtual_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
4722    virtual_size = ROUND_UP(virtual_size, cluster_size);
4723
4724    /* Check that virtual disk size is valid */
4725    l2_tables = DIV_ROUND_UP(virtual_size / cluster_size,
4726                             cluster_size / sizeof(uint64_t));
4727    if (l2_tables * sizeof(uint64_t) > QCOW_MAX_L1_SIZE) {
4728        error_setg(&local_err, "The image size is too large "
4729                               "(try using a larger cluster size)");
4730        goto err;
4731    }
4732
4733    /* Account for input image */
4734    if (in_bs) {
4735        int64_t ssize = bdrv_getlength(in_bs);
4736        if (ssize < 0) {
4737            error_setg_errno(&local_err, -ssize,
4738                             "Unable to get image virtual_size");
4739            goto err;
4740        }
4741
4742        virtual_size = ROUND_UP(ssize, cluster_size);
4743
4744        if (has_backing_file) {
4745            /* We don't how much of the backing chain is shared by the input
4746             * image and the new image file.  In the worst case the new image's
4747             * backing file has nothing in common with the input image.  Be
4748             * conservative and assume all clusters need to be written.
4749             */
4750            required = virtual_size;
4751        } else {
4752            int64_t offset;
4753            int64_t pnum = 0;
4754
4755            for (offset = 0; offset < ssize; offset += pnum) {
4756                int ret;
4757
4758                ret = bdrv_block_status_above(in_bs, NULL, offset,
4759                                              ssize - offset, &pnum, NULL,
4760                                              NULL);
4761                if (ret < 0) {
4762                    error_setg_errno(&local_err, -ret,
4763                                     "Unable to get block status");
4764                    goto err;
4765                }
4766
4767                if (ret & BDRV_BLOCK_ZERO) {
4768                    /* Skip zero regions (safe with no backing file) */
4769                } else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) ==
4770                           (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) {
4771                    /* Extend pnum to end of cluster for next iteration */
4772                    pnum = ROUND_UP(offset + pnum, cluster_size) - offset;
4773
4774                    /* Count clusters we've seen */
4775                    required += offset % cluster_size + pnum;
4776                }
4777            }
4778        }
4779    }
4780
4781    /* Take into account preallocation.  Nothing special is needed for
4782     * PREALLOC_MODE_METADATA since metadata is always counted.
4783     */
4784    if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) {
4785        required = virtual_size;
4786    }
4787
4788    info = g_new(BlockMeasureInfo, 1);
4789    info->fully_allocated =
4790        qcow2_calc_prealloc_size(virtual_size, cluster_size,
4791                                 ctz32(refcount_bits)) + luks_payload_size;
4792
4793    /* Remove data clusters that are not required.  This overestimates the
4794     * required size because metadata needed for the fully allocated file is
4795     * still counted.
4796     */
4797    info->required = info->fully_allocated - virtual_size + required;
4798    return info;
4799
4800err:
4801    error_propagate(errp, local_err);
4802    return NULL;
4803}
4804
4805static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4806{
4807    BDRVQcow2State *s = bs->opaque;
4808    bdi->unallocated_blocks_are_zero = true;
4809    bdi->cluster_size = s->cluster_size;
4810    bdi->vm_state_offset = qcow2_vm_state_offset(s);
4811    return 0;
4812}
4813
4814static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs,
4815                                                  Error **errp)
4816{
4817    BDRVQcow2State *s = bs->opaque;
4818    ImageInfoSpecific *spec_info;
4819    QCryptoBlockInfo *encrypt_info = NULL;
4820    Error *local_err = NULL;
4821
4822    if (s->crypto != NULL) {
4823        encrypt_info = qcrypto_block_get_info(s->crypto, &local_err);
4824        if (local_err) {
4825            error_propagate(errp, local_err);
4826            return NULL;
4827        }
4828    }
4829
4830    spec_info = g_new(ImageInfoSpecific, 1);
4831    *spec_info = (ImageInfoSpecific){
4832        .type  = IMAGE_INFO_SPECIFIC_KIND_QCOW2,
4833        .u.qcow2.data = g_new0(ImageInfoSpecificQCow2, 1),
4834    };
4835    if (s->qcow_version == 2) {
4836        *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
4837            .compat             = g_strdup("0.10"),
4838            .refcount_bits      = s->refcount_bits,
4839        };
4840    } else if (s->qcow_version == 3) {
4841        Qcow2BitmapInfoList *bitmaps;
4842        bitmaps = qcow2_get_bitmap_info_list(bs, &local_err);
4843        if (local_err) {
4844            error_propagate(errp, local_err);
4845            qapi_free_ImageInfoSpecific(spec_info);
4846            qapi_free_QCryptoBlockInfo(encrypt_info);
4847            return NULL;
4848        }
4849        *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
4850            .compat             = g_strdup("1.1"),
4851            .lazy_refcounts     = s->compatible_features &
4852                                  QCOW2_COMPAT_LAZY_REFCOUNTS,
4853            .has_lazy_refcounts = true,
4854            .corrupt            = s->incompatible_features &
4855                                  QCOW2_INCOMPAT_CORRUPT,
4856            .has_corrupt        = true,
4857            .refcount_bits      = s->refcount_bits,
4858            .has_bitmaps        = !!bitmaps,
4859            .bitmaps            = bitmaps,
4860            .has_data_file      = !!s->image_data_file,
4861            .data_file          = g_strdup(s->image_data_file),
4862            .has_data_file_raw  = has_data_file(bs),
4863            .data_file_raw      = data_file_is_raw(bs),
4864        };
4865    } else {
4866        /* if this assertion fails, this probably means a new version was
4867         * added without having it covered here */
4868        assert(false);
4869    }
4870
4871    if (encrypt_info) {
4872        ImageInfoSpecificQCow2Encryption *qencrypt =
4873            g_new(ImageInfoSpecificQCow2Encryption, 1);
4874        switch (encrypt_info->format) {
4875        case Q_CRYPTO_BLOCK_FORMAT_QCOW:
4876            qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_AES;
4877            break;
4878        case Q_CRYPTO_BLOCK_FORMAT_LUKS:
4879            qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_LUKS;
4880            qencrypt->u.luks = encrypt_info->u.luks;
4881            break;
4882        default:
4883            abort();
4884        }
4885        /* Since we did shallow copy above, erase any pointers
4886         * in the original info */
4887        memset(&encrypt_info->u, 0, sizeof(encrypt_info->u));
4888        qapi_free_QCryptoBlockInfo(encrypt_info);
4889
4890        spec_info->u.qcow2.data->has_encrypt = true;
4891        spec_info->u.qcow2.data->encrypt = qencrypt;
4892    }
4893
4894    return spec_info;
4895}
4896
4897static int qcow2_has_zero_init(BlockDriverState *bs)
4898{
4899    BDRVQcow2State *s = bs->opaque;
4900    bool preallocated;
4901
4902    if (qemu_in_coroutine()) {
4903        qemu_co_mutex_lock(&s->lock);
4904    }
4905    /*
4906     * Check preallocation status: Preallocated images have all L2
4907     * tables allocated, nonpreallocated images have none.  It is
4908     * therefore enough to check the first one.
4909     */
4910    preallocated = s->l1_size > 0 && s->l1_table[0] != 0;
4911    if (qemu_in_coroutine()) {
4912        qemu_co_mutex_unlock(&s->lock);
4913    }
4914
4915    if (!preallocated) {
4916        return 1;
4917    } else if (bs->encrypted) {
4918        return 0;
4919    } else {
4920        return bdrv_has_zero_init(s->data_file->bs);
4921    }
4922}
4923
4924static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
4925                              int64_t pos)
4926{
4927    BDRVQcow2State *s = bs->opaque;
4928
4929    BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
4930    return bs->drv->bdrv_co_pwritev_part(bs, qcow2_vm_state_offset(s) + pos,
4931                                         qiov->size, qiov, 0, 0);
4932}
4933
4934static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
4935                              int64_t pos)
4936{
4937    BDRVQcow2State *s = bs->opaque;
4938
4939    BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
4940    return bs->drv->bdrv_co_preadv_part(bs, qcow2_vm_state_offset(s) + pos,
4941                                        qiov->size, qiov, 0, 0);
4942}
4943
4944/*
4945 * Downgrades an image's version. To achieve this, any incompatible features
4946 * have to be removed.
4947 */
4948static int qcow2_downgrade(BlockDriverState *bs, int target_version,
4949                           BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
4950                           Error **errp)
4951{
4952    BDRVQcow2State *s = bs->opaque;
4953    int current_version = s->qcow_version;
4954    int ret;
4955
4956    /* This is qcow2_downgrade(), not qcow2_upgrade() */
4957    assert(target_version < current_version);
4958
4959    /* There are no other versions (now) that you can downgrade to */
4960    assert(target_version == 2);
4961
4962    if (s->refcount_order != 4) {
4963        error_setg(errp, "compat=0.10 requires refcount_bits=16");
4964        return -ENOTSUP;
4965    }
4966
4967    if (has_data_file(bs)) {
4968        error_setg(errp, "Cannot downgrade an image with a data file");
4969        return -ENOTSUP;
4970    }
4971
4972    /* clear incompatible features */
4973    if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
4974        ret = qcow2_mark_clean(bs);
4975        if (ret < 0) {
4976            error_setg_errno(errp, -ret, "Failed to make the image clean");
4977            return ret;
4978        }
4979    }
4980
4981    /* with QCOW2_INCOMPAT_CORRUPT, it is pretty much impossible to get here in
4982     * the first place; if that happens nonetheless, returning -ENOTSUP is the
4983     * best thing to do anyway */
4984
4985    if (s->incompatible_features) {
4986        error_setg(errp, "Cannot downgrade an image with incompatible features "
4987                   "%#" PRIx64 " set", s->incompatible_features);
4988        return -ENOTSUP;
4989    }
4990
4991    /* since we can ignore compatible features, we can set them to 0 as well */
4992    s->compatible_features = 0;
4993    /* if lazy refcounts have been used, they have already been fixed through
4994     * clearing the dirty flag */
4995
4996    /* clearing autoclear features is trivial */
4997    s->autoclear_features = 0;
4998
4999    ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque);
5000    if (ret < 0) {
5001        error_setg_errno(errp, -ret, "Failed to turn zero into data clusters");
5002        return ret;
5003    }
5004
5005    s->qcow_version = target_version;
5006    ret = qcow2_update_header(bs);
5007    if (ret < 0) {
5008        s->qcow_version = current_version;
5009        error_setg_errno(errp, -ret, "Failed to update the image header");
5010        return ret;
5011    }
5012    return 0;
5013}
5014
5015/*
5016 * Upgrades an image's version.  While newer versions encompass all
5017 * features of older versions, some things may have to be presented
5018 * differently.
5019 */
5020static int qcow2_upgrade(BlockDriverState *bs, int target_version,
5021                         BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
5022                         Error **errp)
5023{
5024    BDRVQcow2State *s = bs->opaque;
5025    bool need_snapshot_update;
5026    int current_version = s->qcow_version;
5027    int i;
5028    int ret;
5029
5030    /* This is qcow2_upgrade(), not qcow2_downgrade() */
5031    assert(target_version > current_version);
5032
5033    /* There are no other versions (yet) that you can upgrade to */
5034    assert(target_version == 3);
5035
5036    status_cb(bs, 0, 2, cb_opaque);
5037
5038    /*
5039     * In v2, snapshots do not need to have extra data.  v3 requires
5040     * the 64-bit VM state size and the virtual disk size to be
5041     * present.
5042     * qcow2_write_snapshots() will always write the list in the
5043     * v3-compliant format.
5044     */
5045    need_snapshot_update = false;
5046    for (i = 0; i < s->nb_snapshots; i++) {
5047        if (s->snapshots[i].extra_data_size <
5048            sizeof_field(QCowSnapshotExtraData, vm_state_size_large) +
5049            sizeof_field(QCowSnapshotExtraData, disk_size))
5050        {
5051            need_snapshot_update = true;
5052            break;
5053        }
5054    }
5055    if (need_snapshot_update) {
5056        ret = qcow2_write_snapshots(bs);
5057        if (ret < 0) {
5058            error_setg_errno(errp, -ret, "Failed to update the snapshot table");
5059            return ret;
5060        }
5061    }
5062    status_cb(bs, 1, 2, cb_opaque);
5063
5064    s->qcow_version = target_version;
5065    ret = qcow2_update_header(bs);
5066    if (ret < 0) {
5067        s->qcow_version = current_version;
5068        error_setg_errno(errp, -ret, "Failed to update the image header");
5069        return ret;
5070    }
5071    status_cb(bs, 2, 2, cb_opaque);
5072
5073    return 0;
5074}
5075
/*
 * Phases of an amend run, as tracked by the amend progress helper callback.
 */
typedef enum Qcow2AmendOperation {
    /*
     * Value that Qcow2AmendHelperCBInfo::last_operation is statically
     * initialized to; it lets the helper CB tell its first invocation apart
     * from a change of operation.
     */
    QCOW2_NO_OPERATION = 0,

    QCOW2_UPGRADING,
    QCOW2_CHANGING_REFCOUNT_ORDER,
    QCOW2_DOWNGRADING,
} Qcow2AmendOperation;
5086
5087typedef struct Qcow2AmendHelperCBInfo {
5088    /* The code coordinating the amend operations should only modify
5089     * these four fields; the rest will be managed by the CB */
5090    BlockDriverAmendStatusCB *original_status_cb;
5091    void *original_cb_opaque;
5092
5093    Qcow2AmendOperation current_operation;
5094
5095    /* Total number of operations to perform (only set once) */
5096    int total_operations;
5097
5098    /* The following fields are managed by the CB */
5099
5100    /* Number of operations completed */
5101    int operations_completed;
5102
5103    /* Cumulative offset of all completed operations */
5104    int64_t offset_completed;
5105
5106    Qcow2AmendOperation last_operation;
5107    int64_t last_work_size;
5108} Qcow2AmendHelperCBInfo;
5109
5110static void qcow2_amend_helper_cb(BlockDriverState *bs,
5111                                  int64_t operation_offset,
5112                                  int64_t operation_work_size, void *opaque)
5113{
5114    Qcow2AmendHelperCBInfo *info = opaque;
5115    int64_t current_work_size;
5116    int64_t projected_work_size;
5117
5118    if (info->current_operation != info->last_operation) {
5119        if (info->last_operation != QCOW2_NO_OPERATION) {
5120            info->offset_completed += info->last_work_size;
5121            info->operations_completed++;
5122        }
5123
5124        info->last_operation = info->current_operation;
5125    }
5126
5127    assert(info->total_operations > 0);
5128    assert(info->operations_completed < info->total_operations);
5129
5130    info->last_work_size = operation_work_size;
5131
5132    current_work_size = info->offset_completed + operation_work_size;
5133
5134    /* current_work_size is the total work size for (operations_completed + 1)
5135     * operations (which includes this one), so multiply it by the number of
5136     * operations not covered and divide it by the number of operations
5137     * covered to get a projection for the operations not covered */
5138    projected_work_size = current_work_size * (info->total_operations -
5139                                               info->operations_completed - 1)
5140                                            / (info->operations_completed + 1);
5141
5142    info->original_status_cb(bs, info->offset_completed + operation_offset,
5143                             current_work_size + projected_work_size,
5144                             info->original_cb_opaque);
5145}
5146
5147static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
5148                               BlockDriverAmendStatusCB *status_cb,
5149                               void *cb_opaque,
5150                               Error **errp)
5151{
5152    BDRVQcow2State *s = bs->opaque;
5153    int old_version = s->qcow_version, new_version = old_version;
5154    uint64_t new_size = 0;
5155    const char *backing_file = NULL, *backing_format = NULL, *data_file = NULL;
5156    bool lazy_refcounts = s->use_lazy_refcounts;
5157    bool data_file_raw = data_file_is_raw(bs);
5158    const char *compat = NULL;
5159    uint64_t cluster_size = s->cluster_size;
5160    bool encrypt;
5161    int encformat;
5162    int refcount_bits = s->refcount_bits;
5163    int ret;
5164    QemuOptDesc *desc = opts->list->desc;
5165    Qcow2AmendHelperCBInfo helper_cb_info;
5166
5167    while (desc && desc->name) {
5168        if (!qemu_opt_find(opts, desc->name)) {
5169            /* only change explicitly defined options */
5170            desc++;
5171            continue;
5172        }
5173
5174        if (!strcmp(desc->name, BLOCK_OPT_COMPAT_LEVEL)) {
5175            compat = qemu_opt_get(opts, BLOCK_OPT_COMPAT_LEVEL);
5176            if (!compat) {
5177                /* preserve default */
5178            } else if (!strcmp(compat, "0.10") || !strcmp(compat, "v2")) {
5179                new_version = 2;
5180            } else if (!strcmp(compat, "1.1") || !strcmp(compat, "v3")) {
5181                new_version = 3;
5182            } else {
5183                error_setg(errp, "Unknown compatibility level %s", compat);
5184                return -EINVAL;
5185            }
5186        } else if (!strcmp(desc->name, BLOCK_OPT_PREALLOC)) {
5187            error_setg(errp, "Cannot change preallocation mode");
5188            return -ENOTSUP;
5189        } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) {
5190            new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
5191        } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FILE)) {
5192            backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
5193        } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FMT)) {
5194            backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
5195        } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT)) {
5196            encrypt = qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT,
5197                                        !!s->crypto);
5198
5199            if (encrypt != !!s->crypto) {
5200                error_setg(errp,
5201                           "Changing the encryption flag is not supported");
5202                return -ENOTSUP;
5203            }
5204        } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT_FORMAT)) {
5205            encformat = qcow2_crypt_method_from_format(
5206                qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT));
5207
5208            if (encformat != s->crypt_method_header) {
5209                error_setg(errp,
5210                           "Changing the encryption format is not supported");
5211                return -ENOTSUP;
5212            }
5213        } else if (g_str_has_prefix(desc->name, "encrypt.")) {
5214            error_setg(errp,
5215                       "Changing the encryption parameters is not supported");
5216            return -ENOTSUP;
5217        } else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) {
5218            cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE,
5219                                             cluster_size);
5220            if (cluster_size != s->cluster_size) {
5221                error_setg(errp, "Changing the cluster size is not supported");
5222                return -ENOTSUP;
5223            }
5224        } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
5225            lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS,
5226                                               lazy_refcounts);
5227        } else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) {
5228            refcount_bits = qemu_opt_get_number(opts, BLOCK_OPT_REFCOUNT_BITS,
5229                                                refcount_bits);
5230
5231            if (refcount_bits <= 0 || refcount_bits > 64 ||
5232                !is_power_of_2(refcount_bits))
5233            {
5234                error_setg(errp, "Refcount width must be a power of two and "
5235                           "may not exceed 64 bits");
5236                return -EINVAL;
5237            }
5238        } else if (!strcmp(desc->name, BLOCK_OPT_DATA_FILE)) {
5239            data_file = qemu_opt_get(opts, BLOCK_OPT_DATA_FILE);
5240            if (data_file && !has_data_file(bs)) {
5241                error_setg(errp, "data-file can only be set for images that "
5242                                 "use an external data file");
5243                return -EINVAL;
5244            }
5245        } else if (!strcmp(desc->name, BLOCK_OPT_DATA_FILE_RAW)) {
5246            data_file_raw = qemu_opt_get_bool(opts, BLOCK_OPT_DATA_FILE_RAW,
5247                                              data_file_raw);
5248            if (data_file_raw && !data_file_is_raw(bs)) {
5249                error_setg(errp, "data-file-raw cannot be set on existing "
5250                                 "images");
5251                return -EINVAL;
5252            }
5253        } else {
5254            /* if this point is reached, this probably means a new option was
5255             * added without having it covered here */
5256            abort();
5257        }
5258
5259        desc++;
5260    }
5261
5262    helper_cb_info = (Qcow2AmendHelperCBInfo){
5263        .original_status_cb = status_cb,
5264        .original_cb_opaque = cb_opaque,
5265        .total_operations = (new_version != old_version)
5266                          + (s->refcount_bits != refcount_bits)
5267    };
5268
5269    /* Upgrade first (some features may require compat=1.1) */
5270    if (new_version > old_version) {
5271        helper_cb_info.current_operation = QCOW2_UPGRADING;
5272        ret = qcow2_upgrade(bs, new_version, &qcow2_amend_helper_cb,
5273                            &helper_cb_info, errp);
5274        if (ret < 0) {
5275            return ret;
5276        }
5277    }
5278
5279    if (s->refcount_bits != refcount_bits) {
5280        int refcount_order = ctz32(refcount_bits);
5281
5282        if (new_version < 3 && refcount_bits != 16) {
5283            error_setg(errp, "Refcount widths other than 16 bits require "
5284                       "compatibility level 1.1 or above (use compat=1.1 or "
5285                       "greater)");
5286            return -EINVAL;
5287        }
5288
5289        helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER;
5290        ret = qcow2_change_refcount_order(bs, refcount_order,
5291                                          &qcow2_amend_helper_cb,
5292                                          &helper_cb_info, errp);
5293        if (ret < 0) {
5294            return ret;
5295        }
5296    }
5297
5298    /* data-file-raw blocks backing files, so clear it first if requested */
5299    if (data_file_raw) {
5300        s->autoclear_features |= QCOW2_AUTOCLEAR_DATA_FILE_RAW;
5301    } else {
5302        s->autoclear_features &= ~QCOW2_AUTOCLEAR_DATA_FILE_RAW;
5303    }
5304
5305    if (data_file) {
5306        g_free(s->image_data_file);
5307        s->image_data_file = *data_file ? g_strdup(data_file) : NULL;
5308    }
5309
5310    ret = qcow2_update_header(bs);
5311    if (ret < 0) {
5312        error_setg_errno(errp, -ret, "Failed to update the image header");
5313        return ret;
5314    }
5315
5316    if (backing_file || backing_format) {
5317        ret = qcow2_change_backing_file(bs,
5318                    backing_file ?: s->image_backing_file,
5319                    backing_format ?: s->image_backing_format);
5320        if (ret < 0) {
5321            error_setg_errno(errp, -ret, "Failed to change the backing file");
5322            return ret;
5323        }
5324    }
5325
5326    if (s->use_lazy_refcounts != lazy_refcounts) {
5327        if (lazy_refcounts) {
5328            if (new_version < 3) {
5329                error_setg(errp, "Lazy refcounts only supported with "
5330                           "compatibility level 1.1 and above (use compat=1.1 "
5331                           "or greater)");
5332                return -EINVAL;
5333            }
5334            s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
5335            ret = qcow2_update_header(bs);
5336            if (ret < 0) {
5337                s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
5338                error_setg_errno(errp, -ret, "Failed to update the image header");
5339                return ret;
5340            }
5341            s->use_lazy_refcounts = true;
5342        } else {
5343            /* make image clean first */
5344            ret = qcow2_mark_clean(bs);
5345            if (ret < 0) {
5346                error_setg_errno(errp, -ret, "Failed to make the image clean");
5347                return ret;
5348            }
5349            /* now disallow lazy refcounts */
5350            s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
5351            ret = qcow2_update_header(bs);
5352            if (ret < 0) {
5353                s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
5354                error_setg_errno(errp, -ret, "Failed to update the image header");
5355                return ret;
5356            }
5357            s->use_lazy_refcounts = false;
5358        }
5359    }
5360
5361    if (new_size) {
5362        BlockBackend *blk = blk_new(bdrv_get_aio_context(bs),
5363                                    BLK_PERM_RESIZE, BLK_PERM_ALL);
5364        ret = blk_insert_bs(blk, bs, errp);
5365        if (ret < 0) {
5366            blk_unref(blk);
5367            return ret;
5368        }
5369
5370        /*
5371         * Amending image options should ensure that the image has
5372         * exactly the given new values, so pass exact=true here.
5373         */
5374        ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, errp);
5375        blk_unref(blk);
5376        if (ret < 0) {
5377            return ret;
5378        }
5379    }
5380
5381    /* Downgrade last (so unsupported features can be removed before) */
5382    if (new_version < old_version) {
5383        helper_cb_info.current_operation = QCOW2_DOWNGRADING;
5384        ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb,
5385                              &helper_cb_info, errp);
5386        if (ret < 0) {
5387            return ret;
5388        }
5389    }
5390
5391    return 0;
5392}
5393
5394/*
5395 * If offset or size are negative, respectively, they will not be included in
5396 * the BLOCK_IMAGE_CORRUPTED event emitted.
5397 * fatal will be ignored for read-only BDS; corruptions found there will always
5398 * be considered non-fatal.
5399 */
5400void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
5401                             int64_t size, const char *message_format, ...)
5402{
5403    BDRVQcow2State *s = bs->opaque;
5404    const char *node_name;
5405    char *message;
5406    va_list ap;
5407
5408    fatal = fatal && bdrv_is_writable(bs);
5409
5410    if (s->signaled_corruption &&
5411        (!fatal || (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT)))
5412    {
5413        return;
5414    }
5415
5416    va_start(ap, message_format);
5417    message = g_strdup_vprintf(message_format, ap);
5418    va_end(ap);
5419
5420    if (fatal) {
5421        fprintf(stderr, "qcow2: Marking image as corrupt: %s; further "
5422                "corruption events will be suppressed\n", message);
5423    } else {
5424        fprintf(stderr, "qcow2: Image is corrupt: %s; further non-fatal "
5425                "corruption events will be suppressed\n", message);
5426    }
5427
5428    node_name = bdrv_get_node_name(bs);
5429    qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs),
5430                                          *node_name != '\0', node_name,
5431                                          message, offset >= 0, offset,
5432                                          size >= 0, size,
5433                                          fatal);
5434    g_free(message);
5435
5436    if (fatal) {
5437        qcow2_mark_corrupt(bs);
5438        bs->drv = NULL; /* make BDS unusable */
5439    }
5440
5441    s->signaled_corruption = true;
5442}
5443
5444static QemuOptsList qcow2_create_opts = {
5445    .name = "qcow2-create-opts",
5446    .head = QTAILQ_HEAD_INITIALIZER(qcow2_create_opts.head),
5447    .desc = {
5448        {
5449            .name = BLOCK_OPT_SIZE,
5450            .type = QEMU_OPT_SIZE,
5451            .help = "Virtual disk size"
5452        },
5453        {
5454            .name = BLOCK_OPT_COMPAT_LEVEL,
5455            .type = QEMU_OPT_STRING,
5456            .help = "Compatibility level (v2 [0.10] or v3 [1.1])"
5457        },
5458        {
5459            .name = BLOCK_OPT_BACKING_FILE,
5460            .type = QEMU_OPT_STRING,
5461            .help = "File name of a base image"
5462        },
5463        {
5464            .name = BLOCK_OPT_BACKING_FMT,
5465            .type = QEMU_OPT_STRING,
5466            .help = "Image format of the base image"
5467        },
5468        {
5469            .name = BLOCK_OPT_DATA_FILE,
5470            .type = QEMU_OPT_STRING,
5471            .help = "File name of an external data file"
5472        },
5473        {
5474            .name = BLOCK_OPT_DATA_FILE_RAW,
5475            .type = QEMU_OPT_BOOL,
5476            .help = "The external data file must stay valid as a raw image"
5477        },
5478        {
5479            .name = BLOCK_OPT_ENCRYPT,
5480            .type = QEMU_OPT_BOOL,
5481            .help = "Encrypt the image with format 'aes'. (Deprecated "
5482                    "in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)",
5483        },
5484        {
5485            .name = BLOCK_OPT_ENCRYPT_FORMAT,
5486            .type = QEMU_OPT_STRING,
5487            .help = "Encrypt the image, format choices: 'aes', 'luks'",
5488        },
5489        BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.",
5490            "ID of secret providing qcow AES key or LUKS passphrase"),
5491        BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG("encrypt."),
5492        BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE("encrypt."),
5493        BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG("encrypt."),
5494        BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG("encrypt."),
5495        BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG("encrypt."),
5496        BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME("encrypt."),
5497        {
5498            .name = BLOCK_OPT_CLUSTER_SIZE,
5499            .type = QEMU_OPT_SIZE,
5500            .help = "qcow2 cluster size",
5501            .def_value_str = stringify(DEFAULT_CLUSTER_SIZE)
5502        },
5503        {
5504            .name = BLOCK_OPT_PREALLOC,
5505            .type = QEMU_OPT_STRING,
5506            .help = "Preallocation mode (allowed values: off, metadata, "
5507                    "falloc, full)"
5508        },
5509        {
5510            .name = BLOCK_OPT_LAZY_REFCOUNTS,
5511            .type = QEMU_OPT_BOOL,
5512            .help = "Postpone refcount updates",
5513            .def_value_str = "off"
5514        },
5515        {
5516            .name = BLOCK_OPT_REFCOUNT_BITS,
5517            .type = QEMU_OPT_NUMBER,
5518            .help = "Width of a reference count entry in bits",
5519            .def_value_str = "16"
5520        },
5521        { /* end of list */ }
5522    }
5523};
5524
5525static const char *const qcow2_strong_runtime_opts[] = {
5526    "encrypt." BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET,
5527
5528    NULL
5529};
5530
5531BlockDriver bdrv_qcow2 = {
5532    .format_name        = "qcow2",
5533    .instance_size      = sizeof(BDRVQcow2State),
5534    .bdrv_probe         = qcow2_probe,
5535    .bdrv_open          = qcow2_open,
5536    .bdrv_close         = qcow2_close,
5537    .bdrv_reopen_prepare  = qcow2_reopen_prepare,
5538    .bdrv_reopen_commit   = qcow2_reopen_commit,
5539    .bdrv_reopen_commit_post = qcow2_reopen_commit_post,
5540    .bdrv_reopen_abort    = qcow2_reopen_abort,
5541    .bdrv_join_options    = qcow2_join_options,
5542    .bdrv_child_perm      = bdrv_format_default_perms,
5543    .bdrv_co_create_opts  = qcow2_co_create_opts,
5544    .bdrv_co_create       = qcow2_co_create,
5545    .bdrv_has_zero_init   = qcow2_has_zero_init,
5546    .bdrv_has_zero_init_truncate = bdrv_has_zero_init_1,
5547    .bdrv_co_block_status = qcow2_co_block_status,
5548
5549    .bdrv_co_preadv_part    = qcow2_co_preadv_part,
5550    .bdrv_co_pwritev_part   = qcow2_co_pwritev_part,
5551    .bdrv_co_flush_to_os    = qcow2_co_flush_to_os,
5552
5553    .bdrv_co_pwrite_zeroes  = qcow2_co_pwrite_zeroes,
5554    .bdrv_co_pdiscard       = qcow2_co_pdiscard,
5555    .bdrv_co_copy_range_from = qcow2_co_copy_range_from,
5556    .bdrv_co_copy_range_to  = qcow2_co_copy_range_to,
5557    .bdrv_co_truncate       = qcow2_co_truncate,
5558    .bdrv_co_pwritev_compressed_part = qcow2_co_pwritev_compressed_part,
5559    .bdrv_make_empty        = qcow2_make_empty,
5560
5561    .bdrv_snapshot_create   = qcow2_snapshot_create,
5562    .bdrv_snapshot_goto     = qcow2_snapshot_goto,
5563    .bdrv_snapshot_delete   = qcow2_snapshot_delete,
5564    .bdrv_snapshot_list     = qcow2_snapshot_list,
5565    .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp,
5566    .bdrv_measure           = qcow2_measure,
5567    .bdrv_get_info          = qcow2_get_info,
5568    .bdrv_get_specific_info = qcow2_get_specific_info,
5569
5570    .bdrv_save_vmstate    = qcow2_save_vmstate,
5571    .bdrv_load_vmstate    = qcow2_load_vmstate,
5572
5573    .supports_backing           = true,
5574    .bdrv_change_backing_file   = qcow2_change_backing_file,
5575
5576    .bdrv_refresh_limits        = qcow2_refresh_limits,
5577    .bdrv_co_invalidate_cache   = qcow2_co_invalidate_cache,
5578    .bdrv_inactivate            = qcow2_inactivate,
5579
5580    .create_opts         = &qcow2_create_opts,
5581    .strong_runtime_opts = qcow2_strong_runtime_opts,
5582    .mutable_opts        = mutable_opts,
5583    .bdrv_co_check       = qcow2_co_check,
5584    .bdrv_amend_options  = qcow2_amend_options,
5585
5586    .bdrv_detach_aio_context  = qcow2_detach_aio_context,
5587    .bdrv_attach_aio_context  = qcow2_attach_aio_context,
5588
5589    .bdrv_co_can_store_new_dirty_bitmap = qcow2_co_can_store_new_dirty_bitmap,
5590    .bdrv_co_remove_persistent_dirty_bitmap =
5591            qcow2_co_remove_persistent_dirty_bitmap,
5592};
5593
5594static void bdrv_qcow2_init(void)
5595{
5596    bdrv_register(&bdrv_qcow2);
5597}
5598
5599block_init(bdrv_qcow2_init);
5600