qemu/block/qcow2-snapshot.c
<<
>>
Prefs
   1/*
   2 * Block driver for the QCOW version 2 format
   3 *
   4 * Copyright (c) 2004-2006 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24
  25#include "qemu/osdep.h"
  26#include "sysemu/block-backend.h"
  27#include "qapi/error.h"
  28#include "qcow2.h"
  29#include "qemu/bswap.h"
  30#include "qemu/error-report.h"
  31#include "qemu/cutils.h"
  32
  33static void qcow2_free_single_snapshot(BlockDriverState *bs, int i)
  34{
  35    BDRVQcow2State *s = bs->opaque;
  36
  37    assert(i >= 0 && i < s->nb_snapshots);
  38    g_free(s->snapshots[i].name);
  39    g_free(s->snapshots[i].id_str);
  40    g_free(s->snapshots[i].unknown_extra_data);
  41    memset(&s->snapshots[i], 0, sizeof(s->snapshots[i]));
  42}
  43
  44void qcow2_free_snapshots(BlockDriverState *bs)
  45{
  46    BDRVQcow2State *s = bs->opaque;
  47    int i;
  48
  49    for(i = 0; i < s->nb_snapshots; i++) {
  50        qcow2_free_single_snapshot(bs, i);
  51    }
  52    g_free(s->snapshots);
  53    s->snapshots = NULL;
  54    s->nb_snapshots = 0;
  55}
  56
/*
 * Read the on-disk snapshot table (s->nb_snapshots entries starting at
 * s->snapshots_offset) into a newly allocated s->snapshots array and set
 * s->snapshots_size to the number of bytes consumed.
 *
 * Returns 0 on success.  On failure, returns a negative errno value, sets
 * @errp, and frees all snapshots read so far via qcow2_free_snapshots().
 *
 * If @repair is true, try to repair a broken snapshot table instead
 * of just returning an error:
 *
 * - If the snapshot table was too long, add the number of snapshots
 *   removed off the end to *nb_clusters_reduced.
 *   The caller will update the on-disk nb_snapshots accordingly;
 *   this leaks clusters, but is safe.
 *   (The on-disk information must be updated before
 *   qcow2_check_refcounts(), because that function relies on
 *   s->nb_snapshots to reflect the on-disk value.)
 *
 * - If there were snapshots with too much extra metadata, increment
 *   *extra_data_dropped for each.
 *   This requires the caller to eventually rewrite the whole snapshot
 *   table, which requires cluster allocation.  Therefore, this should
 *   be done only after qcow2_check_refcounts() made sure the refcount
 *   structures are valid.
 *   (In the meantime, the image is still valid because
 *   qcow2_check_refcounts() does not do anything with snapshots'
 *   extra data.)
 *
 * @nb_clusters_reduced and @extra_data_dropped are only dereferenced when
 * @repair is true, so they may be NULL otherwise.
 */
static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair,
                                   int *nb_clusters_reduced,
                                   int *extra_data_dropped,
                                   Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    QCowSnapshotHeader h;
    QCowSnapshotExtraData extra;
    QCowSnapshot *sn;
    int i, id_str_size, name_size;
    int64_t offset, pre_sn_offset;
    /*
     * Length of the table as it would be after any extra-data truncation;
     * @offset, in contrast, follows the actual on-disk layout.
     */
    uint64_t table_length = 0;
    int ret;

    /* Nothing to read: leave an empty in-memory table */
    if (!s->nb_snapshots) {
        s->snapshots = NULL;
        s->snapshots_size = 0;
        return 0;
    }

    offset = s->snapshots_offset;
    /* Zero-initialized so the fail path can free partially read entries */
    s->snapshots = g_new0(QCowSnapshot, s->nb_snapshots);

    for(i = 0; i < s->nb_snapshots; i++) {
        bool truncate_unknown_extra_data = false;

        /* Remember where this entry starts in case it has to be dropped */
        pre_sn_offset = offset;
        table_length = ROUND_UP(table_length, 8);

        /* Read statically sized part of the snapshot header */
        offset = ROUND_UP(offset, 8);
        ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to read snapshot table");
            goto fail;
        }

        /* Convert the big-endian on-disk header fields to host order */
        offset += sizeof(h);
        sn = s->snapshots + i;
        sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
        sn->l1_size = be32_to_cpu(h.l1_size);
        sn->vm_state_size = be32_to_cpu(h.vm_state_size);
        sn->date_sec = be32_to_cpu(h.date_sec);
        sn->date_nsec = be32_to_cpu(h.date_nsec);
        sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec);
        sn->extra_data_size = be32_to_cpu(h.extra_data_size);

        id_str_size = be16_to_cpu(h.id_str_size);
        name_size = be16_to_cpu(h.name_size);

        /* Oversized extra data is an error unless we are repairing */
        if (sn->extra_data_size > QCOW_MAX_SNAPSHOT_EXTRA_DATA) {
            if (!repair) {
                ret = -EFBIG;
                error_setg(errp, "Too much extra metadata in snapshot table "
                           "entry %i", i);
                error_append_hint(errp, "You can force-remove this extra "
                                  "metadata with qemu-img check -r all\n");
                goto fail;
            }

            fprintf(stderr, "Discarding too much extra metadata in snapshot "
                    "table entry %i (%" PRIu32 " > %u)\n",
                    i, sn->extra_data_size, QCOW_MAX_SNAPSHOT_EXTRA_DATA);

            (*extra_data_dropped)++;
            truncate_unknown_extra_data = true;
        }

        /*
         * Read known extra data.  Only the part actually present on disk is
         * read; the fields of @extra are consulted below only if
         * extra_data_size shows that they were covered by this read.
         */
        ret = bdrv_pread(bs->file, offset, &extra,
                         MIN(sizeof(extra), sn->extra_data_size));
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to read snapshot table");
            goto fail;
        }
        offset += MIN(sizeof(extra), sn->extra_data_size);

        /* The 64-bit VM state size, when present, overrides the header's */
        if (sn->extra_data_size >= endof(QCowSnapshotExtraData,
                                         vm_state_size_large)) {
            sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large);
        }

        /* Without a stored disk size, fall back to the current image size */
        if (sn->extra_data_size >= endof(QCowSnapshotExtraData, disk_size)) {
            sn->disk_size = be64_to_cpu(extra.disk_size);
        } else {
            sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
        }

        /* Without a stored icount, use all-ones as the placeholder value */
        if (sn->extra_data_size >= endof(QCowSnapshotExtraData, icount)) {
            sn->icount = be64_to_cpu(extra.icount);
        } else {
            sn->icount = -1ULL;
        }

        if (sn->extra_data_size > sizeof(extra)) {
            uint64_t extra_data_end;
            size_t unknown_extra_data_size;

            /*
             * Computed from the on-disk size, i.e. before any truncation
             * below, so that @offset ends up past all of the extra data.
             */
            extra_data_end = offset + sn->extra_data_size - sizeof(extra);

            if (truncate_unknown_extra_data) {
                sn->extra_data_size = QCOW_MAX_SNAPSHOT_EXTRA_DATA;
            }

            /* Store unknown extra data */
            unknown_extra_data_size = sn->extra_data_size - sizeof(extra);
            sn->unknown_extra_data = g_malloc(unknown_extra_data_size);
            ret = bdrv_pread(bs->file, offset, sn->unknown_extra_data,
                             unknown_extra_data_size);
            if (ret < 0) {
                error_setg_errno(errp, -ret,
                                 "Failed to read snapshot table");
                goto fail;
            }
            offset = extra_data_end;
        }

        /* Read snapshot ID (stored without terminator; add one in memory) */
        sn->id_str = g_malloc(id_str_size + 1);
        ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to read snapshot table");
            goto fail;
        }
        offset += id_str_size;
        sn->id_str[id_str_size] = '\0';

        /* Read snapshot name (stored without terminator; add one in memory) */
        sn->name = g_malloc(name_size + 1);
        ret = bdrv_pread(bs->file, offset, sn->name, name_size);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to read snapshot table");
            goto fail;
        }
        offset += name_size;
        sn->name[name_size] = '\0';

        /* Note that the extra data may have been truncated */
        table_length += sizeof(h) + sn->extra_data_size + id_str_size +
                        name_size;
        /*
         * Truncation only happens in repair mode, so outside of it both
         * length computations must agree.
         */
        if (!repair) {
            assert(table_length == offset - s->snapshots_offset);
        }

        if (table_length > QCOW_MAX_SNAPSHOTS_SIZE ||
            offset - s->snapshots_offset > INT_MAX)
        {
            if (!repair) {
                ret = -EFBIG;
                error_setg(errp, "Snapshot table is too big");
                error_append_hint(errp, "You can force-remove all %u "
                                  "overhanging snapshots with qemu-img check "
                                  "-r all\n", s->nb_snapshots - i);
                goto fail;
            }

            fprintf(stderr, "Discarding %u overhanging snapshots (snapshot "
                    "table is too big)\n", s->nb_snapshots - i);

            *nb_clusters_reduced += (s->nb_snapshots - i);

            /* Discard current snapshot also */
            qcow2_free_single_snapshot(bs, i);

            /*
             * This leaks all the rest of the snapshot table and the
             * snapshots' clusters, but we run in check -r all mode,
             * so qcow2_check_refcounts() will take care of it.
             */
            s->nb_snapshots = i;
            /* The table now ends where the discarded entry began */
            offset = pre_sn_offset;
            break;
        }
    }

    assert(offset - s->snapshots_offset <= INT_MAX);
    s->snapshots_size = offset - s->snapshots_offset;
    return 0;

fail:
    qcow2_free_snapshots(bs);
    return ret;
}
 262
 263int qcow2_read_snapshots(BlockDriverState *bs, Error **errp)
 264{
 265    return qcow2_do_read_snapshots(bs, false, NULL, NULL, errp);
 266}
 267
/*
 * Write the in-memory snapshot list (s->snapshots) into newly allocated
 * clusters and then switch the image header over to the new table.
 *
 * Sequence: compute the table size, allocate clusters, write every entry,
 * flush, update nb_snapshots/snapshots_offset in the header with a single
 * synchronous write, and finally free the clusters of the old table.
 *
 * Returns 0 on success.  On failure, returns a negative errno value and
 * frees the newly allocated clusters (if any); s->snapshots_offset and
 * s->snapshots_size are left unchanged in that case.
 */
int qcow2_write_snapshots(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;
    QCowSnapshot *sn;
    QCowSnapshotHeader h;
    QCowSnapshotExtraData extra;
    int i, name_size, id_str_size, snapshots_size;
    /* Mirrors the two adjacent header fields so they go out in one write */
    struct {
        uint32_t nb_snapshots;
        uint64_t snapshots_offset;
    } QEMU_PACKED header_data;
    int64_t offset, snapshots_offset = 0;
    int ret;

    /* compute the size of the snapshots */
    offset = 0;
    for(i = 0; i < s->nb_snapshots; i++) {
        sn = s->snapshots + i;
        offset = ROUND_UP(offset, 8);
        offset += sizeof(h);
        /* At least the full known extra data is always written */
        offset += MAX(sizeof(extra), sn->extra_data_size);
        offset += strlen(sn->id_str);
        offset += strlen(sn->name);

        if (offset > QCOW_MAX_SNAPSHOTS_SIZE) {
            /*
             * snapshots_offset is still 0 here, so the fail path does not
             * touch the not-yet-assigned snapshots_size.
             */
            ret = -EFBIG;
            goto fail;
        }
    }

    assert(offset <= INT_MAX);
    snapshots_size = offset;

    /* Allocate space for the new snapshot list */
    snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
    offset = snapshots_offset;
    if (offset < 0) {
        ret = offset;
        goto fail;
    }
    ret = bdrv_flush(bs);
    if (ret < 0) {
        goto fail;
    }

    /* The snapshot list position has not yet been updated, so these clusters
     * must indeed be completely free */
    ret = qcow2_pre_write_overlap_check(bs, 0, offset, snapshots_size, false);
    if (ret < 0) {
        goto fail;
    }


    /* Write all snapshots to the new list */
    for(i = 0; i < s->nb_snapshots; i++) {
        sn = s->snapshots + i;
        memset(&h, 0, sizeof(h));
        h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
        h.l1_size = cpu_to_be32(sn->l1_size);
        /* If it doesn't fit in 32 bit, older implementations should treat it
         * as a disk-only snapshot rather than truncate the VM state */
        if (sn->vm_state_size <= 0xffffffff) {
            h.vm_state_size = cpu_to_be32(sn->vm_state_size);
        }
        h.date_sec = cpu_to_be32(sn->date_sec);
        h.date_nsec = cpu_to_be32(sn->date_nsec);
        h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
        h.extra_data_size = cpu_to_be32(MAX(sizeof(extra),
                                            sn->extra_data_size));

        /* The known extra data is always written in full */
        memset(&extra, 0, sizeof(extra));
        extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size);
        extra.disk_size = cpu_to_be64(sn->disk_size);
        extra.icount = cpu_to_be64(sn->icount);

        /* The on-disk length fields are 16 bits wide */
        id_str_size = strlen(sn->id_str);
        name_size = strlen(sn->name);
        assert(id_str_size <= UINT16_MAX && name_size <= UINT16_MAX);
        h.id_str_size = cpu_to_be16(id_str_size);
        h.name_size = cpu_to_be16(name_size);
        /* Same 8-byte entry alignment as in the size computation above */
        offset = ROUND_UP(offset, 8);

        ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
        if (ret < 0) {
            goto fail;
        }
        offset += sizeof(h);

        ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
        if (ret < 0) {
            goto fail;
        }
        offset += sizeof(extra);

        /* Preserve extra data beyond the fields known to this code */
        if (sn->extra_data_size > sizeof(extra)) {
            size_t unknown_extra_data_size =
                sn->extra_data_size - sizeof(extra);

            /* qcow2_read_snapshots() ensures no unbounded allocation */
            assert(unknown_extra_data_size <= BDRV_REQUEST_MAX_BYTES);
            assert(sn->unknown_extra_data);

            ret = bdrv_pwrite(bs->file, offset, sn->unknown_extra_data,
                              unknown_extra_data_size);
            if (ret < 0) {
                goto fail;
            }
            offset += unknown_extra_data_size;
        }

        /* ID and name are stored without a terminating NUL */
        ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
        if (ret < 0) {
            goto fail;
        }
        offset += id_str_size;

        ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
        if (ret < 0) {
            goto fail;
        }
        offset += name_size;
    }

    /*
     * Update the header to point to the new snapshot table. This requires the
     * new table and its refcounts to be stable on disk.
     */
    ret = bdrv_flush(bs);
    if (ret < 0) {
        goto fail;
    }

    /* header_data relies on the two fields being adjacent in QCowHeader */
    QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
                      endof(QCowHeader, nb_snapshots));

    header_data.nb_snapshots        = cpu_to_be32(s->nb_snapshots);
    header_data.snapshots_offset    = cpu_to_be64(snapshots_offset);

    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
                           &header_data, sizeof(header_data));
    if (ret < 0) {
        goto fail;
    }

    /* free the old snapshot table */
    qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size,
                        QCOW2_DISCARD_SNAPSHOT);
    s->snapshots_offset = snapshots_offset;
    s->snapshots_size = snapshots_size;
    return 0;

fail:
    /* Only release the new table if its clusters were actually allocated */
    if (snapshots_offset > 0) {
        qcow2_free_clusters(bs, snapshots_offset, snapshots_size,
                            QCOW2_DISCARD_ALWAYS);
    }
    return ret;
}
 427
/*
 * Image-check step: re-read the snapshot table pointer from the image
 * header, validate it, and load the snapshot table into memory.
 *
 * If @fix contains BDRV_FIX_ERRORS, an over-long table is cut down (the
 * header's snapshot count is rewritten here) and oversized extra data is
 * dropped from the in-memory copy.  Problems found are accounted for in
 * @result; corruptions left in result->corruptions are not repaired by
 * this function.
 *
 * Returns 0 on success and a negative errno value on failure.  On failure
 * to validate or read the table, s->snapshots_offset and s->nb_snapshots
 * are reset to 0.
 */
int coroutine_fn qcow2_check_read_snapshot_table(BlockDriverState *bs,
                                                 BdrvCheckResult *result,
                                                 BdrvCheckMode fix)
{
    BDRVQcow2State *s = bs->opaque;
    Error *local_err = NULL;
    int nb_clusters_reduced = 0;
    int extra_data_dropped = 0;
    int ret;
    /* Mirrors the adjacent nb_snapshots/snapshots_offset header fields */
    struct {
        uint32_t nb_snapshots;
        uint64_t snapshots_offset;
    } QEMU_PACKED snapshot_table_pointer;

    /* qcow2_do_open() discards this information in check mode */
    ret = bdrv_pread(bs->file, offsetof(QCowHeader, nb_snapshots),
                     &snapshot_table_pointer, sizeof(snapshot_table_pointer));
    if (ret < 0) {
        result->check_errors++;
        fprintf(stderr, "ERROR failed to read the snapshot table pointer from "
                "the image header: %s\n", strerror(-ret));
        return ret;
    }

    s->snapshots_offset = be64_to_cpu(snapshot_table_pointer.snapshots_offset);
    s->nb_snapshots = be32_to_cpu(snapshot_table_pointer.nb_snapshots);

    /* In repair mode, clamp the snapshot count to the supported maximum */
    if (s->nb_snapshots > QCOW_MAX_SNAPSHOTS && (fix & BDRV_FIX_ERRORS)) {
        fprintf(stderr, "Discarding %u overhanging snapshots\n",
                s->nb_snapshots - QCOW_MAX_SNAPSHOTS);

        nb_clusters_reduced += s->nb_snapshots - QCOW_MAX_SNAPSHOTS;
        s->nb_snapshots = QCOW_MAX_SNAPSHOTS;
    }

    ret = qcow2_validate_table(bs, s->snapshots_offset, s->nb_snapshots,
                               sizeof(QCowSnapshotHeader),
                               sizeof(QCowSnapshotHeader) * QCOW_MAX_SNAPSHOTS,
                               "snapshot table", &local_err);
    if (ret < 0) {
        result->check_errors++;
        error_reportf_err(local_err, "ERROR ");

        /* Still above the limit means we were not allowed to clamp above */
        if (s->nb_snapshots > QCOW_MAX_SNAPSHOTS) {
            fprintf(stderr, "You can force-remove all %u overhanging snapshots "
                    "with qemu-img check -r all\n",
                    s->nb_snapshots - QCOW_MAX_SNAPSHOTS);
        }

        /* We did not read the snapshot table, so invalidate this information */
        s->snapshots_offset = 0;
        s->nb_snapshots = 0;

        return ret;
    }

    /*
     * NOTE(review): s->lock is released around the table read; presumably
     * the callee must not run with the lock held -- confirm.
     */
    qemu_co_mutex_unlock(&s->lock);
    ret = qcow2_do_read_snapshots(bs, fix & BDRV_FIX_ERRORS,
                                  &nb_clusters_reduced, &extra_data_dropped,
                                  &local_err);
    qemu_co_mutex_lock(&s->lock);
    if (ret < 0) {
        result->check_errors++;
        error_reportf_err(local_err,
                          "ERROR failed to read the snapshot table: ");

        /* We did not read the snapshot table, so invalidate this information */
        s->snapshots_offset = 0;
        s->nb_snapshots = 0;

        return ret;
    }
    /* Everything dropped or truncated so far counts as a corruption */
    result->corruptions += nb_clusters_reduced + extra_data_dropped;

    if (nb_clusters_reduced) {
        /*
         * Update image header now, because:
         * (1) qcow2_check_refcounts() relies on s->nb_snapshots to be
         *     the same as what the image header says,
         * (2) this leaks clusters, but qcow2_check_refcounts() will
         *     fix that.
         */
        assert(fix & BDRV_FIX_ERRORS);

        /* Only the snapshot count is rewritten; the table offset stays */
        snapshot_table_pointer.nb_snapshots = cpu_to_be32(s->nb_snapshots);
        ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
                               &snapshot_table_pointer.nb_snapshots,
                               sizeof(snapshot_table_pointer.nb_snapshots));
        if (ret < 0) {
            result->check_errors++;
            fprintf(stderr, "ERROR failed to update the snapshot count in the "
                    "image header: %s\n", strerror(-ret));
            return ret;
        }

        /* The removed snapshots are now fixed; extra-data drops are not yet */
        result->corruptions_fixed += nb_clusters_reduced;
        result->corruptions -= nb_clusters_reduced;
    }

    /*
     * All of v3 images' snapshot table entries need to have at least
     * 16 bytes of extra data.
     *
     * Incomplete entries are only counted and reported here, not rewritten.
     */
    if (s->qcow_version >= 3) {
        int i;
        for (i = 0; i < s->nb_snapshots; i++) {
            if (s->snapshots[i].extra_data_size <
                sizeof_field(QCowSnapshotExtraData, vm_state_size_large) +
                sizeof_field(QCowSnapshotExtraData, disk_size))
            {
                result->corruptions++;
                fprintf(stderr, "%s snapshot table entry %i is incomplete\n",
                        fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
            }
        }
    }

    return 0;
}
 547
 548int coroutine_fn qcow2_check_fix_snapshot_table(BlockDriverState *bs,
 549                                                BdrvCheckResult *result,
 550                                                BdrvCheckMode fix)
 551{
 552    BDRVQcow2State *s = bs->opaque;
 553    int ret;
 554
 555    if (result->corruptions && (fix & BDRV_FIX_ERRORS)) {
 556        qemu_co_mutex_unlock(&s->lock);
 557        ret = qcow2_write_snapshots(bs);
 558        qemu_co_mutex_lock(&s->lock);
 559        if (ret < 0) {
 560            result->check_errors++;
 561            fprintf(stderr, "ERROR failed to update snapshot table: %s\n",
 562                    strerror(-ret));
 563            return ret;
 564        }
 565
 566        result->corruptions_fixed += result->corruptions;
 567        result->corruptions = 0;
 568    }
 569
 570    return 0;
 571}
 572
 573static void find_new_snapshot_id(BlockDriverState *bs,
 574                                 char *id_str, int id_str_size)
 575{
 576    BDRVQcow2State *s = bs->opaque;
 577    QCowSnapshot *sn;
 578    int i;
 579    unsigned long id, id_max = 0;
 580
 581    for(i = 0; i < s->nb_snapshots; i++) {
 582        sn = s->snapshots + i;
 583        id = strtoul(sn->id_str, NULL, 10);
 584        if (id > id_max)
 585            id_max = id;
 586    }
 587    snprintf(id_str, id_str_size, "%lu", id_max + 1);
 588}
 589
 590static int find_snapshot_by_id_and_name(BlockDriverState *bs,
 591                                        const char *id,
 592                                        const char *name)
 593{
 594    BDRVQcow2State *s = bs->opaque;
 595    int i;
 596
 597    if (id && name) {
 598        for (i = 0; i < s->nb_snapshots; i++) {
 599            if (!strcmp(s->snapshots[i].id_str, id) &&
 600                !strcmp(s->snapshots[i].name, name)) {
 601                return i;
 602            }
 603        }
 604    } else if (id) {
 605        for (i = 0; i < s->nb_snapshots; i++) {
 606            if (!strcmp(s->snapshots[i].id_str, id)) {
 607                return i;
 608            }
 609        }
 610    } else if (name) {
 611        for (i = 0; i < s->nb_snapshots; i++) {
 612            if (!strcmp(s->snapshots[i].name, name)) {
 613                return i;
 614            }
 615        }
 616    }
 617
 618    return -1;
 619}
 620
 621static int find_snapshot_by_id_or_name(BlockDriverState *bs,
 622                                       const char *id_or_name)
 623{
 624    int ret;
 625
 626    ret = find_snapshot_by_id_and_name(bs, id_or_name, NULL);
 627    if (ret >= 0) {
 628        return ret;
 629    }
 630    return find_snapshot_by_id_and_name(bs, NULL, id_or_name);
 631}
 632
/*
 * Create a new internal snapshot described by @sn_info.
 *
 * A new snapshot ID is always generated and stored in sn_info->id_str,
 * overwriting whatever the caller put there.  The active L1 table is copied
 * to newly allocated clusters, the refcounts of everything it references
 * are increased, and the snapshot is appended to the snapshot table.
 *
 * Returns 0 on success, -EFBIG if the maximum number of snapshots has been
 * reached, -ENOTSUP for images with an external data file, or another
 * negative errno value on failure.
 */
int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
{
    BDRVQcow2State *s = bs->opaque;
    QCowSnapshot *new_snapshot_list = NULL;
    QCowSnapshot *old_snapshot_list = NULL;
    QCowSnapshot sn1, *sn = &sn1;
    int i, ret;
    uint64_t *l1_table = NULL;
    int64_t l1_table_offset;

    if (s->nb_snapshots >= QCOW_MAX_SNAPSHOTS) {
        return -EFBIG;
    }

    if (has_data_file(bs)) {
        return -ENOTSUP;
    }

    memset(sn, 0, sizeof(*sn));

    /* Generate an ID */
    find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));

    /* Populate sn with passed data */
    sn->id_str = g_strdup(sn_info->id_str);
    sn->name = g_strdup(sn_info->name);

    sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
    sn->vm_state_size = sn_info->vm_state_size;
    sn->date_sec = sn_info->date_sec;
    sn->date_nsec = sn_info->date_nsec;
    sn->vm_clock_nsec = sn_info->vm_clock_nsec;
    sn->icount = sn_info->icount;
    sn->extra_data_size = sizeof(QCowSnapshotExtraData);

    /* Allocate the L1 table of the snapshot and copy the current one there. */
    l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * L1E_SIZE);
    if (l1_table_offset < 0) {
        ret = l1_table_offset;
        goto fail;
    }

    sn->l1_table_offset = l1_table_offset;
    sn->l1_size = s->l1_size;

    /* An empty L1 table may legitimately yield a NULL buffer */
    l1_table = g_try_new(uint64_t, s->l1_size);
    if (s->l1_size && l1_table == NULL) {
        ret = -ENOMEM;
        goto fail;
    }

    /* The on-disk L1 table is big-endian */
    for(i = 0; i < s->l1_size; i++) {
        l1_table[i] = cpu_to_be64(s->l1_table[i]);
    }

    ret = qcow2_pre_write_overlap_check(bs, 0, sn->l1_table_offset,
                                        s->l1_size * L1E_SIZE, false);
    if (ret < 0) {
        goto fail;
    }

    ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
                      s->l1_size * L1E_SIZE);
    if (ret < 0) {
        goto fail;
    }

    g_free(l1_table);
    l1_table = NULL;

    /*
     * Increase the refcounts of all clusters and make sure everything is
     * stable on disk before updating the snapshot table to contain a pointer
     * to the new L1 table.
     */
    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
    if (ret < 0) {
        goto fail;
    }

    /* Append the new snapshot to the snapshot list */
    new_snapshot_list = g_new(QCowSnapshot, s->nb_snapshots + 1);
    if (s->snapshots) {
        memcpy(new_snapshot_list, s->snapshots,
               s->nb_snapshots * sizeof(QCowSnapshot));
        old_snapshot_list = s->snapshots;
    }
    s->snapshots = new_snapshot_list;
    s->snapshots[s->nb_snapshots++] = *sn;

    ret = qcow2_write_snapshots(bs);
    if (ret < 0) {
        /*
         * Roll back to the old list.  Only the new array itself is freed
         * here; the new entry's strings are released at the fail label.
         */
        g_free(s->snapshots);
        s->snapshots = old_snapshot_list;
        s->nb_snapshots--;
        goto fail;
    }

    /* Entries were copied by value, so only the old array itself is freed */
    g_free(old_snapshot_list);

    /* The VM state isn't needed any more in the active L1 table; in fact, it
     * hurts by causing expensive COW for the next snapshot. */
    qcow2_cluster_discard(bs, qcow2_vm_state_offset(s),
                          ROUND_UP(sn->vm_state_size, s->cluster_size),
                          QCOW2_DISCARD_NEVER, false);

#ifdef DEBUG_ALLOC
    {
      BdrvCheckResult result = {0};
      qcow2_check_refcounts(bs, &result, 0);
    }
#endif
    return 0;

fail:
    /* The clusters allocated for the new L1 table are not freed here */
    g_free(sn->id_str);
    g_free(sn->name);
    g_free(l1_table);

    return ret;
}
 755
/*
 * Revert the active image to the snapshot identified by @snapshot_id, which
 * is matched first against snapshot IDs and then against snapshot names.
 *
 * The image is resized to the snapshot's disk size if necessary, and the
 * snapshot's L1 table is copied over the active L1 table, with refcounts
 * adjusted accordingly.
 *
 * Returns 0 on success, -ENOTSUP for images with an external data file,
 * -ENOENT if no such snapshot exists, or another negative errno value on
 * failure.
 */
int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
{
    BDRVQcow2State *s = bs->opaque;
    QCowSnapshot *sn;
    Error *local_err = NULL;
    int i, snapshot_index;
    int cur_l1_bytes, sn_l1_bytes;
    int ret;
    uint64_t *sn_l1_table = NULL;

    if (has_data_file(bs)) {
        return -ENOTSUP;
    }

    /* Search the snapshot */
    snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
    if (snapshot_index < 0) {
        return -ENOENT;
    }
    sn = &s->snapshots[snapshot_index];

    ret = qcow2_validate_table(bs, sn->l1_table_offset, sn->l1_size,
                               L1E_SIZE, QCOW_MAX_L1_SIZE,
                               "Snapshot L1 table", &local_err);
    if (ret < 0) {
        error_report_err(local_err);
        goto fail;
    }

    /* Resize the image if the snapshot was taken at a different disk size */
    if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) {
        BlockBackend *blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL,
                                            &local_err);
        if (!blk) {
            error_report_err(local_err);
            ret = -ENOTSUP;
            goto fail;
        }

        ret = blk_truncate(blk, sn->disk_size, true, PREALLOC_MODE_OFF, 0,
                           &local_err);
        blk_unref(blk);
        if (ret < 0) {
            error_report_err(local_err);
            goto fail;
        }
    }

    /*
     * Make sure that the current L1 table is big enough to contain the whole
     * L1 table of the snapshot. If the snapshot L1 table is smaller, the
     * current one must be padded with zeros.
     */
    ret = qcow2_grow_l1_table(bs, sn->l1_size, true);
    if (ret < 0) {
        goto fail;
    }

    cur_l1_bytes = s->l1_size * L1E_SIZE;
    sn_l1_bytes = sn->l1_size * L1E_SIZE;

    /*
     * Copy the snapshot L1 table to the current L1 table.
     *
     * Before overwriting the old current L1 table on disk, make sure to
     * increase all refcounts for the clusters referenced by the new one.
     * Decrease the refcount referenced by the old one only when the L1
     * table is overwritten.
     */
    /*
     * The buffer is zero-filled and sized for the current L1 table, so any
     * part beyond the snapshot's L1 table stays zero (the padding).
     */
    sn_l1_table = g_try_malloc0(cur_l1_bytes);
    if (cur_l1_bytes && sn_l1_table == NULL) {
        ret = -ENOMEM;
        goto fail;
    }

    ret = bdrv_pread(bs->file, sn->l1_table_offset,
                     sn_l1_table, sn_l1_bytes);
    if (ret < 0) {
        goto fail;
    }

    ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset,
                                         sn->l1_size, 1);
    if (ret < 0) {
        goto fail;
    }

    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1,
                                        s->l1_table_offset, cur_l1_bytes,
                                        false);
    if (ret < 0) {
        goto fail;
    }

    /* Overwrite the whole active L1 table on disk, including the padding */
    ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
                           cur_l1_bytes);
    if (ret < 0) {
        goto fail;
    }

    /*
     * Decrease refcount of clusters of current L1 table.
     *
     * At this point, the in-memory s->l1_table points to the old L1 table,
     * whereas on disk we already have the new one.
     *
     * qcow2_update_snapshot_refcount special cases the current L1 table to use
     * the in-memory data instead of really using the offset to load a new one,
     * which is why this works.
     */
    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset,
                                         s->l1_size, -1);

    /*
     * Now update the in-memory L1 table to be in sync with the on-disk one. We
     * need to do this even if updating refcounts failed.
     */
    for(i = 0;i < s->l1_size; i++) {
        s->l1_table[i] = be64_to_cpu(sn_l1_table[i]);
    }

    /* Deferred error check for the refcount decrease above */
    if (ret < 0) {
        goto fail;
    }

    g_free(sn_l1_table);
    sn_l1_table = NULL;

    /*
     * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
     * when we decreased the refcount of the old snapshot).
     */
    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
    if (ret < 0) {
        goto fail;
    }

#ifdef DEBUG_ALLOC
    {
        BdrvCheckResult result = {0};
        qcow2_check_refcounts(bs, &result, 0);
    }
#endif
    return 0;

fail:
    g_free(sn_l1_table);
    return ret;
}
 905
 906int qcow2_snapshot_delete(BlockDriverState *bs,
 907                          const char *snapshot_id,
 908                          const char *name,
 909                          Error **errp)
 910{
 911    BDRVQcow2State *s = bs->opaque;
 912    QCowSnapshot sn;
 913    int snapshot_index, ret;
 914
 915    if (has_data_file(bs)) {
 916        return -ENOTSUP;
 917    }
 918
 919    /* Search the snapshot */
 920    snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
 921    if (snapshot_index < 0) {
 922        error_setg(errp, "Can't find the snapshot");
 923        return -ENOENT;
 924    }
 925    sn = s->snapshots[snapshot_index];
 926
 927    ret = qcow2_validate_table(bs, sn.l1_table_offset, sn.l1_size,
 928                               L1E_SIZE, QCOW_MAX_L1_SIZE,
 929                               "Snapshot L1 table", errp);
 930    if (ret < 0) {
 931        return ret;
 932    }
 933
 934    /* Remove it from the snapshot list */
 935    memmove(s->snapshots + snapshot_index,
 936            s->snapshots + snapshot_index + 1,
 937            (s->nb_snapshots - snapshot_index - 1) * sizeof(sn));
 938    s->nb_snapshots--;
 939    ret = qcow2_write_snapshots(bs);
 940    if (ret < 0) {
 941        error_setg_errno(errp, -ret,
 942                         "Failed to remove snapshot from snapshot list");
 943        return ret;
 944    }
 945
 946    /*
 947     * The snapshot is now unused, clean up. If we fail after this point, we
 948     * won't recover but just leak clusters.
 949     */
 950    g_free(sn.unknown_extra_data);
 951    g_free(sn.id_str);
 952    g_free(sn.name);
 953
 954    /*
 955     * Now decrease the refcounts of clusters referenced by the snapshot and
 956     * free the L1 table.
 957     */
 958    ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
 959                                         sn.l1_size, -1);
 960    if (ret < 0) {
 961        error_setg_errno(errp, -ret, "Failed to free the cluster and L1 table");
 962        return ret;
 963    }
 964    qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * L1E_SIZE,
 965                        QCOW2_DISCARD_SNAPSHOT);
 966
 967    /* must update the copied flag on the current cluster offsets */
 968    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
 969    if (ret < 0) {
 970        error_setg_errno(errp, -ret,
 971                         "Failed to update snapshot status in disk");
 972        return ret;
 973    }
 974
 975#ifdef DEBUG_ALLOC
 976    {
 977        BdrvCheckResult result = {0};
 978        qcow2_check_refcounts(bs, &result, 0);
 979    }
 980#endif
 981    return 0;
 982}
 983
 984int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
 985{
 986    BDRVQcow2State *s = bs->opaque;
 987    QEMUSnapshotInfo *sn_tab, *sn_info;
 988    QCowSnapshot *sn;
 989    int i;
 990
 991    if (has_data_file(bs)) {
 992        return -ENOTSUP;
 993    }
 994    if (!s->nb_snapshots) {
 995        *psn_tab = NULL;
 996        return s->nb_snapshots;
 997    }
 998
 999    sn_tab = g_new0(QEMUSnapshotInfo, s->nb_snapshots);
1000    for(i = 0; i < s->nb_snapshots; i++) {
1001        sn_info = sn_tab + i;
1002        sn = s->snapshots + i;
1003        pstrcpy(sn_info->id_str, sizeof(sn_info->id_str),
1004                sn->id_str);
1005        pstrcpy(sn_info->name, sizeof(sn_info->name),
1006                sn->name);
1007        sn_info->vm_state_size = sn->vm_state_size;
1008        sn_info->date_sec = sn->date_sec;
1009        sn_info->date_nsec = sn->date_nsec;
1010        sn_info->vm_clock_nsec = sn->vm_clock_nsec;
1011        sn_info->icount = sn->icount;
1012    }
1013    *psn_tab = sn_tab;
1014    return s->nb_snapshots;
1015}
1016
1017int qcow2_snapshot_load_tmp(BlockDriverState *bs,
1018                            const char *snapshot_id,
1019                            const char *name,
1020                            Error **errp)
1021{
1022    int i, snapshot_index;
1023    BDRVQcow2State *s = bs->opaque;
1024    QCowSnapshot *sn;
1025    uint64_t *new_l1_table;
1026    int new_l1_bytes;
1027    int ret;
1028
1029    assert(bdrv_is_read_only(bs));
1030
1031    /* Search the snapshot */
1032    snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
1033    if (snapshot_index < 0) {
1034        error_setg(errp,
1035                   "Can't find snapshot");
1036        return -ENOENT;
1037    }
1038    sn = &s->snapshots[snapshot_index];
1039
1040    /* Allocate and read in the snapshot's L1 table */
1041    ret = qcow2_validate_table(bs, sn->l1_table_offset, sn->l1_size,
1042                               L1E_SIZE, QCOW_MAX_L1_SIZE,
1043                               "Snapshot L1 table", errp);
1044    if (ret < 0) {
1045        return ret;
1046    }
1047    new_l1_bytes = sn->l1_size * L1E_SIZE;
1048    new_l1_table = qemu_try_blockalign(bs->file->bs, new_l1_bytes);
1049    if (new_l1_table == NULL) {
1050        return -ENOMEM;
1051    }
1052
1053    ret = bdrv_pread(bs->file, sn->l1_table_offset,
1054                     new_l1_table, new_l1_bytes);
1055    if (ret < 0) {
1056        error_setg(errp, "Failed to read l1 table for snapshot");
1057        qemu_vfree(new_l1_table);
1058        return ret;
1059    }
1060
1061    /* Switch the L1 table */
1062    qemu_vfree(s->l1_table);
1063
1064    s->l1_size = sn->l1_size;
1065    s->l1_table_offset = sn->l1_table_offset;
1066    s->l1_table = new_l1_table;
1067
1068    for(i = 0;i < s->l1_size; i++) {
1069        be64_to_cpus(&s->l1_table[i]);
1070    }
1071
1072    return 0;
1073}
1074