qemu/block/qcow2-snapshot.c
<<
>>
Prefs
/*
 * Block driver for the QCOW version 2 format
 *
 * Copyright (c) 2004-2006 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
  24
  25#include "qemu/osdep.h"
  26#include "sysemu/block-backend.h"
  27#include "qapi/error.h"
  28#include "qcow2.h"
  29#include "qemu/bswap.h"
  30#include "qemu/error-report.h"
  31#include "qemu/cutils.h"
  32#include "qemu/memalign.h"
  33
  34static void qcow2_free_single_snapshot(BlockDriverState *bs, int i)
  35{
  36    BDRVQcow2State *s = bs->opaque;
  37
  38    assert(i >= 0 && i < s->nb_snapshots);
  39    g_free(s->snapshots[i].name);
  40    g_free(s->snapshots[i].id_str);
  41    g_free(s->snapshots[i].unknown_extra_data);
  42    memset(&s->snapshots[i], 0, sizeof(s->snapshots[i]));
  43}
  44
  45void qcow2_free_snapshots(BlockDriverState *bs)
  46{
  47    BDRVQcow2State *s = bs->opaque;
  48    int i;
  49
  50    for(i = 0; i < s->nb_snapshots; i++) {
  51        qcow2_free_single_snapshot(bs, i);
  52    }
  53    g_free(s->snapshots);
  54    s->snapshots = NULL;
  55    s->nb_snapshots = 0;
  56}
  57
  58/*
  59 * If @repair is true, try to repair a broken snapshot table instead
  60 * of just returning an error:
  61 *
  62 * - If the snapshot table was too long, set *nb_clusters_reduced to
  63 *   the number of snapshots removed off the end.
  64 *   The caller will update the on-disk nb_snapshots accordingly;
  65 *   this leaks clusters, but is safe.
  66 *   (The on-disk information must be updated before
  67 *   qcow2_check_refcounts(), because that function relies on
  68 *   s->nb_snapshots to reflect the on-disk value.)
  69 *
  70 * - If there were snapshots with too much extra metadata, increment
  71 *   *extra_data_dropped for each.
  72 *   This requires the caller to eventually rewrite the whole snapshot
  73 *   table, which requires cluster allocation.  Therefore, this should
  74 *   be done only after qcow2_check_refcounts() made sure the refcount
  75 *   structures are valid.
  76 *   (In the meantime, the image is still valid because
  77 *   qcow2_check_refcounts() does not do anything with snapshots'
  78 *   extra data.)
  79 */
  80static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair,
  81                                   int *nb_clusters_reduced,
  82                                   int *extra_data_dropped,
  83                                   Error **errp)
  84{
  85    BDRVQcow2State *s = bs->opaque;
  86    QCowSnapshotHeader h;
  87    QCowSnapshotExtraData extra;
  88    QCowSnapshot *sn;
  89    int i, id_str_size, name_size;
  90    int64_t offset, pre_sn_offset;
  91    uint64_t table_length = 0;
  92    int ret;
  93
  94    if (!s->nb_snapshots) {
  95        s->snapshots = NULL;
  96        s->snapshots_size = 0;
  97        return 0;
  98    }
  99
 100    offset = s->snapshots_offset;
 101    s->snapshots = g_new0(QCowSnapshot, s->nb_snapshots);
 102
 103    for(i = 0; i < s->nb_snapshots; i++) {
 104        bool truncate_unknown_extra_data = false;
 105
 106        pre_sn_offset = offset;
 107        table_length = ROUND_UP(table_length, 8);
 108
 109        /* Read statically sized part of the snapshot header */
 110        offset = ROUND_UP(offset, 8);
 111        ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
 112        if (ret < 0) {
 113            error_setg_errno(errp, -ret, "Failed to read snapshot table");
 114            goto fail;
 115        }
 116
 117        offset += sizeof(h);
 118        sn = s->snapshots + i;
 119        sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
 120        sn->l1_size = be32_to_cpu(h.l1_size);
 121        sn->vm_state_size = be32_to_cpu(h.vm_state_size);
 122        sn->date_sec = be32_to_cpu(h.date_sec);
 123        sn->date_nsec = be32_to_cpu(h.date_nsec);
 124        sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec);
 125        sn->extra_data_size = be32_to_cpu(h.extra_data_size);
 126
 127        id_str_size = be16_to_cpu(h.id_str_size);
 128        name_size = be16_to_cpu(h.name_size);
 129
 130        if (sn->extra_data_size > QCOW_MAX_SNAPSHOT_EXTRA_DATA) {
 131            if (!repair) {
 132                ret = -EFBIG;
 133                error_setg(errp, "Too much extra metadata in snapshot table "
 134                           "entry %i", i);
 135                error_append_hint(errp, "You can force-remove this extra "
 136                                  "metadata with qemu-img check -r all\n");
 137                goto fail;
 138            }
 139
 140            fprintf(stderr, "Discarding too much extra metadata in snapshot "
 141                    "table entry %i (%" PRIu32 " > %u)\n",
 142                    i, sn->extra_data_size, QCOW_MAX_SNAPSHOT_EXTRA_DATA);
 143
 144            (*extra_data_dropped)++;
 145            truncate_unknown_extra_data = true;
 146        }
 147
 148        /* Read known extra data */
 149        ret = bdrv_pread(bs->file, offset, &extra,
 150                         MIN(sizeof(extra), sn->extra_data_size));
 151        if (ret < 0) {
 152            error_setg_errno(errp, -ret, "Failed to read snapshot table");
 153            goto fail;
 154        }
 155        offset += MIN(sizeof(extra), sn->extra_data_size);
 156
 157        if (sn->extra_data_size >= endof(QCowSnapshotExtraData,
 158                                         vm_state_size_large)) {
 159            sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large);
 160        }
 161
 162        if (sn->extra_data_size >= endof(QCowSnapshotExtraData, disk_size)) {
 163            sn->disk_size = be64_to_cpu(extra.disk_size);
 164        } else {
 165            sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
 166        }
 167
 168        if (sn->extra_data_size >= endof(QCowSnapshotExtraData, icount)) {
 169            sn->icount = be64_to_cpu(extra.icount);
 170        } else {
 171            sn->icount = -1ULL;
 172        }
 173
 174        if (sn->extra_data_size > sizeof(extra)) {
 175            uint64_t extra_data_end;
 176            size_t unknown_extra_data_size;
 177
 178            extra_data_end = offset + sn->extra_data_size - sizeof(extra);
 179
 180            if (truncate_unknown_extra_data) {
 181                sn->extra_data_size = QCOW_MAX_SNAPSHOT_EXTRA_DATA;
 182            }
 183
 184            /* Store unknown extra data */
 185            unknown_extra_data_size = sn->extra_data_size - sizeof(extra);
 186            sn->unknown_extra_data = g_malloc(unknown_extra_data_size);
 187            ret = bdrv_pread(bs->file, offset, sn->unknown_extra_data,
 188                             unknown_extra_data_size);
 189            if (ret < 0) {
 190                error_setg_errno(errp, -ret,
 191                                 "Failed to read snapshot table");
 192                goto fail;
 193            }
 194            offset = extra_data_end;
 195        }
 196
 197        /* Read snapshot ID */
 198        sn->id_str = g_malloc(id_str_size + 1);
 199        ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
 200        if (ret < 0) {
 201            error_setg_errno(errp, -ret, "Failed to read snapshot table");
 202            goto fail;
 203        }
 204        offset += id_str_size;
 205        sn->id_str[id_str_size] = '\0';
 206
 207        /* Read snapshot name */
 208        sn->name = g_malloc(name_size + 1);
 209        ret = bdrv_pread(bs->file, offset, sn->name, name_size);
 210        if (ret < 0) {
 211            error_setg_errno(errp, -ret, "Failed to read snapshot table");
 212            goto fail;
 213        }
 214        offset += name_size;
 215        sn->name[name_size] = '\0';
 216
 217        /* Note that the extra data may have been truncated */
 218        table_length += sizeof(h) + sn->extra_data_size + id_str_size +
 219                        name_size;
 220        if (!repair) {
 221            assert(table_length == offset - s->snapshots_offset);
 222        }
 223
 224        if (table_length > QCOW_MAX_SNAPSHOTS_SIZE ||
 225            offset - s->snapshots_offset > INT_MAX)
 226        {
 227            if (!repair) {
 228                ret = -EFBIG;
 229                error_setg(errp, "Snapshot table is too big");
 230                error_append_hint(errp, "You can force-remove all %u "
 231                                  "overhanging snapshots with qemu-img check "
 232                                  "-r all\n", s->nb_snapshots - i);
 233                goto fail;
 234            }
 235
 236            fprintf(stderr, "Discarding %u overhanging snapshots (snapshot "
 237                    "table is too big)\n", s->nb_snapshots - i);
 238
 239            *nb_clusters_reduced += (s->nb_snapshots - i);
 240
 241            /* Discard current snapshot also */
 242            qcow2_free_single_snapshot(bs, i);
 243
 244            /*
 245             * This leaks all the rest of the snapshot table and the
 246             * snapshots' clusters, but we run in check -r all mode,
 247             * so qcow2_check_refcounts() will take care of it.
 248             */
 249            s->nb_snapshots = i;
 250            offset = pre_sn_offset;
 251            break;
 252        }
 253    }
 254
 255    assert(offset - s->snapshots_offset <= INT_MAX);
 256    s->snapshots_size = offset - s->snapshots_offset;
 257    return 0;
 258
 259fail:
 260    qcow2_free_snapshots(bs);
 261    return ret;
 262}
 263
 264int qcow2_read_snapshots(BlockDriverState *bs, Error **errp)
 265{
 266    return qcow2_do_read_snapshots(bs, false, NULL, NULL, errp);
 267}
 268
 269/* add at the end of the file a new list of snapshots */
 270int qcow2_write_snapshots(BlockDriverState *bs)
 271{
 272    BDRVQcow2State *s = bs->opaque;
 273    QCowSnapshot *sn;
 274    QCowSnapshotHeader h;
 275    QCowSnapshotExtraData extra;
 276    int i, name_size, id_str_size, snapshots_size;
 277    struct {
 278        uint32_t nb_snapshots;
 279        uint64_t snapshots_offset;
 280    } QEMU_PACKED header_data;
 281    int64_t offset, snapshots_offset = 0;
 282    int ret;
 283
 284    /* compute the size of the snapshots */
 285    offset = 0;
 286    for(i = 0; i < s->nb_snapshots; i++) {
 287        sn = s->snapshots + i;
 288        offset = ROUND_UP(offset, 8);
 289        offset += sizeof(h);
 290        offset += MAX(sizeof(extra), sn->extra_data_size);
 291        offset += strlen(sn->id_str);
 292        offset += strlen(sn->name);
 293
 294        if (offset > QCOW_MAX_SNAPSHOTS_SIZE) {
 295            ret = -EFBIG;
 296            goto fail;
 297        }
 298    }
 299
 300    assert(offset <= INT_MAX);
 301    snapshots_size = offset;
 302
 303    /* Allocate space for the new snapshot list */
 304    snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
 305    offset = snapshots_offset;
 306    if (offset < 0) {
 307        ret = offset;
 308        goto fail;
 309    }
 310    ret = bdrv_flush(bs);
 311    if (ret < 0) {
 312        goto fail;
 313    }
 314
 315    /* The snapshot list position has not yet been updated, so these clusters
 316     * must indeed be completely free */
 317    ret = qcow2_pre_write_overlap_check(bs, 0, offset, snapshots_size, false);
 318    if (ret < 0) {
 319        goto fail;
 320    }
 321
 322
 323    /* Write all snapshots to the new list */
 324    for(i = 0; i < s->nb_snapshots; i++) {
 325        sn = s->snapshots + i;
 326        memset(&h, 0, sizeof(h));
 327        h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
 328        h.l1_size = cpu_to_be32(sn->l1_size);
 329        /* If it doesn't fit in 32 bit, older implementations should treat it
 330         * as a disk-only snapshot rather than truncate the VM state */
 331        if (sn->vm_state_size <= 0xffffffff) {
 332            h.vm_state_size = cpu_to_be32(sn->vm_state_size);
 333        }
 334        h.date_sec = cpu_to_be32(sn->date_sec);
 335        h.date_nsec = cpu_to_be32(sn->date_nsec);
 336        h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
 337        h.extra_data_size = cpu_to_be32(MAX(sizeof(extra),
 338                                            sn->extra_data_size));
 339
 340        memset(&extra, 0, sizeof(extra));
 341        extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size);
 342        extra.disk_size = cpu_to_be64(sn->disk_size);
 343        extra.icount = cpu_to_be64(sn->icount);
 344
 345        id_str_size = strlen(sn->id_str);
 346        name_size = strlen(sn->name);
 347        assert(id_str_size <= UINT16_MAX && name_size <= UINT16_MAX);
 348        h.id_str_size = cpu_to_be16(id_str_size);
 349        h.name_size = cpu_to_be16(name_size);
 350        offset = ROUND_UP(offset, 8);
 351
 352        ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
 353        if (ret < 0) {
 354            goto fail;
 355        }
 356        offset += sizeof(h);
 357
 358        ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
 359        if (ret < 0) {
 360            goto fail;
 361        }
 362        offset += sizeof(extra);
 363
 364        if (sn->extra_data_size > sizeof(extra)) {
 365            size_t unknown_extra_data_size =
 366                sn->extra_data_size - sizeof(extra);
 367
 368            /* qcow2_read_snapshots() ensures no unbounded allocation */
 369            assert(unknown_extra_data_size <= BDRV_REQUEST_MAX_BYTES);
 370            assert(sn->unknown_extra_data);
 371
 372            ret = bdrv_pwrite(bs->file, offset, sn->unknown_extra_data,
 373                              unknown_extra_data_size);
 374            if (ret < 0) {
 375                goto fail;
 376            }
 377            offset += unknown_extra_data_size;
 378        }
 379
 380        ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
 381        if (ret < 0) {
 382            goto fail;
 383        }
 384        offset += id_str_size;
 385
 386        ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
 387        if (ret < 0) {
 388            goto fail;
 389        }
 390        offset += name_size;
 391    }
 392
 393    /*
 394     * Update the header to point to the new snapshot table. This requires the
 395     * new table and its refcounts to be stable on disk.
 396     */
 397    ret = bdrv_flush(bs);
 398    if (ret < 0) {
 399        goto fail;
 400    }
 401
 402    QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
 403                      endof(QCowHeader, nb_snapshots));
 404
 405    header_data.nb_snapshots        = cpu_to_be32(s->nb_snapshots);
 406    header_data.snapshots_offset    = cpu_to_be64(snapshots_offset);
 407
 408    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
 409                           &header_data, sizeof(header_data));
 410    if (ret < 0) {
 411        goto fail;
 412    }
 413
 414    /* free the old snapshot table */
 415    qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size,
 416                        QCOW2_DISCARD_SNAPSHOT);
 417    s->snapshots_offset = snapshots_offset;
 418    s->snapshots_size = snapshots_size;
 419    return 0;
 420
 421fail:
 422    if (snapshots_offset > 0) {
 423        qcow2_free_clusters(bs, snapshots_offset, snapshots_size,
 424                            QCOW2_DISCARD_ALWAYS);
 425    }
 426    return ret;
 427}
 428
 429int coroutine_fn qcow2_check_read_snapshot_table(BlockDriverState *bs,
 430                                                 BdrvCheckResult *result,
 431                                                 BdrvCheckMode fix)
 432{
 433    BDRVQcow2State *s = bs->opaque;
 434    Error *local_err = NULL;
 435    int nb_clusters_reduced = 0;
 436    int extra_data_dropped = 0;
 437    int ret;
 438    struct {
 439        uint32_t nb_snapshots;
 440        uint64_t snapshots_offset;
 441    } QEMU_PACKED snapshot_table_pointer;
 442
 443    /* qcow2_do_open() discards this information in check mode */
 444    ret = bdrv_pread(bs->file, offsetof(QCowHeader, nb_snapshots),
 445                     &snapshot_table_pointer, sizeof(snapshot_table_pointer));
 446    if (ret < 0) {
 447        result->check_errors++;
 448        fprintf(stderr, "ERROR failed to read the snapshot table pointer from "
 449                "the image header: %s\n", strerror(-ret));
 450        return ret;
 451    }
 452
 453    s->snapshots_offset = be64_to_cpu(snapshot_table_pointer.snapshots_offset);
 454    s->nb_snapshots = be32_to_cpu(snapshot_table_pointer.nb_snapshots);
 455
 456    if (s->nb_snapshots > QCOW_MAX_SNAPSHOTS && (fix & BDRV_FIX_ERRORS)) {
 457        fprintf(stderr, "Discarding %u overhanging snapshots\n",
 458                s->nb_snapshots - QCOW_MAX_SNAPSHOTS);
 459
 460        nb_clusters_reduced += s->nb_snapshots - QCOW_MAX_SNAPSHOTS;
 461        s->nb_snapshots = QCOW_MAX_SNAPSHOTS;
 462    }
 463
 464    ret = qcow2_validate_table(bs, s->snapshots_offset, s->nb_snapshots,
 465                               sizeof(QCowSnapshotHeader),
 466                               sizeof(QCowSnapshotHeader) * QCOW_MAX_SNAPSHOTS,
 467                               "snapshot table", &local_err);
 468    if (ret < 0) {
 469        result->check_errors++;
 470        error_reportf_err(local_err, "ERROR ");
 471
 472        if (s->nb_snapshots > QCOW_MAX_SNAPSHOTS) {
 473            fprintf(stderr, "You can force-remove all %u overhanging snapshots "
 474                    "with qemu-img check -r all\n",
 475                    s->nb_snapshots - QCOW_MAX_SNAPSHOTS);
 476        }
 477
 478        /* We did not read the snapshot table, so invalidate this information */
 479        s->snapshots_offset = 0;
 480        s->nb_snapshots = 0;
 481
 482        return ret;
 483    }
 484
 485    qemu_co_mutex_unlock(&s->lock);
 486    ret = qcow2_do_read_snapshots(bs, fix & BDRV_FIX_ERRORS,
 487                                  &nb_clusters_reduced, &extra_data_dropped,
 488                                  &local_err);
 489    qemu_co_mutex_lock(&s->lock);
 490    if (ret < 0) {
 491        result->check_errors++;
 492        error_reportf_err(local_err,
 493                          "ERROR failed to read the snapshot table: ");
 494
 495        /* We did not read the snapshot table, so invalidate this information */
 496        s->snapshots_offset = 0;
 497        s->nb_snapshots = 0;
 498
 499        return ret;
 500    }
 501    result->corruptions += nb_clusters_reduced + extra_data_dropped;
 502
 503    if (nb_clusters_reduced) {
 504        /*
 505         * Update image header now, because:
 506         * (1) qcow2_check_refcounts() relies on s->nb_snapshots to be
 507         *     the same as what the image header says,
 508         * (2) this leaks clusters, but qcow2_check_refcounts() will
 509         *     fix that.
 510         */
 511        assert(fix & BDRV_FIX_ERRORS);
 512
 513        snapshot_table_pointer.nb_snapshots = cpu_to_be32(s->nb_snapshots);
 514        ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
 515                               &snapshot_table_pointer.nb_snapshots,
 516                               sizeof(snapshot_table_pointer.nb_snapshots));
 517        if (ret < 0) {
 518            result->check_errors++;
 519            fprintf(stderr, "ERROR failed to update the snapshot count in the "
 520                    "image header: %s\n", strerror(-ret));
 521            return ret;
 522        }
 523
 524        result->corruptions_fixed += nb_clusters_reduced;
 525        result->corruptions -= nb_clusters_reduced;
 526    }
 527
 528    /*
 529     * All of v3 images' snapshot table entries need to have at least
 530     * 16 bytes of extra data.
 531     */
 532    if (s->qcow_version >= 3) {
 533        int i;
 534        for (i = 0; i < s->nb_snapshots; i++) {
 535            if (s->snapshots[i].extra_data_size <
 536                sizeof_field(QCowSnapshotExtraData, vm_state_size_large) +
 537                sizeof_field(QCowSnapshotExtraData, disk_size))
 538            {
 539                result->corruptions++;
 540                fprintf(stderr, "%s snapshot table entry %i is incomplete\n",
 541                        fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
 542            }
 543        }
 544    }
 545
 546    return 0;
 547}
 548
 549int coroutine_fn qcow2_check_fix_snapshot_table(BlockDriverState *bs,
 550                                                BdrvCheckResult *result,
 551                                                BdrvCheckMode fix)
 552{
 553    BDRVQcow2State *s = bs->opaque;
 554    int ret;
 555
 556    if (result->corruptions && (fix & BDRV_FIX_ERRORS)) {
 557        qemu_co_mutex_unlock(&s->lock);
 558        ret = qcow2_write_snapshots(bs);
 559        qemu_co_mutex_lock(&s->lock);
 560        if (ret < 0) {
 561            result->check_errors++;
 562            fprintf(stderr, "ERROR failed to update snapshot table: %s\n",
 563                    strerror(-ret));
 564            return ret;
 565        }
 566
 567        result->corruptions_fixed += result->corruptions;
 568        result->corruptions = 0;
 569    }
 570
 571    return 0;
 572}
 573
 574static void find_new_snapshot_id(BlockDriverState *bs,
 575                                 char *id_str, int id_str_size)
 576{
 577    BDRVQcow2State *s = bs->opaque;
 578    QCowSnapshot *sn;
 579    int i;
 580    unsigned long id, id_max = 0;
 581
 582    for(i = 0; i < s->nb_snapshots; i++) {
 583        sn = s->snapshots + i;
 584        id = strtoul(sn->id_str, NULL, 10);
 585        if (id > id_max)
 586            id_max = id;
 587    }
 588    snprintf(id_str, id_str_size, "%lu", id_max + 1);
 589}
 590
 591static int find_snapshot_by_id_and_name(BlockDriverState *bs,
 592                                        const char *id,
 593                                        const char *name)
 594{
 595    BDRVQcow2State *s = bs->opaque;
 596    int i;
 597
 598    if (id && name) {
 599        for (i = 0; i < s->nb_snapshots; i++) {
 600            if (!strcmp(s->snapshots[i].id_str, id) &&
 601                !strcmp(s->snapshots[i].name, name)) {
 602                return i;
 603            }
 604        }
 605    } else if (id) {
 606        for (i = 0; i < s->nb_snapshots; i++) {
 607            if (!strcmp(s->snapshots[i].id_str, id)) {
 608                return i;
 609            }
 610        }
 611    } else if (name) {
 612        for (i = 0; i < s->nb_snapshots; i++) {
 613            if (!strcmp(s->snapshots[i].name, name)) {
 614                return i;
 615            }
 616        }
 617    }
 618
 619    return -1;
 620}
 621
 622static int find_snapshot_by_id_or_name(BlockDriverState *bs,
 623                                       const char *id_or_name)
 624{
 625    int ret;
 626
 627    ret = find_snapshot_by_id_and_name(bs, id_or_name, NULL);
 628    if (ret >= 0) {
 629        return ret;
 630    }
 631    return find_snapshot_by_id_and_name(bs, NULL, id_or_name);
 632}
 633
 634/* if no id is provided, a new one is constructed */
 635int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
 636{
 637    BDRVQcow2State *s = bs->opaque;
 638    QCowSnapshot *new_snapshot_list = NULL;
 639    QCowSnapshot *old_snapshot_list = NULL;
 640    QCowSnapshot sn1, *sn = &sn1;
 641    int i, ret;
 642    uint64_t *l1_table = NULL;
 643    int64_t l1_table_offset;
 644
 645    if (s->nb_snapshots >= QCOW_MAX_SNAPSHOTS) {
 646        return -EFBIG;
 647    }
 648
 649    if (has_data_file(bs)) {
 650        return -ENOTSUP;
 651    }
 652
 653    memset(sn, 0, sizeof(*sn));
 654
 655    /* Generate an ID */
 656    find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
 657
 658    /* Populate sn with passed data */
 659    sn->id_str = g_strdup(sn_info->id_str);
 660    sn->name = g_strdup(sn_info->name);
 661
 662    sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
 663    sn->vm_state_size = sn_info->vm_state_size;
 664    sn->date_sec = sn_info->date_sec;
 665    sn->date_nsec = sn_info->date_nsec;
 666    sn->vm_clock_nsec = sn_info->vm_clock_nsec;
 667    sn->icount = sn_info->icount;
 668    sn->extra_data_size = sizeof(QCowSnapshotExtraData);
 669
 670    /* Allocate the L1 table of the snapshot and copy the current one there. */
 671    l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * L1E_SIZE);
 672    if (l1_table_offset < 0) {
 673        ret = l1_table_offset;
 674        goto fail;
 675    }
 676
 677    sn->l1_table_offset = l1_table_offset;
 678    sn->l1_size = s->l1_size;
 679
 680    l1_table = g_try_new(uint64_t, s->l1_size);
 681    if (s->l1_size && l1_table == NULL) {
 682        ret = -ENOMEM;
 683        goto fail;
 684    }
 685
 686    for(i = 0; i < s->l1_size; i++) {
 687        l1_table[i] = cpu_to_be64(s->l1_table[i]);
 688    }
 689
 690    ret = qcow2_pre_write_overlap_check(bs, 0, sn->l1_table_offset,
 691                                        s->l1_size * L1E_SIZE, false);
 692    if (ret < 0) {
 693        goto fail;
 694    }
 695
 696    ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
 697                      s->l1_size * L1E_SIZE);
 698    if (ret < 0) {
 699        goto fail;
 700    }
 701
 702    g_free(l1_table);
 703    l1_table = NULL;
 704
 705    /*
 706     * Increase the refcounts of all clusters and make sure everything is
 707     * stable on disk before updating the snapshot table to contain a pointer
 708     * to the new L1 table.
 709     */
 710    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
 711    if (ret < 0) {
 712        goto fail;
 713    }
 714
 715    /* Append the new snapshot to the snapshot list */
 716    new_snapshot_list = g_new(QCowSnapshot, s->nb_snapshots + 1);
 717    if (s->snapshots) {
 718        memcpy(new_snapshot_list, s->snapshots,
 719               s->nb_snapshots * sizeof(QCowSnapshot));
 720        old_snapshot_list = s->snapshots;
 721    }
 722    s->snapshots = new_snapshot_list;
 723    s->snapshots[s->nb_snapshots++] = *sn;
 724
 725    ret = qcow2_write_snapshots(bs);
 726    if (ret < 0) {
 727        g_free(s->snapshots);
 728        s->snapshots = old_snapshot_list;
 729        s->nb_snapshots--;
 730        goto fail;
 731    }
 732
 733    g_free(old_snapshot_list);
 734
 735    /* The VM state isn't needed any more in the active L1 table; in fact, it
 736     * hurts by causing expensive COW for the next snapshot. */
 737    qcow2_cluster_discard(bs, qcow2_vm_state_offset(s),
 738                          ROUND_UP(sn->vm_state_size, s->cluster_size),
 739                          QCOW2_DISCARD_NEVER, false);
 740
 741#ifdef DEBUG_ALLOC
 742    {
 743      BdrvCheckResult result = {0};
 744      qcow2_check_refcounts(bs, &result, 0);
 745    }
 746#endif
 747    return 0;
 748
 749fail:
 750    g_free(sn->id_str);
 751    g_free(sn->name);
 752    g_free(l1_table);
 753
 754    return ret;
 755}
 756
 757/* copy the snapshot 'snapshot_name' into the current disk image */
 758int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
 759{
 760    BDRVQcow2State *s = bs->opaque;
 761    QCowSnapshot *sn;
 762    Error *local_err = NULL;
 763    int i, snapshot_index;
 764    int cur_l1_bytes, sn_l1_bytes;
 765    int ret;
 766    uint64_t *sn_l1_table = NULL;
 767
 768    if (has_data_file(bs)) {
 769        return -ENOTSUP;
 770    }
 771
 772    /* Search the snapshot */
 773    snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
 774    if (snapshot_index < 0) {
 775        return -ENOENT;
 776    }
 777    sn = &s->snapshots[snapshot_index];
 778
 779    ret = qcow2_validate_table(bs, sn->l1_table_offset, sn->l1_size,
 780                               L1E_SIZE, QCOW_MAX_L1_SIZE,
 781                               "Snapshot L1 table", &local_err);
 782    if (ret < 0) {
 783        error_report_err(local_err);
 784        goto fail;
 785    }
 786
 787    if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) {
 788        BlockBackend *blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL,
 789                                            &local_err);
 790        if (!blk) {
 791            error_report_err(local_err);
 792            ret = -ENOTSUP;
 793            goto fail;
 794        }
 795
 796        ret = blk_truncate(blk, sn->disk_size, true, PREALLOC_MODE_OFF, 0,
 797                           &local_err);
 798        blk_unref(blk);
 799        if (ret < 0) {
 800            error_report_err(local_err);
 801            goto fail;
 802        }
 803    }
 804
 805    /*
 806     * Make sure that the current L1 table is big enough to contain the whole
 807     * L1 table of the snapshot. If the snapshot L1 table is smaller, the
 808     * current one must be padded with zeros.
 809     */
 810    ret = qcow2_grow_l1_table(bs, sn->l1_size, true);
 811    if (ret < 0) {
 812        goto fail;
 813    }
 814
 815    cur_l1_bytes = s->l1_size * L1E_SIZE;
 816    sn_l1_bytes = sn->l1_size * L1E_SIZE;
 817
 818    /*
 819     * Copy the snapshot L1 table to the current L1 table.
 820     *
 821     * Before overwriting the old current L1 table on disk, make sure to
 822     * increase all refcounts for the clusters referenced by the new one.
 823     * Decrease the refcount referenced by the old one only when the L1
 824     * table is overwritten.
 825     */
 826    sn_l1_table = g_try_malloc0(cur_l1_bytes);
 827    if (cur_l1_bytes && sn_l1_table == NULL) {
 828        ret = -ENOMEM;
 829        goto fail;
 830    }
 831
 832    ret = bdrv_pread(bs->file, sn->l1_table_offset,
 833                     sn_l1_table, sn_l1_bytes);
 834    if (ret < 0) {
 835        goto fail;
 836    }
 837
 838    ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset,
 839                                         sn->l1_size, 1);
 840    if (ret < 0) {
 841        goto fail;
 842    }
 843
 844    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1,
 845                                        s->l1_table_offset, cur_l1_bytes,
 846                                        false);
 847    if (ret < 0) {
 848        goto fail;
 849    }
 850
 851    ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
 852                           cur_l1_bytes);
 853    if (ret < 0) {
 854        goto fail;
 855    }
 856
 857    /*
 858     * Decrease refcount of clusters of current L1 table.
 859     *
 860     * At this point, the in-memory s->l1_table points to the old L1 table,
 861     * whereas on disk we already have the new one.
 862     *
 863     * qcow2_update_snapshot_refcount special cases the current L1 table to use
 864     * the in-memory data instead of really using the offset to load a new one,
 865     * which is why this works.
 866     */
 867    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset,
 868                                         s->l1_size, -1);
 869
 870    /*
 871     * Now update the in-memory L1 table to be in sync with the on-disk one. We
 872     * need to do this even if updating refcounts failed.
 873     */
 874    for(i = 0;i < s->l1_size; i++) {
 875        s->l1_table[i] = be64_to_cpu(sn_l1_table[i]);
 876    }
 877
 878    if (ret < 0) {
 879        goto fail;
 880    }
 881
 882    g_free(sn_l1_table);
 883    sn_l1_table = NULL;
 884
 885    /*
 886     * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
 887     * when we decreased the refcount of the old snapshot.
 888     */
 889    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
 890    if (ret < 0) {
 891        goto fail;
 892    }
 893
 894#ifdef DEBUG_ALLOC
 895    {
 896        BdrvCheckResult result = {0};
 897        qcow2_check_refcounts(bs, &result, 0);
 898    }
 899#endif
 900    return 0;
 901
 902fail:
 903    g_free(sn_l1_table);
 904    return ret;
 905}
 906
 907int qcow2_snapshot_delete(BlockDriverState *bs,
 908                          const char *snapshot_id,
 909                          const char *name,
 910                          Error **errp)
 911{
 912    BDRVQcow2State *s = bs->opaque;
 913    QCowSnapshot sn;
 914    int snapshot_index, ret;
 915
 916    if (has_data_file(bs)) {
 917        return -ENOTSUP;
 918    }
 919
 920    /* Search the snapshot */
 921    snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
 922    if (snapshot_index < 0) {
 923        error_setg(errp, "Can't find the snapshot");
 924        return -ENOENT;
 925    }
 926    sn = s->snapshots[snapshot_index];
 927
 928    ret = qcow2_validate_table(bs, sn.l1_table_offset, sn.l1_size,
 929                               L1E_SIZE, QCOW_MAX_L1_SIZE,
 930                               "Snapshot L1 table", errp);
 931    if (ret < 0) {
 932        return ret;
 933    }
 934
 935    /* Remove it from the snapshot list */
 936    memmove(s->snapshots + snapshot_index,
 937            s->snapshots + snapshot_index + 1,
 938            (s->nb_snapshots - snapshot_index - 1) * sizeof(sn));
 939    s->nb_snapshots--;
 940    ret = qcow2_write_snapshots(bs);
 941    if (ret < 0) {
 942        error_setg_errno(errp, -ret,
 943                         "Failed to remove snapshot from snapshot list");
 944        return ret;
 945    }
 946
 947    /*
 948     * The snapshot is now unused, clean up. If we fail after this point, we
 949     * won't recover but just leak clusters.
 950     */
 951    g_free(sn.unknown_extra_data);
 952    g_free(sn.id_str);
 953    g_free(sn.name);
 954
 955    /*
 956     * Now decrease the refcounts of clusters referenced by the snapshot and
 957     * free the L1 table.
 958     */
 959    ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
 960                                         sn.l1_size, -1);
 961    if (ret < 0) {
 962        error_setg_errno(errp, -ret, "Failed to free the cluster and L1 table");
 963        return ret;
 964    }
 965    qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * L1E_SIZE,
 966                        QCOW2_DISCARD_SNAPSHOT);
 967
 968    /* must update the copied flag on the current cluster offsets */
 969    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
 970    if (ret < 0) {
 971        error_setg_errno(errp, -ret,
 972                         "Failed to update snapshot status in disk");
 973        return ret;
 974    }
 975
 976#ifdef DEBUG_ALLOC
 977    {
 978        BdrvCheckResult result = {0};
 979        qcow2_check_refcounts(bs, &result, 0);
 980    }
 981#endif
 982    return 0;
 983}
 984
 985int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
 986{
 987    BDRVQcow2State *s = bs->opaque;
 988    QEMUSnapshotInfo *sn_tab, *sn_info;
 989    QCowSnapshot *sn;
 990    int i;
 991
 992    if (has_data_file(bs)) {
 993        return -ENOTSUP;
 994    }
 995    if (!s->nb_snapshots) {
 996        *psn_tab = NULL;
 997        return s->nb_snapshots;
 998    }
 999
1000    sn_tab = g_new0(QEMUSnapshotInfo, s->nb_snapshots);
1001    for(i = 0; i < s->nb_snapshots; i++) {
1002        sn_info = sn_tab + i;
1003        sn = s->snapshots + i;
1004        pstrcpy(sn_info->id_str, sizeof(sn_info->id_str),
1005                sn->id_str);
1006        pstrcpy(sn_info->name, sizeof(sn_info->name),
1007                sn->name);
1008        sn_info->vm_state_size = sn->vm_state_size;
1009        sn_info->date_sec = sn->date_sec;
1010        sn_info->date_nsec = sn->date_nsec;
1011        sn_info->vm_clock_nsec = sn->vm_clock_nsec;
1012        sn_info->icount = sn->icount;
1013    }
1014    *psn_tab = sn_tab;
1015    return s->nb_snapshots;
1016}
1017
1018int qcow2_snapshot_load_tmp(BlockDriverState *bs,
1019                            const char *snapshot_id,
1020                            const char *name,
1021                            Error **errp)
1022{
1023    int i, snapshot_index;
1024    BDRVQcow2State *s = bs->opaque;
1025    QCowSnapshot *sn;
1026    uint64_t *new_l1_table;
1027    int new_l1_bytes;
1028    int ret;
1029
1030    assert(bdrv_is_read_only(bs));
1031
1032    /* Search the snapshot */
1033    snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
1034    if (snapshot_index < 0) {
1035        error_setg(errp,
1036                   "Can't find snapshot");
1037        return -ENOENT;
1038    }
1039    sn = &s->snapshots[snapshot_index];
1040
1041    /* Allocate and read in the snapshot's L1 table */
1042    ret = qcow2_validate_table(bs, sn->l1_table_offset, sn->l1_size,
1043                               L1E_SIZE, QCOW_MAX_L1_SIZE,
1044                               "Snapshot L1 table", errp);
1045    if (ret < 0) {
1046        return ret;
1047    }
1048    new_l1_bytes = sn->l1_size * L1E_SIZE;
1049    new_l1_table = qemu_try_blockalign(bs->file->bs, new_l1_bytes);
1050    if (new_l1_table == NULL) {
1051        return -ENOMEM;
1052    }
1053
1054    ret = bdrv_pread(bs->file, sn->l1_table_offset,
1055                     new_l1_table, new_l1_bytes);
1056    if (ret < 0) {
1057        error_setg(errp, "Failed to read l1 table for snapshot");
1058        qemu_vfree(new_l1_table);
1059        return ret;
1060    }
1061
1062    /* Switch the L1 table */
1063    qemu_vfree(s->l1_table);
1064
1065    s->l1_size = sn->l1_size;
1066    s->l1_table_offset = sn->l1_table_offset;
1067    s->l1_table = new_l1_table;
1068
1069    for(i = 0;i < s->l1_size; i++) {
1070        be64_to_cpus(&s->l1_table[i]);
1071    }
1072
1073    return 0;
1074}
1075