qemu/block/commit.c
<<
>>
Prefs
   1/*
   2 * Live block commit
   3 *
   4 * Copyright Red Hat, Inc. 2012
   5 *
   6 * Authors:
   7 *  Jeff Cody   <jcody@redhat.com>
   8 *  Based on stream.c by Stefan Hajnoczi
   9 *
  10 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
  11 * See the COPYING.LIB file in the top-level directory.
  12 *
  13 */
  14
  15#include "qemu/osdep.h"
  16#include "trace.h"
  17#include "block/block_int.h"
  18#include "block/blockjob_int.h"
  19#include "qapi/error.h"
  20#include "qapi/qmp/qerror.h"
  21#include "qemu/ratelimit.h"
  22#include "sysemu/block-backend.h"
  23
  24enum {
  25    /*
  26     * Size of data buffer for populating the image file.  This should be large
  27     * enough to process multiple clusters in a single call, so that populating
  28     * contiguous regions of the image is efficient.
  29     */
  30    COMMIT_BUFFER_SIZE = 512 * 1024, /* in bytes */
  31};
  32
  33#define SLICE_TIME 100000000ULL /* ns */
  34
/*
 * Per-job state of a live commit.  'common' is the embedded generic
 * BlockJob; commit_complete() and commit_set_speed() recover this structure
 * from it with container_of().
 */
typedef struct CommitBlockJob {
    BlockJob common;
    /* Rate limiter, configured by commit_set_speed() */
    RateLimit limit;
    /* The 'bs' node that was passed to commit_start() */
    BlockDriverState *active;
    /* Backend attached to the 'top' node; commit_run() reads from it */
    BlockBackend *top;
    /* Backend attached to the 'base' node; commit_run() writes to it */
    BlockBackend *base;
    /* Error policy handed to block_job_error_action() in commit_run() */
    BlockdevOnError on_error;
    /* Open flags of base as sampled in commit_start() before any reopen */
    int base_flags;
    /* Open flags of top's overlay as sampled in commit_start() */
    int orig_overlay_flags;
    /* g_strdup()ed in commit_start(), g_free()d in commit_complete() */
    char *backing_file_str;
} CommitBlockJob;
  46
  47static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base,
  48                                        int64_t sector_num, int nb_sectors,
  49                                        void *buf)
  50{
  51    int ret = 0;
  52    QEMUIOVector qiov;
  53    struct iovec iov = {
  54        .iov_base = buf,
  55        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
  56    };
  57
  58    qemu_iovec_init_external(&qiov, &iov, 1);
  59
  60    ret = blk_co_preadv(bs, sector_num * BDRV_SECTOR_SIZE,
  61                        qiov.size, &qiov, 0);
  62    if (ret < 0) {
  63        return ret;
  64    }
  65
  66    ret = blk_co_pwritev(base, sector_num * BDRV_SECTOR_SIZE,
  67                         qiov.size, &qiov, 0);
  68    if (ret < 0) {
  69        return ret;
  70    }
  71
  72    return 0;
  73}
  74
/*
 * Result handed from commit_run() to commit_complete() through
 * block_job_defer_to_main_loop().  Allocated in commit_run() and freed in
 * commit_complete().
 */
typedef struct {
    /* Final status of commit_run(): 0, or the failing call's return value */
    int ret;
} CommitCompleteData;
  78
  79static void commit_complete(BlockJob *job, void *opaque)
  80{
  81    CommitBlockJob *s = container_of(job, CommitBlockJob, common);
  82    CommitCompleteData *data = opaque;
  83    BlockDriverState *active = s->active;
  84    BlockDriverState *top = blk_bs(s->top);
  85    BlockDriverState *base = blk_bs(s->base);
  86    BlockDriverState *overlay_bs = bdrv_find_overlay(active, top);
  87    int ret = data->ret;
  88
  89    if (!block_job_is_cancelled(&s->common) && ret == 0) {
  90        /* success */
  91        ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
  92    }
  93
  94    /* restore base open flags here if appropriate (e.g., change the base back
  95     * to r/o). These reopens do not need to be atomic, since we won't abort
  96     * even on failure here */
  97    if (s->base_flags != bdrv_get_flags(base)) {
  98        bdrv_reopen(base, s->base_flags, NULL);
  99    }
 100    if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
 101        bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
 102    }
 103    g_free(s->backing_file_str);
 104    blk_unref(s->top);
 105    blk_unref(s->base);
 106    block_job_completed(&s->common, ret);
 107    g_free(data);
 108}
 109
 110static void coroutine_fn commit_run(void *opaque)
 111{
 112    CommitBlockJob *s = opaque;
 113    CommitCompleteData *data;
 114    int64_t sector_num, end;
 115    uint64_t delay_ns = 0;
 116    int ret = 0;
 117    int n = 0;
 118    void *buf = NULL;
 119    int bytes_written = 0;
 120    int64_t base_len;
 121
 122    ret = s->common.len = blk_getlength(s->top);
 123
 124
 125    if (s->common.len < 0) {
 126        goto out;
 127    }
 128
 129    ret = base_len = blk_getlength(s->base);
 130    if (base_len < 0) {
 131        goto out;
 132    }
 133
 134    if (base_len < s->common.len) {
 135        ret = blk_truncate(s->base, s->common.len);
 136        if (ret) {
 137            goto out;
 138        }
 139    }
 140
 141    end = s->common.len >> BDRV_SECTOR_BITS;
 142    buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE);
 143
 144    for (sector_num = 0; sector_num < end; sector_num += n) {
 145        bool copy;
 146
 147        /* Note that even when no rate limit is applied we need to yield
 148         * with no pending I/O here so that bdrv_drain_all() returns.
 149         */
 150        block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
 151        if (block_job_is_cancelled(&s->common)) {
 152            break;
 153        }
 154        /* Copy if allocated above the base */
 155        ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base),
 156                                      sector_num,
 157                                      COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
 158                                      &n);
 159        copy = (ret == 1);
 160        trace_commit_one_iteration(s, sector_num, n, ret);
 161        if (copy) {
 162            ret = commit_populate(s->top, s->base, sector_num, n, buf);
 163            bytes_written += n * BDRV_SECTOR_SIZE;
 164        }
 165        if (ret < 0) {
 166            BlockErrorAction action =
 167                block_job_error_action(&s->common, false, s->on_error, -ret);
 168            if (action == BLOCK_ERROR_ACTION_REPORT) {
 169                goto out;
 170            } else {
 171                n = 0;
 172                continue;
 173            }
 174        }
 175        /* Publish progress */
 176        s->common.offset += n * BDRV_SECTOR_SIZE;
 177
 178        if (copy && s->common.speed) {
 179            delay_ns = ratelimit_calculate_delay(&s->limit, n);
 180        }
 181    }
 182
 183    ret = 0;
 184
 185out:
 186    qemu_vfree(buf);
 187
 188    data = g_malloc(sizeof(*data));
 189    data->ret = ret;
 190    block_job_defer_to_main_loop(&s->common, commit_complete, data);
 191}
 192
 193static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp)
 194{
 195    CommitBlockJob *s = container_of(job, CommitBlockJob, common);
 196
 197    if (speed < 0) {
 198        error_setg(errp, QERR_INVALID_PARAMETER, "speed");
 199        return;
 200    }
 201    ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
 202}
 203
 204static const BlockJobDriver commit_job_driver = {
 205    .instance_size = sizeof(CommitBlockJob),
 206    .job_type      = BLOCK_JOB_TYPE_COMMIT,
 207    .set_speed     = commit_set_speed,
 208    .start         = commit_run,
 209};
 210
 211void commit_start(const char *job_id, BlockDriverState *bs,
 212                  BlockDriverState *base, BlockDriverState *top, int64_t speed,
 213                  BlockdevOnError on_error, const char *backing_file_str,
 214                  Error **errp)
 215{
 216    CommitBlockJob *s;
 217    BlockReopenQueue *reopen_queue = NULL;
 218    int orig_overlay_flags;
 219    int orig_base_flags;
 220    BlockDriverState *iter;
 221    BlockDriverState *overlay_bs;
 222    Error *local_err = NULL;
 223
 224    assert(top != bs);
 225    if (top == base) {
 226        error_setg(errp, "Invalid files for merge: top and base are the same");
 227        return;
 228    }
 229
 230    overlay_bs = bdrv_find_overlay(bs, top);
 231
 232    if (overlay_bs == NULL) {
 233        error_setg(errp, "Could not find overlay image for %s:", top->filename);
 234        return;
 235    }
 236
 237    s = block_job_create(job_id, &commit_job_driver, bs, speed,
 238                         BLOCK_JOB_DEFAULT, NULL, NULL, errp);
 239    if (!s) {
 240        return;
 241    }
 242
 243    orig_base_flags    = bdrv_get_flags(base);
 244    orig_overlay_flags = bdrv_get_flags(overlay_bs);
 245
 246    /* convert base & overlay_bs to r/w, if necessary */
 247    if (!(orig_base_flags & BDRV_O_RDWR)) {
 248        reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL,
 249                                         orig_base_flags | BDRV_O_RDWR);
 250    }
 251    if (!(orig_overlay_flags & BDRV_O_RDWR)) {
 252        reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, NULL,
 253                                         orig_overlay_flags | BDRV_O_RDWR);
 254    }
 255    if (reopen_queue) {
 256        bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err);
 257        if (local_err != NULL) {
 258            error_propagate(errp, local_err);
 259            block_job_unref(&s->common);
 260            return;
 261        }
 262    }
 263
 264
 265    /* Block all nodes between top and base, because they will
 266     * disappear from the chain after this operation. */
 267    assert(bdrv_chain_contains(top, base));
 268    for (iter = top; iter != backing_bs(base); iter = backing_bs(iter)) {
 269        block_job_add_bdrv(&s->common, iter);
 270    }
 271    /* overlay_bs must be blocked because it needs to be modified to
 272     * update the backing image string, but if it's the root node then
 273     * don't block it again */
 274    if (bs != overlay_bs) {
 275        block_job_add_bdrv(&s->common, overlay_bs);
 276    }
 277
 278    s->base = blk_new();
 279    blk_insert_bs(s->base, base);
 280
 281    s->top = blk_new();
 282    blk_insert_bs(s->top, top);
 283
 284    s->active = bs;
 285
 286    s->base_flags          = orig_base_flags;
 287    s->orig_overlay_flags  = orig_overlay_flags;
 288
 289    s->backing_file_str = g_strdup(backing_file_str);
 290
 291    s->on_error = on_error;
 292
 293    trace_commit_start(bs, base, top, s);
 294    block_job_start(&s->common);
 295}
 296
 297
 298#define COMMIT_BUF_SECTORS 2048
 299
 300/* commit COW file into the raw image */
 301int bdrv_commit(BlockDriverState *bs)
 302{
 303    BlockBackend *src, *backing;
 304    BlockDriver *drv = bs->drv;
 305    int64_t sector, total_sectors, length, backing_length;
 306    int n, ro, open_flags;
 307    int ret = 0;
 308    uint8_t *buf = NULL;
 309
 310    if (!drv)
 311        return -ENOMEDIUM;
 312
 313    if (!bs->backing) {
 314        return -ENOTSUP;
 315    }
 316
 317    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
 318        bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
 319        return -EBUSY;
 320    }
 321
 322    ro = bs->backing->bs->read_only;
 323    open_flags =  bs->backing->bs->open_flags;
 324
 325    if (ro) {
 326        if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
 327            return -EACCES;
 328        }
 329    }
 330
 331    src = blk_new();
 332    blk_insert_bs(src, bs);
 333
 334    backing = blk_new();
 335    blk_insert_bs(backing, bs->backing->bs);
 336
 337    length = blk_getlength(src);
 338    if (length < 0) {
 339        ret = length;
 340        goto ro_cleanup;
 341    }
 342
 343    backing_length = blk_getlength(backing);
 344    if (backing_length < 0) {
 345        ret = backing_length;
 346        goto ro_cleanup;
 347    }
 348
 349    /* If our top snapshot is larger than the backing file image,
 350     * grow the backing file image if possible.  If not possible,
 351     * we must return an error */
 352    if (length > backing_length) {
 353        ret = blk_truncate(backing, length);
 354        if (ret < 0) {
 355            goto ro_cleanup;
 356        }
 357    }
 358
 359    total_sectors = length >> BDRV_SECTOR_BITS;
 360
 361    /* blk_try_blockalign() for src will choose an alignment that works for
 362     * backing as well, so no need to compare the alignment manually. */
 363    buf = blk_try_blockalign(src, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
 364    if (buf == NULL) {
 365        ret = -ENOMEM;
 366        goto ro_cleanup;
 367    }
 368
 369    for (sector = 0; sector < total_sectors; sector += n) {
 370        ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
 371        if (ret < 0) {
 372            goto ro_cleanup;
 373        }
 374        if (ret) {
 375            ret = blk_pread(src, sector * BDRV_SECTOR_SIZE, buf,
 376                            n * BDRV_SECTOR_SIZE);
 377            if (ret < 0) {
 378                goto ro_cleanup;
 379            }
 380
 381            ret = blk_pwrite(backing, sector * BDRV_SECTOR_SIZE, buf,
 382                             n * BDRV_SECTOR_SIZE, 0);
 383            if (ret < 0) {
 384                goto ro_cleanup;
 385            }
 386        }
 387    }
 388
 389    if (drv->bdrv_make_empty) {
 390        ret = drv->bdrv_make_empty(bs);
 391        if (ret < 0) {
 392            goto ro_cleanup;
 393        }
 394        blk_flush(src);
 395    }
 396
 397    /*
 398     * Make sure all data we wrote to the backing device is actually
 399     * stable on disk.
 400     */
 401    blk_flush(backing);
 402
 403    ret = 0;
 404ro_cleanup:
 405    qemu_vfree(buf);
 406
 407    blk_unref(src);
 408    blk_unref(backing);
 409
 410    if (ro) {
 411        /* ignoring error return here */
 412        bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
 413    }
 414
 415    return ret;
 416}
 417