qemu/block/stream.c
<<
>>
Prefs
   1/*
   2 * Image streaming
   3 *
   4 * Copyright IBM, Corp. 2011
   5 *
   6 * Authors:
   7 *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
  10 * See the COPYING.LIB file in the top-level directory.
  11 *
  12 */
  13
  14#include "qemu/osdep.h"
  15#include "trace.h"
  16#include "block/block_int.h"
  17#include "block/blockjob_int.h"
  18#include "qapi/error.h"
  19#include "qapi/qmp/qerror.h"
  20#include "qemu/ratelimit.h"
  21#include "sysemu/block-backend.h"
  22
  23enum {
  24    /*
  25     * Size of data buffer for populating the image file.  This should be large
  26     * enough to process multiple clusters in a single call, so that populating
  27     * contiguous regions of the image is efficient.
  28     */
  29    STREAM_BUFFER_SIZE = 512 * 1024, /* in bytes */
  30};
  31
  32typedef struct StreamBlockJob {
  33    BlockJob common;
  34    BlockDriverState *base;
  35    BlockdevOnError on_error;
  36    char *backing_file_str;
  37    int bs_flags;
  38} StreamBlockJob;
  39
  40static int coroutine_fn stream_populate(BlockBackend *blk,
  41                                        int64_t offset, uint64_t bytes,
  42                                        void *buf)
  43{
  44    struct iovec iov = {
  45        .iov_base = buf,
  46        .iov_len  = bytes,
  47    };
  48    QEMUIOVector qiov;
  49
  50    assert(bytes < SIZE_MAX);
  51    qemu_iovec_init_external(&qiov, &iov, 1);
  52
  53    /* Copy-on-read the unallocated clusters */
  54    return blk_co_preadv(blk, offset, qiov.size, &qiov, BDRV_REQ_COPY_ON_READ);
  55}
  56
  57typedef struct {
  58    int ret;
  59} StreamCompleteData;
  60
  61static void stream_complete(Job *job, void *opaque)
  62{
  63    StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
  64    BlockJob *bjob = &s->common;
  65    StreamCompleteData *data = opaque;
  66    BlockDriverState *bs = blk_bs(bjob->blk);
  67    BlockDriverState *base = s->base;
  68    Error *local_err = NULL;
  69
  70    if (!job_is_cancelled(job) && bs->backing && data->ret == 0) {
  71        const char *base_id = NULL, *base_fmt = NULL;
  72        if (base) {
  73            base_id = s->backing_file_str;
  74            if (base->drv) {
  75                base_fmt = base->drv->format_name;
  76            }
  77        }
  78        data->ret = bdrv_change_backing_file(bs, base_id, base_fmt);
  79        bdrv_set_backing_hd(bs, base, &local_err);
  80        if (local_err) {
  81            error_report_err(local_err);
  82            data->ret = -EPERM;
  83            goto out;
  84        }
  85    }
  86
  87out:
  88    /* Reopen the image back in read-only mode if necessary */
  89    if (s->bs_flags != bdrv_get_flags(bs)) {
  90        /* Give up write permissions before making it read-only */
  91        blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
  92        bdrv_reopen(bs, s->bs_flags, NULL);
  93    }
  94
  95    g_free(s->backing_file_str);
  96    job_completed(job, data->ret, NULL);
  97    g_free(data);
  98}
  99
 100static void coroutine_fn stream_run(void *opaque)
 101{
 102    StreamBlockJob *s = opaque;
 103    StreamCompleteData *data;
 104    BlockBackend *blk = s->common.blk;
 105    BlockDriverState *bs = blk_bs(blk);
 106    BlockDriverState *base = s->base;
 107    int64_t len;
 108    int64_t offset = 0;
 109    uint64_t delay_ns = 0;
 110    int error = 0;
 111    int ret = 0;
 112    int64_t n = 0; /* bytes */
 113    void *buf;
 114
 115    if (!bs->backing) {
 116        goto out;
 117    }
 118
 119    len = bdrv_getlength(bs);
 120    if (len < 0) {
 121        ret = len;
 122        goto out;
 123    }
 124    job_progress_set_remaining(&s->common.job, len);
 125
 126    buf = qemu_blockalign(bs, STREAM_BUFFER_SIZE);
 127
 128    /* Turn on copy-on-read for the whole block device so that guest read
 129     * requests help us make progress.  Only do this when copying the entire
 130     * backing chain since the copy-on-read operation does not take base into
 131     * account.
 132     */
 133    if (!base) {
 134        bdrv_enable_copy_on_read(bs);
 135    }
 136
 137    for ( ; offset < len; offset += n) {
 138        bool copy;
 139
 140        /* Note that even when no rate limit is applied we need to yield
 141         * with no pending I/O here so that bdrv_drain_all() returns.
 142         */
 143        job_sleep_ns(&s->common.job, delay_ns);
 144        if (job_is_cancelled(&s->common.job)) {
 145            break;
 146        }
 147
 148        copy = false;
 149
 150        ret = bdrv_is_allocated(bs, offset, STREAM_BUFFER_SIZE, &n);
 151        if (ret == 1) {
 152            /* Allocated in the top, no need to copy.  */
 153        } else if (ret >= 0) {
 154            /* Copy if allocated in the intermediate images.  Limit to the
 155             * known-unallocated area [offset, offset+n*BDRV_SECTOR_SIZE).  */
 156            ret = bdrv_is_allocated_above(backing_bs(bs), base,
 157                                          offset, n, &n);
 158
 159            /* Finish early if end of backing file has been reached */
 160            if (ret == 0 && n == 0) {
 161                n = len - offset;
 162            }
 163
 164            copy = (ret == 1);
 165        }
 166        trace_stream_one_iteration(s, offset, n, ret);
 167        if (copy) {
 168            ret = stream_populate(blk, offset, n, buf);
 169        }
 170        if (ret < 0) {
 171            BlockErrorAction action =
 172                block_job_error_action(&s->common, s->on_error, true, -ret);
 173            if (action == BLOCK_ERROR_ACTION_STOP) {
 174                n = 0;
 175                continue;
 176            }
 177            if (error == 0) {
 178                error = ret;
 179            }
 180            if (action == BLOCK_ERROR_ACTION_REPORT) {
 181                break;
 182            }
 183        }
 184        ret = 0;
 185
 186        /* Publish progress */
 187        job_progress_update(&s->common.job, n);
 188        if (copy) {
 189            delay_ns = block_job_ratelimit_get_delay(&s->common, n);
 190        } else {
 191            delay_ns = 0;
 192        }
 193    }
 194
 195    if (!base) {
 196        bdrv_disable_copy_on_read(bs);
 197    }
 198
 199    /* Do not remove the backing file if an error was there but ignored.  */
 200    ret = error;
 201
 202    qemu_vfree(buf);
 203
 204out:
 205    /* Modify backing chain and close BDSes in main loop */
 206    data = g_malloc(sizeof(*data));
 207    data->ret = ret;
 208    job_defer_to_main_loop(&s->common.job, stream_complete, data);
 209}
 210
 211static const BlockJobDriver stream_job_driver = {
 212    .job_driver = {
 213        .instance_size = sizeof(StreamBlockJob),
 214        .job_type      = JOB_TYPE_STREAM,
 215        .free          = block_job_free,
 216        .start         = stream_run,
 217        .user_resume   = block_job_user_resume,
 218        .drain         = block_job_drain,
 219    },
 220};
 221
 222void stream_start(const char *job_id, BlockDriverState *bs,
 223                  BlockDriverState *base, const char *backing_file_str,
 224                  int64_t speed, BlockdevOnError on_error, Error **errp)
 225{
 226    StreamBlockJob *s;
 227    BlockDriverState *iter;
 228    int orig_bs_flags;
 229
 230    /* Make sure that the image is opened in read-write mode */
 231    orig_bs_flags = bdrv_get_flags(bs);
 232    if (!(orig_bs_flags & BDRV_O_RDWR)) {
 233        if (bdrv_reopen(bs, orig_bs_flags | BDRV_O_RDWR, errp) != 0) {
 234            return;
 235        }
 236    }
 237
 238    /* Prevent concurrent jobs trying to modify the graph structure here, we
 239     * already have our own plans. Also don't allow resize as the image size is
 240     * queried only at the job start and then cached. */
 241    s = block_job_create(job_id, &stream_job_driver, NULL, bs,
 242                         BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
 243                         BLK_PERM_GRAPH_MOD,
 244                         BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
 245                         BLK_PERM_WRITE,
 246                         speed, JOB_DEFAULT, NULL, NULL, errp);
 247    if (!s) {
 248        goto fail;
 249    }
 250
 251    /* Block all intermediate nodes between bs and base, because they will
 252     * disappear from the chain after this operation. The streaming job reads
 253     * every block only once, assuming that it doesn't change, so block writes
 254     * and resizes. */
 255    for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) {
 256        block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
 257                           BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED,
 258                           &error_abort);
 259    }
 260
 261    s->base = base;
 262    s->backing_file_str = g_strdup(backing_file_str);
 263    s->bs_flags = orig_bs_flags;
 264
 265    s->on_error = on_error;
 266    trace_stream_start(bs, base, s);
 267    job_start(&s->common.job);
 268    return;
 269
 270fail:
 271    if (orig_bs_flags != bdrv_get_flags(bs)) {
 272        bdrv_reopen(bs, orig_bs_flags, NULL);
 273    }
 274}
 275