qemu/block/block-copy.c
/*
 * block_copy API
 *
 * Copyright (C) 2013 Proxmox Server Solutions
 * Copyright (c) 2019 Virtuozzo International GmbH.
 *
 * Authors:
 *  Dietmar Maurer (dietmar@proxmox.com)
 *  Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

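/*
 * Overview: a sketch of the intended call sequence, inferred from the API
 * implemented in this file (the variable names below are illustrative only):
 *
 *     BlockCopyState *bcs;
 *
 *     bcs = block_copy_state_new(source, target, cluster_size, write_flags,
 *                                errp);
 *     block_copy_set_progress_callback(bcs, cb, opaque);
 *     block_copy_set_progress_meter(bcs, pm);
 *     ...
 *     ret = block_copy(bcs, offset, bytes, &error_is_read);
 *     ...
 *     block_copy_state_free(bcs);
 */
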
#include "qemu/osdep.h"

#include "trace.h"
#include "qapi/error.h"
#include "block/block-copy.h"
#include "sysemu/block-backend.h"
#include "qemu/units.h"

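/*
 * Tuning limits, as used below:
 *  - BLOCK_COPY_MAX_COPY_RANGE: default chunk size while copy offloading
 *    (bdrv_co_copy_range()) is in use (see block_copy_state_new()).
 *  - BLOCK_COPY_MAX_BUFFER: default chunk size for bounce-buffer copying
 *    after a copy_range failure (see block_copy_do_copy()).
 *  - BLOCK_COPY_MAX_MEM: cap on the total memory consumed by in-flight copy
 *    requests, enforced through the shres accounting in block_copy().
 */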
#define BLOCK_COPY_MAX_COPY_RANGE (16 * MiB)
#define BLOCK_COPY_MAX_BUFFER (1 * MiB)
#define BLOCK_COPY_MAX_MEM (128 * MiB)

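/*
 * Search for an in-flight request intersecting [start, end) and wait for it
 * to finish.  Repeat until no intersecting request is left: the list may
 * have changed while we were sleeping.
 */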
static void coroutine_fn block_copy_wait_inflight_reqs(BlockCopyState *s,
                                                       int64_t start,
                                                       int64_t end)
{
    BlockCopyInFlightReq *req;
    bool waited;

    do {
        waited = false;
        QLIST_FOREACH(req, &s->inflight_reqs, list) {
            if (end > req->start_byte && start < req->end_byte) {
                qemu_co_queue_wait(&req->wait_queue, NULL);
                waited = true;
                break;
            }
        }
    } while (waited);
}

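/*
 * Publish an in-flight request covering [start, end).  The caller must have
 * already waited for all intersecting requests (block_copy() does this).
 */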
static void block_copy_inflight_req_begin(BlockCopyState *s,
                                          BlockCopyInFlightReq *req,
                                          int64_t start, int64_t end)
{
    req->start_byte = start;
    req->end_byte = end;
    qemu_co_queue_init(&req->wait_queue);
    QLIST_INSERT_HEAD(&s->inflight_reqs, req, list);
}

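/* Drop the request from the list and wake up everybody waiting on it. */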
static void coroutine_fn block_copy_inflight_req_end(BlockCopyInFlightReq *req)
{
    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
}

void block_copy_state_free(BlockCopyState *s)
{
    if (!s) {
        return;
    }

    bdrv_release_dirty_bitmap(s->copy_bitmap);
    shres_destroy(s->mem);
    g_free(s);
}

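/*
 * Create the state used by block_copy(): allocate a dirty bitmap with
 * @cluster_size granularity on the source to track what still needs copying,
 * and pick the per-request chunk size depending on whether copy offloading
 * can be used.  Returns NULL and sets @errp on failure.
 */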
BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
                                     int64_t cluster_size,
                                     BdrvRequestFlags write_flags, Error **errp)
{
    BlockCopyState *s;
    BdrvDirtyBitmap *copy_bitmap;
    uint32_t max_transfer =
            MIN_NON_ZERO(INT_MAX,
                         MIN_NON_ZERO(source->bs->bl.max_transfer,
                                      target->bs->bl.max_transfer));

    copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
                                           errp);
    if (!copy_bitmap) {
        return NULL;
    }
    bdrv_disable_dirty_bitmap(copy_bitmap);

    s = g_new(BlockCopyState, 1);
    *s = (BlockCopyState) {
        .source = source,
        .target = target,
        .copy_bitmap = copy_bitmap,
        .cluster_size = cluster_size,
        .len = bdrv_dirty_bitmap_size(copy_bitmap),
        .write_flags = write_flags,
        .mem = shres_create(BLOCK_COPY_MAX_MEM),
    };

    if (max_transfer < cluster_size) {
        /*
         * copy_range does not respect max_transfer. We don't want to bother
         * with requests smaller than the block-copy cluster size, so fall
         * back to buffered copying (read and write respect max_transfer on
         * their own behalf).
         */
        s->use_copy_range = false;
        s->copy_size = cluster_size;
    } else if (write_flags & BDRV_REQ_WRITE_COMPRESSED) {
        /* Compression supports only cluster-size writes and no copy-range. */
        s->use_copy_range = false;
        s->copy_size = cluster_size;
    } else {
        /*
         * copy_range does not respect max_transfer (it's a TODO), so we factor
         * that in here.
         */
        s->use_copy_range = true;
        s->copy_size = MIN(MAX(cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
                           QEMU_ALIGN_DOWN(max_transfer, cluster_size));
    }

    QLIST_INIT(&s->inflight_reqs);

    return s;
}

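/*
 * Register a callback which block_copy() invokes with the number of bytes
 * successfully copied after each completed chunk.
 */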
void block_copy_set_progress_callback(
        BlockCopyState *s,
        ProgressBytesCallbackFunc progress_bytes_callback,
        void *progress_opaque)
{
    s->progress_bytes_callback = progress_bytes_callback;
    s->progress_opaque = progress_opaque;
}

void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm)
{
    s->progress = pm;
}

/*
 * block_copy_do_copy
 *
 * Do copy of a cluster-aligned chunk. @end is allowed to exceed s->len only
 * to cover the last cluster when s->len is not aligned to clusters.
 *
 * No sync here: neither bitmap updates nor intersecting request handling,
 * only the copy itself.
 *
 * Returns 0 on success.
 */
static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
                                           int64_t start, int64_t end,
                                           bool *error_is_read)
{
    int ret;
    int nbytes = MIN(end, s->len) - start;
    void *bounce_buffer = NULL;

    assert(QEMU_IS_ALIGNED(start, s->cluster_size));
    assert(QEMU_IS_ALIGNED(end, s->cluster_size));
    assert(end < s->len || end == QEMU_ALIGN_UP(s->len, s->cluster_size));

    if (s->use_copy_range) {
        ret = bdrv_co_copy_range(s->source, start, s->target, start, nbytes,
                                 0, s->write_flags);
        if (ret < 0) {
            trace_block_copy_copy_range_fail(s, start, ret);
            s->use_copy_range = false;
            s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
            /* Fall back to read+write with an allocated buffer */
        } else {
            goto out;
        }
    }

    /*
     * In case the copy_range request above failed, we may proceed with a
     * buffered request larger than BLOCK_COPY_MAX_BUFFER. Still, further
     * requests will be properly limited, so don't worry too much about it.
     */

    bounce_buffer = qemu_blockalign(s->source->bs, nbytes);

    ret = bdrv_co_pread(s->source, start, nbytes, bounce_buffer, 0);
    if (ret < 0) {
        trace_block_copy_read_fail(s, start, ret);
        if (error_is_read) {
            *error_is_read = true;
        }
        goto out;
    }

    ret = bdrv_co_pwrite(s->target, start, nbytes, bounce_buffer,
                         s->write_flags);
    if (ret < 0) {
        trace_block_copy_write_fail(s, start, ret);
        if (error_is_read) {
            *error_is_read = false;
        }
        goto out;
    }

out:
    qemu_vfree(bounce_buffer);

    return ret;
}

/*
 * Check if the cluster starting at @offset is allocated or not.
 * Return via @pnum the number of contiguous clusters sharing this allocation.
 */
static int block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
                                           int64_t *pnum)
{
    BlockDriverState *bs = s->source->bs;
    int64_t count, total_count = 0;
    int64_t bytes = s->len - offset;
    int ret;

    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));

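    /*
     * bdrv_is_allocated() may return a range shorter than one cluster; keep
     * querying until the allocation status is decided for at least one whole
     * cluster, the status changes, or the end of the image is reached.
     */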
    while (true) {
        ret = bdrv_is_allocated(bs, offset, bytes, &count);
        if (ret < 0) {
            return ret;
        }

        total_count += count;

        if (ret || count == 0) {
            /*
             * ret: partial segment(s) are considered allocated.
             * otherwise: unallocated tail is treated as an entire segment.
             */
            *pnum = DIV_ROUND_UP(total_count, s->cluster_size);
            return ret;
        }

        /* Unallocated segment(s) with uncertain following segment(s) */
        if (total_count >= s->cluster_size) {
            *pnum = total_count / s->cluster_size;
            return 0;
        }

        offset += count;
        bytes -= count;
    }
}

/*
 * Reset bits in copy_bitmap starting at @offset if they represent unallocated
 * data in the image. May reset subsequent contiguous bits.
 * @return 0 when the cluster at @offset was unallocated,
 *         1 otherwise, and a negative errno on error.
 */
int64_t block_copy_reset_unallocated(BlockCopyState *s,
                                     int64_t offset, int64_t *count)
{
    int ret;
    int64_t clusters, bytes;

    ret = block_copy_is_cluster_allocated(s, offset, &clusters);
    if (ret < 0) {
        return ret;
    }

    bytes = clusters * s->cluster_size;

    if (!ret) {
        bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
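        /* Remaining work: everything still dirty plus what is in flight. */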
        progress_set_remaining(s->progress,
                               bdrv_get_dirty_count(s->copy_bitmap) +
                               s->in_flight_bytes);
    }

    *count = bytes;
    return ret;
}

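/*
 * Copy the clusters in [start, start + bytes) that are still marked dirty in
 * the copy bitmap.  The range must be cluster-aligned.  Intersecting
 * in-flight requests are waited for first, and the range is published as
 * in-flight so that concurrent block_copy() calls serialize against it.
 *
 * Returns 0 on success or a negative errno on failure; in the latter case
 * *error_is_read (if non-NULL) tells whether the error happened on the read
 * or the write side.
 */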
int coroutine_fn block_copy(BlockCopyState *s,
                            int64_t start, uint64_t bytes,
                            bool *error_is_read)
{
    int ret = 0;
    int64_t end = bytes + start; /* bytes */
    int64_t status_bytes;
    BlockCopyInFlightReq req;

    /*
     * The block_copy() caller is responsible for keeping source and target
     * in the same AioContext.
     */
    assert(bdrv_get_aio_context(s->source->bs) ==
           bdrv_get_aio_context(s->target->bs));

    assert(QEMU_IS_ALIGNED(start, s->cluster_size));
    assert(QEMU_IS_ALIGNED(end, s->cluster_size));

    block_copy_wait_inflight_reqs(s, start, end);
    block_copy_inflight_req_begin(s, &req, start, end);

    while (start < end) {
        int64_t next_zero, chunk_end;

        if (!bdrv_dirty_bitmap_get(s->copy_bitmap, start)) {
            trace_block_copy_skip(s, start);
            start += s->cluster_size;
            continue; /* already copied */
        }

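        /*
         * Limit the chunk to the configured copy size, then trim it at the
         * first already-clean cluster so that only dirty data is copied.
         */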
        chunk_end = MIN(end, start + s->copy_size);

        next_zero = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, start,
                                                chunk_end - start);
        if (next_zero >= 0) {
            assert(next_zero > start); /* start is dirty */
            assert(next_zero < chunk_end); /* no need to do MIN() */
            chunk_end = next_zero;
        }

        if (s->skip_unallocated) {
            ret = block_copy_reset_unallocated(s, start, &status_bytes);
            if (ret == 0) {
                trace_block_copy_skip_range(s, start, status_bytes);
                start += status_bytes;
                continue;
            } else if (ret > 0) {
                /* Clamp to known allocated region */
                chunk_end = MIN(chunk_end, start + status_bytes);
            }
            /*
             * On error (ret < 0) status_bytes is not valid; just copy the
             * chunk as if it were allocated.
             */
        }

        trace_block_copy_process(s, start);

        bdrv_reset_dirty_bitmap(s->copy_bitmap, start, chunk_end - start);
        s->in_flight_bytes += chunk_end - start;

        co_get_from_shres(s->mem, chunk_end - start);
        ret = block_copy_do_copy(s, start, chunk_end, error_is_read);
        co_put_to_shres(s->mem, chunk_end - start);
        s->in_flight_bytes -= chunk_end - start;
        if (ret < 0) {
            bdrv_set_dirty_bitmap(s->copy_bitmap, start, chunk_end - start);
            break;
        }

        progress_work_done(s->progress, chunk_end - start);
        s->progress_bytes_callback(chunk_end - start, s->progress_opaque);
        start = chunk_end;
        ret = 0;
    }

    block_copy_inflight_req_end(&req);

    return ret;
}