1
2
3
4
5
6
7
8
9
10
11
12
13
14
15#include "qemu/osdep.h"
16
17#include "trace.h"
18#include "qapi/error.h"
19#include "block/block-copy.h"
20#include "sysemu/block-backend.h"
21#include "qemu/units.h"
22
23#define BLOCK_COPY_MAX_COPY_RANGE (16 * MiB)
24#define BLOCK_COPY_MAX_BUFFER (1 * MiB)
25#define BLOCK_COPY_MAX_MEM (128 * MiB)
26
/*
 * Tracks one in-flight copy request so that overlapping requests can
 * serialize against it (see find_conflicting_inflight_req()).
 */
typedef struct BlockCopyInFlightReq {
    int64_t offset;                          /* start of the region, bytes */
    int64_t bytes;                           /* length of the region, bytes */
    QLIST_ENTRY(BlockCopyInFlightReq) list;  /* entry in BlockCopyState.inflight_reqs */
    CoQueue wait_queue;                      /* coroutines blocked on this request */
} BlockCopyInFlightReq;
33
/*
 * State shared by all requests of one block-copy operation.
 *
 * NOTE(review): source/target are only dereferenced here, never
 * ref'd/unref'd — ownership appears to stay with the caller; confirm.
 */
typedef struct BlockCopyState {
    BdrvChild *source;
    BdrvChild *target;

    /* Clusters that still need to be copied. */
    BdrvDirtyBitmap *copy_bitmap;

    /* Sum of bytes of all currently in-flight requests. */
    int64_t in_flight_bytes;

    int64_t cluster_size;

    /* Try bdrv_co_copy_range() before falling back to a bounce buffer. */
    bool use_copy_range;

    /* Maximum number of bytes handled by one copy iteration. */
    int64_t copy_size;

    /* Size of the copied area, cached from the dirty bitmap at creation. */
    uint64_t len;

    /* In-flight requests, used to serialize overlapping operations. */
    QLIST_HEAD(, BlockCopyInFlightReq) inflight_reqs;

    /* Flags applied to every write to the target. */
    BdrvRequestFlags write_flags;

    /*
     * When true, clusters that are unallocated in the source above its
     * backing file are not copied but cleared from copy_bitmap instead
     * (see block_copy_block_status() and block_copy_dirty_clusters()).
     */
    bool skip_unallocated;

    ProgressMeter *progress;

    /* Invoked with the number of bytes copied after each successful chunk. */
    ProgressBytesCallbackFunc progress_bytes_callback;
    void *progress_opaque;

    /* Bounds total memory held by in-flight copies (BLOCK_COPY_MAX_MEM). */
    SharedResource *mem;
} BlockCopyState;
75
76static BlockCopyInFlightReq *find_conflicting_inflight_req(BlockCopyState *s,
77 int64_t offset,
78 int64_t bytes)
79{
80 BlockCopyInFlightReq *req;
81
82 QLIST_FOREACH(req, &s->inflight_reqs, list) {
83 if (offset + bytes > req->offset && offset < req->offset + req->bytes) {
84 return req;
85 }
86 }
87
88 return NULL;
89}
90
91
92
93
94
95static bool coroutine_fn block_copy_wait_one(BlockCopyState *s, int64_t offset,
96 int64_t bytes)
97{
98 BlockCopyInFlightReq *req = find_conflicting_inflight_req(s, offset, bytes);
99
100 if (!req) {
101 return false;
102 }
103
104 qemu_co_queue_wait(&req->wait_queue, NULL);
105
106 return true;
107}
108
109
110static void block_copy_inflight_req_begin(BlockCopyState *s,
111 BlockCopyInFlightReq *req,
112 int64_t offset, int64_t bytes)
113{
114 assert(!find_conflicting_inflight_req(s, offset, bytes));
115
116 bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
117 s->in_flight_bytes += bytes;
118
119 req->offset = offset;
120 req->bytes = bytes;
121 qemu_co_queue_init(&req->wait_queue);
122 QLIST_INSERT_HEAD(&s->inflight_reqs, req, list);
123}
124
125
126
127
128
129
130
131
132static void coroutine_fn block_copy_inflight_req_shrink(BlockCopyState *s,
133 BlockCopyInFlightReq *req, int64_t new_bytes)
134{
135 if (new_bytes == req->bytes) {
136 return;
137 }
138
139 assert(new_bytes > 0 && new_bytes < req->bytes);
140
141 s->in_flight_bytes -= req->bytes - new_bytes;
142 bdrv_set_dirty_bitmap(s->copy_bitmap,
143 req->offset + new_bytes, req->bytes - new_bytes);
144
145 req->bytes = new_bytes;
146 qemu_co_queue_restart_all(&req->wait_queue);
147}
148
149static void coroutine_fn block_copy_inflight_req_end(BlockCopyState *s,
150 BlockCopyInFlightReq *req,
151 int ret)
152{
153 s->in_flight_bytes -= req->bytes;
154 if (ret < 0) {
155 bdrv_set_dirty_bitmap(s->copy_bitmap, req->offset, req->bytes);
156 }
157 QLIST_REMOVE(req, list);
158 qemu_co_queue_restart_all(&req->wait_queue);
159}
160
161void block_copy_state_free(BlockCopyState *s)
162{
163 if (!s) {
164 return;
165 }
166
167 bdrv_release_dirty_bitmap(s->copy_bitmap);
168 shres_destroy(s->mem);
169 g_free(s);
170}
171
172static uint32_t block_copy_max_transfer(BdrvChild *source, BdrvChild *target)
173{
174 return MIN_NON_ZERO(INT_MAX,
175 MIN_NON_ZERO(source->bs->bl.max_transfer,
176 target->bs->bl.max_transfer));
177}
178
179BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
180 int64_t cluster_size,
181 BdrvRequestFlags write_flags, Error **errp)
182{
183 BlockCopyState *s;
184 BdrvDirtyBitmap *copy_bitmap;
185
186 copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
187 errp);
188 if (!copy_bitmap) {
189 return NULL;
190 }
191 bdrv_disable_dirty_bitmap(copy_bitmap);
192
193 s = g_new(BlockCopyState, 1);
194 *s = (BlockCopyState) {
195 .source = source,
196 .target = target,
197 .copy_bitmap = copy_bitmap,
198 .cluster_size = cluster_size,
199 .len = bdrv_dirty_bitmap_size(copy_bitmap),
200 .write_flags = write_flags,
201 .mem = shres_create(BLOCK_COPY_MAX_MEM),
202 };
203
204 if (block_copy_max_transfer(source, target) < cluster_size) {
205
206
207
208
209
210
211 s->use_copy_range = false;
212 s->copy_size = cluster_size;
213 } else if (write_flags & BDRV_REQ_WRITE_COMPRESSED) {
214
215 s->use_copy_range = false;
216 s->copy_size = cluster_size;
217 } else {
218
219
220
221
222 s->use_copy_range = true;
223 s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
224 }
225
226 QLIST_INIT(&s->inflight_reqs);
227
228 return s;
229}
230
231void block_copy_set_progress_callback(
232 BlockCopyState *s,
233 ProgressBytesCallbackFunc progress_bytes_callback,
234 void *progress_opaque)
235{
236 s->progress_bytes_callback = progress_bytes_callback;
237 s->progress_opaque = progress_opaque;
238}
239
/* Attach a ProgressMeter used for remaining/done accounting during copy. */
void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm)
{
    s->progress = pm;
}
244
245
246
247
248
249
250
251
252
253
254
/*
 * block_copy_do_copy
 *
 * Copy @bytes at @offset from source to target, or write zeroes if @zeroes
 * is set.  @offset and @bytes must be cluster-aligned; the range may extend
 * past s->len only up to cluster alignment and is clipped to s->len.
 *
 * No request serialization or dirty-bitmap handling is done here — the
 * caller (block_copy_dirty_clusters()) takes care of both.
 *
 * On failure, if @error_is_read is non-NULL it is set to true when the read
 * side failed and false when the write / write-zeroes side failed.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
                                           int64_t offset, int64_t bytes,
                                           bool zeroes, bool *error_is_read)
{
    int ret;
    int64_t nbytes = MIN(offset + bytes, s->len) - offset; /* clip the tail */
    void *bounce_buffer = NULL;

    assert(offset >= 0 && bytes > 0 && INT64_MAX - offset >= bytes);
    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
    assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));
    assert(offset < s->len);
    assert(offset + bytes <= s->len ||
           offset + bytes == QEMU_ALIGN_UP(s->len, s->cluster_size));
    assert(nbytes < INT_MAX);

    if (zeroes) {
        /* Compression makes no sense for zeroes; mask that flag out. */
        ret = bdrv_co_pwrite_zeroes(s->target, offset, nbytes, s->write_flags &
                                    ~BDRV_REQ_WRITE_COMPRESSED);
        if (ret < 0) {
            trace_block_copy_write_zeroes_fail(s, offset, ret);
            if (error_is_read) {
                *error_is_read = false;
            }
        }
        return ret;
    }

    if (s->use_copy_range) {
        ret = bdrv_co_copy_range(s->source, offset, s->target, offset, nbytes,
                                 0, s->write_flags);
        if (ret < 0) {
            trace_block_copy_copy_range_fail(s, offset, ret);
            s->use_copy_range = false;
            s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
            /* Fall through to the read+write path below. */
        } else {
            if (s->use_copy_range) {
                /*
                 * Success: grow copy_size toward BLOCK_COPY_MAX_COPY_RANGE,
                 * bounded by the devices' max_transfer rounded down to
                 * cluster size.  use_copy_range is re-checked because a
                 * concurrent request may have cleared it while we yielded
                 * in bdrv_co_copy_range() — presumably; confirm.
                 */
                s->copy_size =
                    MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
                        QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source,
                                                                s->target),
                                        s->cluster_size));
            }
            goto out;
        }
    }

    /*
     * Buffered copy path (also the fallback after a failed copy_range
     * above, in which case nbytes may still be larger than
     * BLOCK_COPY_MAX_BUFFER since it was chosen before the fallback).
     */
    bounce_buffer = qemu_blockalign(s->source->bs, nbytes);

    ret = bdrv_co_pread(s->source, offset, nbytes, bounce_buffer, 0);
    if (ret < 0) {
        trace_block_copy_read_fail(s, offset, ret);
        if (error_is_read) {
            *error_is_read = true;
        }
        goto out;
    }

    ret = bdrv_co_pwrite(s->target, offset, nbytes, bounce_buffer,
                         s->write_flags);
    if (ret < 0) {
        trace_block_copy_write_fail(s, offset, ret);
        if (error_is_read) {
            *error_is_read = false;
        }
        goto out;
    }

out:
    qemu_vfree(bounce_buffer);

    return ret;
}
347
348static int block_copy_block_status(BlockCopyState *s, int64_t offset,
349 int64_t bytes, int64_t *pnum)
350{
351 int64_t num;
352 BlockDriverState *base;
353 int ret;
354
355 if (s->skip_unallocated && s->source->bs->backing) {
356 base = s->source->bs->backing->bs;
357 } else {
358 base = NULL;
359 }
360
361 ret = bdrv_block_status_above(s->source->bs, base, offset, bytes, &num,
362 NULL, NULL);
363 if (ret < 0 || num < s->cluster_size) {
364
365
366
367
368 num = s->cluster_size;
369 ret = BDRV_BLOCK_ALLOCATED | BDRV_BLOCK_DATA;
370 } else if (offset + num == s->len) {
371 num = QEMU_ALIGN_UP(num, s->cluster_size);
372 } else {
373 num = QEMU_ALIGN_DOWN(num, s->cluster_size);
374 }
375
376 *pnum = num;
377 return ret;
378}
379
380
381
382
383
/*
 * Check whether the cluster at @offset is allocated in the source.
 *
 * Returns the bdrv_is_allocated() status of the cluster (1 allocated,
 * 0 unallocated, negative errno on error) and sets *pnum to the number of
 * consecutive clusters known to share that status.  A cluster that is even
 * partially allocated counts as allocated (the accumulated length is
 * rounded up).
 */
static int block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
                                           int64_t *pnum)
{
    BlockDriverState *bs = s->source->bs;
    int64_t count, total_count = 0;
    int64_t bytes = s->len - offset;
    int ret;

    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));

    while (true) {
        ret = bdrv_is_allocated(bs, offset, bytes, &count);
        if (ret < 0) {
            return ret;
        }

        total_count += count;

        if (ret || count == 0) {
            /*
             * Stopped at an allocated area (ret == 1) or made no further
             * progress: report the accumulated length, rounded up so a
             * partially-covered final cluster is included.
             */
            *pnum = DIV_ROUND_UP(total_count, s->cluster_size);
            return ret;
        }

        /* At least one whole cluster of unallocated data accumulated. */
        if (total_count >= s->cluster_size) {
            *pnum = total_count / s->cluster_size;
            return 0;
        }

        offset += count;
        bytes -= count;
    }
}
421
422
423
424
425
426
427
428int64_t block_copy_reset_unallocated(BlockCopyState *s,
429 int64_t offset, int64_t *count)
430{
431 int ret;
432 int64_t clusters, bytes;
433
434 ret = block_copy_is_cluster_allocated(s, offset, &clusters);
435 if (ret < 0) {
436 return ret;
437 }
438
439 bytes = clusters * s->cluster_size;
440
441 if (!ret) {
442 bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
443 progress_set_remaining(s->progress,
444 bdrv_get_dirty_count(s->copy_bitmap) +
445 s->in_flight_bytes);
446 }
447
448 *count = bytes;
449 return ret;
450}
451
452
453
454
455
456
457
458
/*
 * block_copy_dirty_clusters
 *
 * Copy the dirty clusters in the range @offset..@offset + @bytes (both
 * must be cluster-aligned).
 *
 * Returns 1 if at least one dirty cluster was found and all found dirty
 * clusters were copied successfully, 0 if the whole range was clean, and
 * a negative errno on copy failure.
 */
static int coroutine_fn block_copy_dirty_clusters(BlockCopyState *s,
                                                  int64_t offset, int64_t bytes,
                                                  bool *error_is_read)
{
    int ret = 0;
    bool found_dirty = false;

    /* Source and target must share an AioContext for the copy to be safe. */
    assert(bdrv_get_aio_context(s->source->bs) ==
           bdrv_get_aio_context(s->target->bs));

    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
    assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));

    while (bytes) {
        BlockCopyInFlightReq req;
        int64_t next_zero, cur_bytes, status_bytes;

        /* Clean cluster (or claimed by another request): skip it. */
        if (!bdrv_dirty_bitmap_get(s->copy_bitmap, offset)) {
            trace_block_copy_skip(s, offset);
            offset += s->cluster_size;
            bytes -= s->cluster_size;
            continue;
        }

        found_dirty = true;

        cur_bytes = MIN(bytes, s->copy_size);

        /* Shorten the chunk to the contiguous dirty run starting here. */
        next_zero = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, offset,
                                                cur_bytes);
        if (next_zero >= 0) {
            assert(next_zero > offset); /* offset is dirty */
            assert(next_zero < offset + cur_bytes); /* no need for MIN() */
            cur_bytes = next_zero - offset;
        }
        block_copy_inflight_req_begin(s, &req, offset, cur_bytes);

        ret = block_copy_block_status(s, offset, cur_bytes, &status_bytes);
        assert(ret >= 0); /* block_copy_block_status() never fails */
        /* Status may cover less than requested; give the tail back. */
        cur_bytes = MIN(cur_bytes, status_bytes);
        block_copy_inflight_req_shrink(s, &req, cur_bytes);
        if (s->skip_unallocated && !(ret & BDRV_BLOCK_ALLOCATED)) {
            /* Unallocated above backing: not copied, just dropped. */
            block_copy_inflight_req_end(s, &req, 0);
            progress_set_remaining(s->progress,
                                   bdrv_get_dirty_count(s->copy_bitmap) +
                                   s->in_flight_bytes);
            trace_block_copy_skip_range(s, offset, status_bytes);
            offset += status_bytes;
            bytes -= status_bytes;
            continue;
        }

        trace_block_copy_process(s, offset);

        /* Throttle total memory used by in-flight copies. */
        co_get_from_shres(s->mem, cur_bytes);
        ret = block_copy_do_copy(s, offset, cur_bytes, ret & BDRV_BLOCK_ZERO,
                                 error_is_read);
        co_put_to_shres(s->mem, cur_bytes);
        block_copy_inflight_req_end(s, &req, ret);
        if (ret < 0) {
            return ret;
        }

        progress_work_done(s->progress, cur_bytes);
        s->progress_bytes_callback(cur_bytes, s->progress_opaque);
        offset += cur_bytes;
        bytes -= cur_bytes;
    }

    return found_dirty;
}
534
535
536
537
538
539
540
541
542
543
544int coroutine_fn block_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
545 bool *error_is_read)
546{
547 int ret;
548
549 do {
550 ret = block_copy_dirty_clusters(s, offset, bytes, error_is_read);
551
552 if (ret == 0) {
553 ret = block_copy_wait_one(s, offset, bytes);
554 }
555
556
557
558
559
560
561
562
563
564
565 } while (ret > 0);
566
567 return ret;
568}
569
/* Return the internal bitmap tracking not-yet-copied clusters. */
BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s)
{
    return s->copy_bitmap;
}
574
/*
 * Enable/disable skipping of clusters unallocated in the source above its
 * backing file (see block_copy_block_status()).
 */
void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip)
{
    s->skip_unallocated = skip;
}
579