/*
 * block_copy API
 *
 * Copyright (C) 2013 Proxmox Server Solutions
 * Copyright (c) 2019 Virtuozzo International GmbH.
 *
 * Authors:
 *  Dietmar Maurer (dietmar@proxmox.com)
 *  Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"

#include "trace.h"
#include "qapi/error.h"
#include "block/block-copy.h"
#include "sysemu/block-backend.h"
#include "qemu/units.h"
#include "qemu/coroutine.h"
#include "block/aio_task.h"

#define BLOCK_COPY_MAX_COPY_RANGE (16 * MiB)
#define BLOCK_COPY_MAX_BUFFER (1 * MiB)
#define BLOCK_COPY_MAX_MEM (128 * MiB)
#define BLOCK_COPY_MAX_WORKERS 64
#define BLOCK_COPY_SLICE_TIME 100000000ULL /* ns */

static coroutine_fn int block_copy_task_entry(AioTask *task);

typedef struct BlockCopyCallState {
    /* IN parameters. Initialized in block_copy_async() and never changed. */
    BlockCopyState *s;
    int64_t offset;
    int64_t bytes;
    int max_workers;
    int64_t max_chunk;
    bool ignore_ratelimit;
    BlockCopyAsyncCallbackFunc cb;
    void *cb_opaque;

    /* Coroutine where async block-copy is running */
    Coroutine *co;

    /* To reference all call states from BlockCopyState */
    QLIST_ENTRY(BlockCopyCallState) list;

    /* State */
    int ret;
    bool finished;
    QemuCoSleepState *sleep_state;
    bool cancelled;

    /* OUT parameters */
    bool error_is_read;
} BlockCopyCallState;

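/*
 * A BlockCopyTask describes one in-flight chunk of a block-copy call: its
 * area is already cleared in copy_bitmap and accounted in in_flight_bytes.
 * Other requests touching the same area wait on wait_queue until the task
 * ends (see block_copy_wait_one()).
 */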
typedef struct BlockCopyTask {
    AioTask task;

    BlockCopyState *s;
    BlockCopyCallState *call_state;
    int64_t offset;
    int64_t bytes;
    bool zeroes;
    QLIST_ENTRY(BlockCopyTask) list;
    CoQueue wait_queue;
} BlockCopyTask;

static int64_t task_end(BlockCopyTask *task)
{
    return task->offset + task->bytes;
}

typedef struct BlockCopyState {
    /*
     * BdrvChild objects are not owned or managed by the block-copy operation.
     * The caller must ensure that source and target stay valid for the whole
     * lifetime of the block-copy operation.
     */
    BdrvChild *source;
    BdrvChild *target;
    BdrvDirtyBitmap *copy_bitmap;
    int64_t in_flight_bytes;
    int64_t cluster_size;
    bool use_copy_range;
    int64_t copy_size;
    uint64_t len;
    QLIST_HEAD(, BlockCopyTask) tasks; /* All tasks from all block-copy calls */
    QLIST_HEAD(, BlockCopyCallState) calls;

    BdrvRequestFlags write_flags;

    /*
     * skip_unallocated:
     *
     * Used by sync=top jobs, which first scan the source node for unallocated
     * areas and clear them in the copy_bitmap.  During this process, the
     * bitmap is thus not fully initialized: it may still have bits set for
     * areas that are unallocated and should actually not be copied.
     *
     * This is indicated by skip_unallocated.
     *
     * In this case, block_copy() will query the source's allocation status,
     * skip unallocated regions, clear them in the copy_bitmap, and invoke
     * block_copy_reset_unallocated() every time it does.
     */
    bool skip_unallocated;

    ProgressMeter *progress;

    SharedResource *mem;

    uint64_t speed;
    RateLimit rate_limit;
} BlockCopyState;

static BlockCopyTask *find_conflicting_task(BlockCopyState *s,
                                            int64_t offset, int64_t bytes)
{
    BlockCopyTask *t;

    QLIST_FOREACH(t, &s->tasks, list) {
        if (offset + bytes > t->offset && offset < t->offset + t->bytes) {
            return t;
        }
    }

    return NULL;
}

/*
 * If there are no intersecting tasks return false. Otherwise, wait for the
 * first found intersecting task to finish and return true.
 */
static bool coroutine_fn block_copy_wait_one(BlockCopyState *s, int64_t offset,
                                             int64_t bytes)
{
    BlockCopyTask *task = find_conflicting_task(s, offset, bytes);

    if (!task) {
        return false;
    }

    qemu_co_queue_wait(&task->wait_queue, NULL);

    return true;
}

/*
 * Search for the first dirty area in the offset/bytes range and create a task
 * at the beginning of it.
 */
static BlockCopyTask *block_copy_task_create(BlockCopyState *s,
                                             BlockCopyCallState *call_state,
                                             int64_t offset, int64_t bytes)
{
    BlockCopyTask *task;
    int64_t max_chunk = MIN_NON_ZERO(s->copy_size, call_state->max_chunk);

    if (!bdrv_dirty_bitmap_next_dirty_area(s->copy_bitmap,
                                           offset, offset + bytes,
                                           max_chunk, &offset, &bytes))
    {
        return NULL;
    }

    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
    bytes = QEMU_ALIGN_UP(bytes, s->cluster_size);

    /* region is dirty, so no existent tasks possible in it */
    assert(!find_conflicting_task(s, offset, bytes));

    bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
    s->in_flight_bytes += bytes;

    task = g_new(BlockCopyTask, 1);
    *task = (BlockCopyTask) {
        .task.func = block_copy_task_entry,
        .s = s,
        .call_state = call_state,
        .offset = offset,
        .bytes = bytes,
    };
    qemu_co_queue_init(&task->wait_queue);
    QLIST_INSERT_HEAD(&s->tasks, task, list);

    return task;
}

/*
 * block_copy_task_shrink
 *
 * Drop the tail of the task to be handled later.  Set the dirty bits back for
 * the dropped area and wake up coroutines waiting on this task, as their
 * regions may no longer intersect it.
 */
static void coroutine_fn block_copy_task_shrink(BlockCopyTask *task,
                                                int64_t new_bytes)
{
    if (new_bytes == task->bytes) {
        return;
    }

    assert(new_bytes > 0 && new_bytes < task->bytes);

    task->s->in_flight_bytes -= task->bytes - new_bytes;
    bdrv_set_dirty_bitmap(task->s->copy_bitmap,
                          task->offset + new_bytes, task->bytes - new_bytes);

    task->bytes = new_bytes;
    qemu_co_queue_restart_all(&task->wait_queue);
}

static void coroutine_fn block_copy_task_end(BlockCopyTask *task, int ret)
{
    task->s->in_flight_bytes -= task->bytes;
    if (ret < 0) {
        bdrv_set_dirty_bitmap(task->s->copy_bitmap, task->offset, task->bytes);
    }
    QLIST_REMOVE(task, list);
    qemu_co_queue_restart_all(&task->wait_queue);
}

void block_copy_state_free(BlockCopyState *s)
{
    if (!s) {
        return;
    }

    bdrv_release_dirty_bitmap(s->copy_bitmap);
    shres_destroy(s->mem);
    g_free(s);
}

static uint32_t block_copy_max_transfer(BdrvChild *source, BdrvChild *target)
{
    return MIN_NON_ZERO(INT_MAX,
                        MIN_NON_ZERO(source->bs->bl.max_transfer,
                                     target->bs->bl.max_transfer));
}

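/*
 * Create the state shared by all block-copy calls between @source and
 * @target.  A dirty bitmap with @cluster_size granularity is created to
 * track which areas still need to be copied; the copy_size/use_copy_range
 * strategy is chosen below from the nodes' max_transfer and @write_flags.
 */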
BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
                                     int64_t cluster_size, bool use_copy_range,
                                     BdrvRequestFlags write_flags, Error **errp)
{
    BlockCopyState *s;
    BdrvDirtyBitmap *copy_bitmap;

    copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
                                           errp);
    if (!copy_bitmap) {
        return NULL;
    }
    bdrv_disable_dirty_bitmap(copy_bitmap);

    s = g_new(BlockCopyState, 1);
    *s = (BlockCopyState) {
        .source = source,
        .target = target,
        .copy_bitmap = copy_bitmap,
        .cluster_size = cluster_size,
        .len = bdrv_dirty_bitmap_size(copy_bitmap),
        .write_flags = write_flags,
        .mem = shres_create(BLOCK_COPY_MAX_MEM),
    };

    if (block_copy_max_transfer(source, target) < cluster_size) {
        /*
         * copy_range does not respect max_transfer. We don't want to bother
         * with requests smaller than the block-copy cluster size, so fall
         * back to buffered copying (read and write respect max_transfer on
         * their own).
         */
        s->use_copy_range = false;
        s->copy_size = cluster_size;
    } else if (write_flags & BDRV_REQ_WRITE_COMPRESSED) {
        /* Compression supports only cluster-size writes and no copy-range. */
        s->use_copy_range = false;
        s->copy_size = cluster_size;
    } else {
        /*
         * We enable copy-range, but keep small copy_size, until the first
         * successful copy_range (look at block_copy_do_copy).
         */
        s->use_copy_range = use_copy_range;
        s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
    }

    QLIST_INIT(&s->tasks);
    QLIST_INIT(&s->calls);

    return s;
}

void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm)
{
    s->progress = pm;
}

/*
 * Takes ownership of @task
 *
 * If pool is NULL directly run the task, otherwise schedule it into the pool.
 *
 * Returns: task.func return code if pool is NULL
 *          otherwise -ECANCELED if pool status is bad
 *          otherwise 0 (successfully scheduled)
 */
static coroutine_fn int block_copy_task_run(AioTaskPool *pool,
                                            BlockCopyTask *task)
{
    if (!pool) {
        int ret = task->task.func(&task->task);

        g_free(task);
        return ret;
    }

    aio_task_pool_wait_slot(pool);
    if (aio_task_pool_status(pool) < 0) {
        co_put_to_shres(task->s->mem, task->bytes);
        block_copy_task_end(task, -ECANCELED);
        g_free(task);
        return -ECANCELED;
    }

    aio_task_pool_start_task(pool, &task->task);

    return 0;
}

/*
 * block_copy_do_copy
 *
 * Do copy of a cluster-aligned chunk. The requested region is allowed to
 * exceed s->len only to cover the last cluster when s->len is not aligned to
 * clusters.
 *
 * No sync here: neither bitmap nor intersecting-request handling, only copy.
 *
 * Returns 0 on success.
 */
static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
                                           int64_t offset, int64_t bytes,
                                           bool zeroes, bool *error_is_read)
{
    int ret;
    int64_t nbytes = MIN(offset + bytes, s->len) - offset;
    void *bounce_buffer = NULL;

    assert(offset >= 0 && bytes > 0 && INT64_MAX - offset >= bytes);
    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
    assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));
    assert(offset < s->len);
    assert(offset + bytes <= s->len ||
           offset + bytes == QEMU_ALIGN_UP(s->len, s->cluster_size));
    assert(nbytes < INT_MAX);

    if (zeroes) {
        ret = bdrv_co_pwrite_zeroes(s->target, offset, nbytes, s->write_flags &
                                    ~BDRV_REQ_WRITE_COMPRESSED);
        if (ret < 0) {
            trace_block_copy_write_zeroes_fail(s, offset, ret);
            *error_is_read = false;
        }
        return ret;
    }

    if (s->use_copy_range) {
        ret = bdrv_co_copy_range(s->source, offset, s->target, offset, nbytes,
                                 0, s->write_flags);
        if (ret < 0) {
            trace_block_copy_copy_range_fail(s, offset, ret);
            s->use_copy_range = false;
            s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
            /* Fall back to read+write with an allocated buffer */
        } else {
            if (s->use_copy_range) {
                /*
                 * Successful copy-range. Now increase copy_size.  copy_range
                 * does not respect max_transfer (it's a TODO), so we factor
                 * that in here.
                 *
                 * Note: we double-check s->use_copy_range for the case when
                 * a parallel block-copy request unsets it during the previous
                 * bdrv_co_copy_range call.
                 */
                s->copy_size =
                        MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
                            QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source,
                                                                    s->target),
                                            s->cluster_size));
            }
            goto out;
        }
    }

    /*
     * In case of a failed copy_range request above, we may proceed with a
     * buffered request larger than BLOCK_COPY_MAX_BUFFER. Still, further
     * requests will be properly limited, so don't care too much. Moreover,
     * the most likely case (copy_range is unsupported for the configuration,
     * so the very first copy_range request fails) is handled by setting a
     * large copy_size only after the first successful copy_range.
     */

    bounce_buffer = qemu_blockalign(s->source->bs, nbytes);

    ret = bdrv_co_pread(s->source, offset, nbytes, bounce_buffer, 0);
    if (ret < 0) {
        trace_block_copy_read_fail(s, offset, ret);
        *error_is_read = true;
        goto out;
    }

    ret = bdrv_co_pwrite(s->target, offset, nbytes, bounce_buffer,
                         s->write_flags);
    if (ret < 0) {
        trace_block_copy_write_fail(s, offset, ret);
        *error_is_read = false;
        goto out;
    }

out:
    qemu_vfree(bounce_buffer);

    return ret;
}

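/*
 * AioTask entry point for one block-copy task: copy (or zero) the task's
 * region, record the first error (and whether it was a read error) in the
 * owning call state, update progress, release the shared memory resource and
 * end the task.
 */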
static coroutine_fn int block_copy_task_entry(AioTask *task)
{
    BlockCopyTask *t = container_of(task, BlockCopyTask, task);
    bool error_is_read = false;
    int ret;

    ret = block_copy_do_copy(t->s, t->offset, t->bytes, t->zeroes,
                             &error_is_read);
    if (ret < 0 && !t->call_state->ret) {
        t->call_state->ret = ret;
        t->call_state->error_is_read = error_is_read;
    } else {
        progress_work_done(t->s->progress, t->bytes);
    }
    co_put_to_shres(t->s->mem, t->bytes);
    block_copy_task_end(t, ret);

    return ret;
}

static int block_copy_block_status(BlockCopyState *s, int64_t offset,
                                   int64_t bytes, int64_t *pnum)
{
    int64_t num;
    BlockDriverState *base;
    int ret;

    if (s->skip_unallocated) {
        base = bdrv_backing_chain_next(s->source->bs);
    } else {
        base = NULL;
    }

    ret = bdrv_block_status_above(s->source->bs, base, offset, bytes, &num,
                                  NULL, NULL);
    if (ret < 0 || num < s->cluster_size) {
        /*
         * On error, or if we failed to obtain a large enough chunk, just
         * fall back to copying one cluster.
         */
        num = s->cluster_size;
        ret = BDRV_BLOCK_ALLOCATED | BDRV_BLOCK_DATA;
    } else if (offset + num == s->len) {
        num = QEMU_ALIGN_UP(num, s->cluster_size);
    } else {
        num = QEMU_ALIGN_DOWN(num, s->cluster_size);
    }

    *pnum = num;
    return ret;
}

/*
 * Check if the cluster starting at offset is allocated or not.
 * Return via pnum the number of contiguous clusters sharing this allocation.
 */
static int block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
                                           int64_t *pnum)
{
    BlockDriverState *bs = s->source->bs;
    int64_t count, total_count = 0;
    int64_t bytes = s->len - offset;
    int ret;

    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));

    while (true) {
        ret = bdrv_is_allocated(bs, offset, bytes, &count);
        if (ret < 0) {
            return ret;
        }

        total_count += count;

        if (ret || count == 0) {
            /*
             * ret: partial segment(s) are considered allocated.
             * otherwise: unallocated tail is treated as an entire segment.
             */
            *pnum = DIV_ROUND_UP(total_count, s->cluster_size);
            return ret;
        }

        /* Unallocated segment(s) with uncertain following segment(s) */
        if (total_count >= s->cluster_size) {
            *pnum = total_count / s->cluster_size;
            return 0;
        }

        offset += count;
        bytes -= count;
    }
}

/*
 * Reset bits in copy_bitmap starting at offset if they represent unallocated
 * data in the image. May reset subsequent contiguous bits.
 * @return 0 when the cluster at @offset was unallocated,
 *         1 otherwise, and -ret on error.
 */
int64_t block_copy_reset_unallocated(BlockCopyState *s,
                                     int64_t offset, int64_t *count)
{
    int ret;
    int64_t clusters, bytes;

    ret = block_copy_is_cluster_allocated(s, offset, &clusters);
    if (ret < 0) {
        return ret;
    }

    bytes = clusters * s->cluster_size;

    if (!ret) {
        bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
        progress_set_remaining(s->progress,
                               bdrv_get_dirty_count(s->copy_bitmap) +
                               s->in_flight_bytes);
    }

    *count = bytes;
    return ret;
}

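/*
 * A sketch of how a sync=top user is expected to drive this (hypothetical
 * loop and names; the real caller is the backup job's bitmap initialization):
 *
 *     int64_t offset, count;
 *
 *     for (offset = 0; offset < len; offset += count) {
 *         if (block_copy_reset_unallocated(bcs, offset, &count) < 0) {
 *             break;  // report or handle the error
 *         }
 *     }
 */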
/*
 * block_copy_dirty_clusters
 *
 * Copy dirty clusters in the @offset/@bytes range.
 * Returns 1 if dirty clusters were found and successfully copied, 0 if no
 * dirty clusters were found and -errno on failure.
 */
static int coroutine_fn
block_copy_dirty_clusters(BlockCopyCallState *call_state)
{
    BlockCopyState *s = call_state->s;
    int64_t offset = call_state->offset;
    int64_t bytes = call_state->bytes;

    int ret = 0;
    bool found_dirty = false;
    int64_t end = offset + bytes;
    AioTaskPool *aio = NULL;

    /*
     * The block_copy() user is responsible for keeping source and target in
     * the same aio context.
     */
    assert(bdrv_get_aio_context(s->source->bs) ==
           bdrv_get_aio_context(s->target->bs));

    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
    assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));

    while (bytes && aio_task_pool_status(aio) == 0 && !call_state->cancelled) {
        BlockCopyTask *task;
        int64_t status_bytes;

        task = block_copy_task_create(s, call_state, offset, bytes);
        if (!task) {
            /* No more dirty bits in the bitmap */
            trace_block_copy_skip_range(s, offset, bytes);
            break;
        }
        if (task->offset > offset) {
            trace_block_copy_skip_range(s, offset, task->offset - offset);
        }

        found_dirty = true;

        ret = block_copy_block_status(s, task->offset, task->bytes,
                                      &status_bytes);
        assert(ret >= 0); /* never fail */
        if (status_bytes < task->bytes) {
            block_copy_task_shrink(task, status_bytes);
        }
        if (s->skip_unallocated && !(ret & BDRV_BLOCK_ALLOCATED)) {
            block_copy_task_end(task, 0);
            progress_set_remaining(s->progress,
                                   bdrv_get_dirty_count(s->copy_bitmap) +
                                   s->in_flight_bytes);
            trace_block_copy_skip_range(s, task->offset, task->bytes);
            offset = task_end(task);
            bytes = end - offset;
            g_free(task);
            continue;
        }
        task->zeroes = ret & BDRV_BLOCK_ZERO;

        if (s->speed) {
            if (!call_state->ignore_ratelimit) {
                uint64_t ns = ratelimit_calculate_delay(&s->rate_limit, 0);
                if (ns > 0) {
                    block_copy_task_end(task, -EAGAIN);
                    g_free(task);
                    qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, ns,
                                              &call_state->sleep_state);
                    continue;
                }
            }

            ratelimit_calculate_delay(&s->rate_limit, task->bytes);
        }

        trace_block_copy_process(s, task->offset);

        co_get_from_shres(s->mem, task->bytes);

        offset = task_end(task);
        bytes = end - offset;

        if (!aio && bytes) {
            aio = aio_task_pool_new(call_state->max_workers);
        }

        ret = block_copy_task_run(aio, task);
        if (ret < 0) {
            goto out;
        }
    }

out:
    if (aio) {
        aio_task_pool_wait_all(aio);

        /*
         * We are not really interested in -ECANCELED returned from
         * block_copy_task_run. If it fails, it means some task already failed
         * for a real reason, so let's return the first failure.
         * Still, assert that we don't rewrite failure by success.
         *
         * Note: ret may be positive here because of block-status result.
         */
        assert(ret >= 0 || aio_task_pool_status(aio) < 0);
        ret = aio_task_pool_status(aio);

        aio_task_pool_free(aio);
    }

    return ret < 0 ? ret : found_dirty;
}

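/*
 * Wake up a block-copy call that is currently sleeping in the rate limiter so
 * that it re-evaluates its state immediately (used on speed changes and on
 * cancellation).
 */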
void block_copy_kick(BlockCopyCallState *call_state)
{
    if (call_state->sleep_state) {
        qemu_co_sleep_wake(call_state->sleep_state);
    }
}

/*
 * block_copy_common
 *
 * Copy the requested region, according to the dirty bitmap.
 * Collaborate with parallel block_copy requests: if they succeed it will help
 * us. If they fail, we will retry not-copied regions. So, if we return an
 * error, it means that some I/O operation failed in the context of _this_
 * block_copy call, not some parallel operation.
 */
static int coroutine_fn block_copy_common(BlockCopyCallState *call_state)
{
    int ret;

    QLIST_INSERT_HEAD(&call_state->s->calls, call_state, list);

    do {
        ret = block_copy_dirty_clusters(call_state);

        if (ret == 0 && !call_state->cancelled) {
            ret = block_copy_wait_one(call_state->s, call_state->offset,
                                      call_state->bytes);
        }

        /*
         * We retry in two cases:
         * 1. Some progress was done
         *    Something was copied, which means that there were yield points
         *    and some new dirty bits may have appeared (due to failed parallel
         *    block-copy requests).
         * 2. We have waited for some intersecting block-copy request
         *    It may have failed and produced new dirty bits.
         */
    } while (ret > 0 && !call_state->cancelled);

    call_state->finished = true;

    if (call_state->cb) {
        call_state->cb(call_state->cb_opaque);
    }

    QLIST_REMOVE(call_state, list);

    return ret;
}

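/*
 * Synchronous wrapper around block_copy_common(): copy the given range in the
 * calling coroutine with the default number of workers and no chunk-size
 * limit.  Must be called from coroutine context.
 */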
int coroutine_fn block_copy(BlockCopyState *s, int64_t start, int64_t bytes,
                            bool ignore_ratelimit)
{
    BlockCopyCallState call_state = {
        .s = s,
        .offset = start,
        .bytes = bytes,
        .ignore_ratelimit = ignore_ratelimit,
        .max_workers = BLOCK_COPY_MAX_WORKERS,
    };

    return block_copy_common(&call_state);
}

static void coroutine_fn block_copy_async_co_entry(void *opaque)
{
    block_copy_common(opaque);
}

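/*
 * Start an asynchronous block-copy call in its own coroutine.  @max_workers
 * and @max_chunk bound the parallelism and per-task size; @cb (if non-NULL)
 * is invoked with @cb_opaque once the call finishes.  The returned call state
 * stays valid until the caller frees it with block_copy_call_free().
 */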
BlockCopyCallState *block_copy_async(BlockCopyState *s,
                                     int64_t offset, int64_t bytes,
                                     int max_workers, int64_t max_chunk,
                                     BlockCopyAsyncCallbackFunc cb,
                                     void *cb_opaque)
{
    BlockCopyCallState *call_state = g_new(BlockCopyCallState, 1);

    *call_state = (BlockCopyCallState) {
        .s = s,
        .offset = offset,
        .bytes = bytes,
        .max_workers = max_workers,
        .max_chunk = max_chunk,
        .cb = cb,
        .cb_opaque = cb_opaque,

        .co = qemu_coroutine_create(block_copy_async_co_entry, call_state),
    };

    qemu_coroutine_enter(call_state->co);

    return call_state;
}

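/*
 * A minimal usage sketch of the async API (hypothetical caller and names; a
 * real user such as the backup job drives this from its own callback and job
 * loop):
 *
 *     BlockCopyCallState *cs;
 *
 *     cs = block_copy_async(bcs, 0, len, BLOCK_COPY_MAX_WORKERS, 0,
 *                           done_cb, opaque);
 *     ...
 *     // once done_cb has run, i.e. block_copy_call_finished(cs) is true:
 *     if (!block_copy_call_succeeded(cs)) {
 *         bool error_is_read;
 *         int ret = block_copy_call_status(cs, &error_is_read);
 *         // handle ret / error_is_read, or block_copy_call_cancelled(cs)
 *     }
 *     block_copy_call_free(cs);
 */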
void block_copy_call_free(BlockCopyCallState *call_state)
{
    if (!call_state) {
        return;
    }

    assert(call_state->finished);
    g_free(call_state);
}

bool block_copy_call_finished(BlockCopyCallState *call_state)
{
    return call_state->finished;
}

bool block_copy_call_succeeded(BlockCopyCallState *call_state)
{
    return call_state->finished && !call_state->cancelled &&
        call_state->ret == 0;
}

bool block_copy_call_failed(BlockCopyCallState *call_state)
{
    return call_state->finished && !call_state->cancelled &&
        call_state->ret < 0;
}

bool block_copy_call_cancelled(BlockCopyCallState *call_state)
{
    return call_state->cancelled;
}

int block_copy_call_status(BlockCopyCallState *call_state, bool *error_is_read)
{
    assert(call_state->finished);
    if (error_is_read) {
        *error_is_read = call_state->error_is_read;
    }
    return call_state->ret;
}

void block_copy_call_cancel(BlockCopyCallState *call_state)
{
    call_state->cancelled = true;
    block_copy_kick(call_state);
}

BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s)
{
    return s->copy_bitmap;
}

void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip)
{
    s->skip_unallocated = skip;
}

void block_copy_set_speed(BlockCopyState *s, uint64_t speed)
{
    s->speed = speed;
    if (speed > 0) {
        ratelimit_set_speed(&s->rate_limit, speed, BLOCK_COPY_SLICE_TIME);
    }

    /*
     * Note: it would be good to kick all call states from here, but that
     * should be done only from coroutine context, to not crash if the
     * s->calls list changes while entering one coroutine. So for now, the
     * only user of this function kicks its block-copy callers itself.
     */
}