1
2
3
4
5
6
7
8
9
10
11
12
13
14#include "qemu/osdep.h"
15
16#include "trace.h"
17#include "block/block.h"
18#include "block/block_int.h"
19#include "block/blockjob_int.h"
20#include "block/block_backup.h"
21#include "qapi/error.h"
22#include "qapi/qmp/qerror.h"
23#include "qemu/ratelimit.h"
24#include "qemu/cutils.h"
25#include "sysemu/block-backend.h"
26#include "qemu/bitmap.h"
27#include "qemu/error-report.h"
28
29#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
30
/*
 * Per-job state of a backup block job.
 */
typedef struct BackupBlockJob {
 /* Generic block-job state; common.blk is the backend the data is read from */
 BlockJob common;
 /* Backend the copied data is written to; released in backup_clean() */
 BlockBackend *target;

 /* Caller-supplied dirty bitmap; NULL unless sync_mode is incremental */
 BdrvDirtyBitmap *sync_bitmap;
 MirrorSyncMode sync_mode;
 /* Error policies handed to block_job_error_action() for reads / writes */
 BlockdevOnError on_source_error;
 BlockdevOnError on_target_error;
 /*
  * Taken for reading by every backup_do_cow() call; backup_run() takes it
  * for writing once after removing the before-write notifier.
  */
 CoRwlock flush_rwlock;
 /* Length of the source in bytes, as returned by bdrv_getlength() */
 uint64_t len;
 /* Bytes copied since yield_and_check() last consulted the rate limit */
 uint64_t bytes_read;
 /* Copy granularity in bytes; one copy_bitmap bit covers one cluster */
 int64_t cluster_size;
 /* Adds BDRV_REQ_WRITE_COMPRESSED to bounce-buffer writes when set */
 bool compress;
 /* Notifier registered on the source node while backup_run() is active */
 NotifierWithReturn before_write;
 /* Copy requests currently in progress, see cow_request_begin()/_end() */
 QLIST_HEAD(, CowRequest) inflight_reqs;

 /* One bit per cluster; a set bit means the cluster is still to be copied */
 HBitmap *copy_bitmap;
 /* Try blk_co_copy_range() first; cleared after its first failure */
 bool use_copy_range;
 /* Upper bound in bytes for one copy_range call; multiple of cluster_size */
 int64_t copy_range_size;

 /* Initialised from bdrv_chain_contains(target, bs); adds BDRV_REQ_SERIALISING */
 bool serialize_target_writes;
} BackupBlockJob;
53
54static const BlockJobDriver backup_job_driver;
55
56
57static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
58 int64_t start,
59 int64_t end)
60{
61 CowRequest *req;
62 bool retry;
63
64 do {
65 retry = false;
66 QLIST_FOREACH(req, &job->inflight_reqs, list) {
67 if (end > req->start_byte && start < req->end_byte) {
68 qemu_co_queue_wait(&req->wait_queue, NULL);
69 retry = true;
70 break;
71 }
72 }
73 } while (retry);
74}
75
76
77static void cow_request_begin(CowRequest *req, BackupBlockJob *job,
78 int64_t start, int64_t end)
79{
80 req->start_byte = start;
81 req->end_byte = end;
82 qemu_co_queue_init(&req->wait_queue);
83 QLIST_INSERT_HEAD(&job->inflight_reqs, req, list);
84}
85
86
/*
 * Retire @req: take it off the in-flight list, then wake every coroutine
 * queued on it by wait_for_overlapping_requests().
 */
static void cow_request_end(CowRequest *req)
{
 QLIST_REMOVE(req, list);
 qemu_co_queue_restart_all(&req->wait_queue);
}
92
93
94
95static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
96 int64_t start,
97 int64_t end,
98 bool is_write_notifier,
99 bool *error_is_read,
100 void **bounce_buffer)
101{
102 int ret;
103 struct iovec iov;
104 QEMUIOVector qiov;
105 BlockBackend *blk = job->common.blk;
106 int nbytes;
107 int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0;
108 int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0;
109
110 hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
111 nbytes = MIN(job->cluster_size, job->len - start);
112 if (!*bounce_buffer) {
113 *bounce_buffer = blk_blockalign(blk, job->cluster_size);
114 }
115 iov.iov_base = *bounce_buffer;
116 iov.iov_len = nbytes;
117 qemu_iovec_init_external(&qiov, &iov, 1);
118
119 ret = blk_co_preadv(blk, start, qiov.size, &qiov, read_flags);
120 if (ret < 0) {
121 trace_backup_do_cow_read_fail(job, start, ret);
122 if (error_is_read) {
123 *error_is_read = true;
124 }
125 goto fail;
126 }
127
128 if (qemu_iovec_is_zero(&qiov)) {
129 ret = blk_co_pwrite_zeroes(job->target, start,
130 qiov.size, write_flags | BDRV_REQ_MAY_UNMAP);
131 } else {
132 ret = blk_co_pwritev(job->target, start,
133 qiov.size, &qiov, write_flags |
134 (job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0));
135 }
136 if (ret < 0) {
137 trace_backup_do_cow_write_fail(job, start, ret);
138 if (error_is_read) {
139 *error_is_read = false;
140 }
141 goto fail;
142 }
143
144 return nbytes;
145fail:
146 hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
147 return ret;
148
149}
150
151
152
153static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job,
154 int64_t start,
155 int64_t end,
156 bool is_write_notifier)
157{
158 int ret;
159 int nr_clusters;
160 BlockBackend *blk = job->common.blk;
161 int nbytes;
162 int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0;
163 int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0;
164
165 assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size));
166 nbytes = MIN(job->copy_range_size, end - start);
167 nr_clusters = DIV_ROUND_UP(nbytes, job->cluster_size);
168 hbitmap_reset(job->copy_bitmap, start / job->cluster_size,
169 nr_clusters);
170 ret = blk_co_copy_range(blk, start, job->target, start, nbytes,
171 read_flags, write_flags);
172 if (ret < 0) {
173 trace_backup_do_cow_copy_range_fail(job, start, ret);
174 hbitmap_set(job->copy_bitmap, start / job->cluster_size,
175 nr_clusters);
176 return ret;
177 }
178
179 return nbytes;
180}
181
182static int coroutine_fn backup_do_cow(BackupBlockJob *job,
183 int64_t offset, uint64_t bytes,
184 bool *error_is_read,
185 bool is_write_notifier)
186{
187 CowRequest cow_request;
188 int ret = 0;
189 int64_t start, end;
190 void *bounce_buffer = NULL;
191
192 qemu_co_rwlock_rdlock(&job->flush_rwlock);
193
194 start = QEMU_ALIGN_DOWN(offset, job->cluster_size);
195 end = QEMU_ALIGN_UP(bytes + offset, job->cluster_size);
196
197 trace_backup_do_cow_enter(job, start, offset, bytes);
198
199 wait_for_overlapping_requests(job, start, end);
200 cow_request_begin(&cow_request, job, start, end);
201
202 while (start < end) {
203 if (!hbitmap_get(job->copy_bitmap, start / job->cluster_size)) {
204 trace_backup_do_cow_skip(job, start);
205 start += job->cluster_size;
206 continue;
207 }
208
209 trace_backup_do_cow_process(job, start);
210
211 if (job->use_copy_range) {
212 ret = backup_cow_with_offload(job, start, end, is_write_notifier);
213 if (ret < 0) {
214 job->use_copy_range = false;
215 }
216 }
217 if (!job->use_copy_range) {
218 ret = backup_cow_with_bounce_buffer(job, start, end, is_write_notifier,
219 error_is_read, &bounce_buffer);
220 }
221 if (ret < 0) {
222 break;
223 }
224
225
226
227
228 start += ret;
229 job->bytes_read += ret;
230 job_progress_update(&job->common.job, ret);
231 ret = 0;
232 }
233
234 if (bounce_buffer) {
235 qemu_vfree(bounce_buffer);
236 }
237
238 cow_request_end(&cow_request);
239
240 trace_backup_do_cow_return(job, offset, bytes, ret);
241
242 qemu_co_rwlock_unlock(&job->flush_rwlock);
243
244 return ret;
245}
246
247static int coroutine_fn backup_before_write_notify(
248 NotifierWithReturn *notifier,
249 void *opaque)
250{
251 BackupBlockJob *job = container_of(notifier, BackupBlockJob, before_write);
252 BdrvTrackedRequest *req = opaque;
253
254 assert(req->bs == blk_bs(job->common.blk));
255 assert(QEMU_IS_ALIGNED(req->offset, BDRV_SECTOR_SIZE));
256 assert(QEMU_IS_ALIGNED(req->bytes, BDRV_SECTOR_SIZE));
257
258 return backup_do_cow(job, req->offset, req->bytes, NULL, true);
259}
260
261static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
262{
263 BdrvDirtyBitmap *bm;
264 BlockDriverState *bs = blk_bs(job->common.blk);
265
266 if (ret < 0) {
267
268 bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL);
269 assert(bm);
270 } else {
271
272 bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL);
273 assert(bm);
274 }
275}
276
277static void backup_commit(Job *job)
278{
279 BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
280 if (s->sync_bitmap) {
281 backup_cleanup_sync_bitmap(s, 0);
282 }
283}
284
285static void backup_abort(Job *job)
286{
287 BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
288 if (s->sync_bitmap) {
289 backup_cleanup_sync_bitmap(s, -1);
290 }
291}
292
293static void backup_clean(Job *job)
294{
295 BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
296 assert(s->target);
297 blk_unref(s->target);
298 s->target = NULL;
299}
300
301static void backup_attached_aio_context(BlockJob *job, AioContext *aio_context)
302{
303 BackupBlockJob *s = container_of(job, BackupBlockJob, common);
304
305 blk_set_aio_context(s->target, aio_context);
306}
307
308void backup_do_checkpoint(BlockJob *job, Error **errp)
309{
310 BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
311 int64_t len;
312
313 assert(block_job_driver(job) == &backup_job_driver);
314
315 if (backup_job->sync_mode != MIRROR_SYNC_MODE_NONE) {
316 error_setg(errp, "The backup job only supports block checkpoint in"
317 " sync=none mode");
318 return;
319 }
320
321 len = DIV_ROUND_UP(backup_job->len, backup_job->cluster_size);
322 hbitmap_set(backup_job->copy_bitmap, 0, len);
323}
324
325void backup_wait_for_overlapping_requests(BlockJob *job, int64_t offset,
326 uint64_t bytes)
327{
328 BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
329 int64_t start, end;
330
331 assert(block_job_driver(job) == &backup_job_driver);
332
333 start = QEMU_ALIGN_DOWN(offset, backup_job->cluster_size);
334 end = QEMU_ALIGN_UP(offset + bytes, backup_job->cluster_size);
335 wait_for_overlapping_requests(backup_job, start, end);
336}
337
338void backup_cow_request_begin(CowRequest *req, BlockJob *job,
339 int64_t offset, uint64_t bytes)
340{
341 BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
342 int64_t start, end;
343
344 assert(block_job_driver(job) == &backup_job_driver);
345
346 start = QEMU_ALIGN_DOWN(offset, backup_job->cluster_size);
347 end = QEMU_ALIGN_UP(offset + bytes, backup_job->cluster_size);
348 cow_request_begin(req, backup_job, start, end);
349}
350
/*
 * Public wrapper around cow_request_end(): retire a request previously
 * registered with backup_cow_request_begin().
 */
void backup_cow_request_end(CowRequest *req)
{
 cow_request_end(req);
}
355
356static void backup_drain(BlockJob *job)
357{
358 BackupBlockJob *s = container_of(job, BackupBlockJob, common);
359
360
361
362
363 if (s->target) {
364 BlockBackend *target = s->target;
365 blk_ref(target);
366 blk_drain(target);
367 blk_unref(target);
368 }
369}
370
371static BlockErrorAction backup_error_action(BackupBlockJob *job,
372 bool read, int error)
373{
374 if (read) {
375 return block_job_error_action(&job->common, job->on_source_error,
376 true, error);
377 } else {
378 return block_job_error_action(&job->common, job->on_target_error,
379 false, error);
380 }
381}
382
383static bool coroutine_fn yield_and_check(BackupBlockJob *job)
384{
385 uint64_t delay_ns;
386
387 if (job_is_cancelled(&job->common.job)) {
388 return true;
389 }
390
391
392
393 delay_ns = block_job_ratelimit_get_delay(&job->common, job->bytes_read);
394 job->bytes_read = 0;
395 job_sleep_ns(&job->common.job, delay_ns);
396
397 if (job_is_cancelled(&job->common.job)) {
398 return true;
399 }
400
401 return false;
402}
403
404static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
405{
406 int ret;
407 bool error_is_read;
408 int64_t cluster;
409 HBitmapIter hbi;
410
411 hbitmap_iter_init(&hbi, job->copy_bitmap, 0);
412 while ((cluster = hbitmap_iter_next(&hbi, true)) != -1) {
413 do {
414 if (yield_and_check(job)) {
415 return 0;
416 }
417 ret = backup_do_cow(job, cluster * job->cluster_size,
418 job->cluster_size, &error_is_read, false);
419 if (ret < 0 && backup_error_action(job, error_is_read, -ret) ==
420 BLOCK_ERROR_ACTION_REPORT)
421 {
422 return ret;
423 }
424 } while (ret < 0);
425 }
426
427 return 0;
428}
429
430
431static void backup_incremental_init_copy_bitmap(BackupBlockJob *job)
432{
433 BdrvDirtyBitmapIter *dbi;
434 int64_t offset;
435 int64_t end = DIV_ROUND_UP(bdrv_dirty_bitmap_size(job->sync_bitmap),
436 job->cluster_size);
437
438 dbi = bdrv_dirty_iter_new(job->sync_bitmap);
439 while ((offset = bdrv_dirty_iter_next(dbi)) != -1) {
440 int64_t cluster = offset / job->cluster_size;
441 int64_t next_cluster;
442
443 offset += bdrv_dirty_bitmap_granularity(job->sync_bitmap);
444 if (offset >= bdrv_dirty_bitmap_size(job->sync_bitmap)) {
445 hbitmap_set(job->copy_bitmap, cluster, end - cluster);
446 break;
447 }
448
449 offset = bdrv_dirty_bitmap_next_zero(job->sync_bitmap, offset);
450 if (offset == -1) {
451 hbitmap_set(job->copy_bitmap, cluster, end - cluster);
452 break;
453 }
454
455 next_cluster = DIV_ROUND_UP(offset, job->cluster_size);
456 hbitmap_set(job->copy_bitmap, cluster, next_cluster - cluster);
457 if (next_cluster >= end) {
458 break;
459 }
460
461 bdrv_set_dirty_iter(dbi, next_cluster * job->cluster_size);
462 }
463
464
465 job_progress_update(&job->common.job,
466 job->len - hbitmap_count(job->copy_bitmap) * job->cluster_size);
467
468 bdrv_dirty_iter_free(dbi);
469}
470
471static int coroutine_fn backup_run(Job *job, Error **errp)
472{
473 BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
474 BlockDriverState *bs = blk_bs(s->common.blk);
475 int64_t offset, nb_clusters;
476 int ret = 0;
477
478 QLIST_INIT(&s->inflight_reqs);
479 qemu_co_rwlock_init(&s->flush_rwlock);
480
481 nb_clusters = DIV_ROUND_UP(s->len, s->cluster_size);
482 job_progress_set_remaining(job, s->len);
483
484 s->copy_bitmap = hbitmap_alloc(nb_clusters, 0);
485 if (s->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
486 backup_incremental_init_copy_bitmap(s);
487 } else {
488 hbitmap_set(s->copy_bitmap, 0, nb_clusters);
489 }
490
491
492 s->before_write.notify = backup_before_write_notify;
493 bdrv_add_before_write_notifier(bs, &s->before_write);
494
495 if (s->sync_mode == MIRROR_SYNC_MODE_NONE) {
496
497
498 while (!job_is_cancelled(job)) {
499
500
501 job_yield(job);
502 }
503 } else if (s->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
504 ret = backup_run_incremental(s);
505 } else {
506
507 for (offset = 0; offset < s->len;
508 offset += s->cluster_size) {
509 bool error_is_read;
510 int alloced = 0;
511
512 if (yield_and_check(s)) {
513 break;
514 }
515
516 if (s->sync_mode == MIRROR_SYNC_MODE_TOP) {
517 int i;
518 int64_t n;
519
520
521
522
523 for (i = 0; i < s->cluster_size;) {
524
525
526
527
528
529
530 alloced =
531 bdrv_is_allocated(bs, offset + i,
532 s->cluster_size - i, &n);
533 i += n;
534
535 if (alloced || n == 0) {
536 break;
537 }
538 }
539
540
541
542 if (alloced == 0) {
543 continue;
544 }
545 }
546
547 if (alloced < 0) {
548 ret = alloced;
549 } else {
550 ret = backup_do_cow(s, offset, s->cluster_size,
551 &error_is_read, false);
552 }
553 if (ret < 0) {
554
555 BlockErrorAction action =
556 backup_error_action(s, error_is_read, -ret);
557 if (action == BLOCK_ERROR_ACTION_REPORT) {
558 break;
559 } else {
560 offset -= s->cluster_size;
561 continue;
562 }
563 }
564 }
565 }
566
567 notifier_with_return_remove(&s->before_write);
568
569
570 qemu_co_rwlock_wrlock(&s->flush_rwlock);
571 qemu_co_rwlock_unlock(&s->flush_rwlock);
572 hbitmap_free(s->copy_bitmap);
573
574 return ret;
575}
576
/*
 * Driver table of the backup job: hooks the callbacks defined in this file
 * into the generic job layer (.job_driver) and the block-job layer.
 */
static const BlockJobDriver backup_job_driver = {
 .job_driver = {
 .instance_size = sizeof(BackupBlockJob),
 .job_type = JOB_TYPE_BACKUP,
 .free = block_job_free,
 .user_resume = block_job_user_resume,
 .drain = block_job_drain,
 .run = backup_run,
 .commit = backup_commit,
 .abort = backup_abort,
 .clean = backup_clean,
 },
 .attached_aio_context = backup_attached_aio_context,
 .drain = backup_drain,
};
592
593BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
594 BlockDriverState *target, int64_t speed,
595 MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
596 bool compress,
597 BlockdevOnError on_source_error,
598 BlockdevOnError on_target_error,
599 int creation_flags,
600 BlockCompletionFunc *cb, void *opaque,
601 JobTxn *txn, Error **errp)
602{
603 int64_t len;
604 BlockDriverInfo bdi;
605 BackupBlockJob *job = NULL;
606 int ret;
607
608 assert(bs);
609 assert(target);
610
611 if (bs == target) {
612 error_setg(errp, "Source and target cannot be the same");
613 return NULL;
614 }
615
616 if (!bdrv_is_inserted(bs)) {
617 error_setg(errp, "Device is not inserted: %s",
618 bdrv_get_device_name(bs));
619 return NULL;
620 }
621
622 if (!bdrv_is_inserted(target)) {
623 error_setg(errp, "Device is not inserted: %s",
624 bdrv_get_device_name(target));
625 return NULL;
626 }
627
628 if (compress && target->drv->bdrv_co_pwritev_compressed == NULL) {
629 error_setg(errp, "Compression is not supported for this drive %s",
630 bdrv_get_device_name(target));
631 return NULL;
632 }
633
634 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
635 return NULL;
636 }
637
638 if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) {
639 return NULL;
640 }
641
642 if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
643 if (!sync_bitmap) {
644 error_setg(errp, "must provide a valid bitmap name for "
645 "\"incremental\" sync mode");
646 return NULL;
647 }
648
649
650 if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp) < 0) {
651 return NULL;
652 }
653 } else if (sync_bitmap) {
654 error_setg(errp,
655 "a sync_bitmap was provided to backup_run, "
656 "but received an incompatible sync_mode (%s)",
657 MirrorSyncMode_str(sync_mode));
658 return NULL;
659 }
660
661 len = bdrv_getlength(bs);
662 if (len < 0) {
663 error_setg_errno(errp, -len, "unable to get length for '%s'",
664 bdrv_get_device_name(bs));
665 goto error;
666 }
667
668
669 job = block_job_create(job_id, &backup_job_driver, txn, bs,
670 BLK_PERM_CONSISTENT_READ,
671 BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
672 BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD,
673 speed, creation_flags, cb, opaque, errp);
674 if (!job) {
675 goto error;
676 }
677
678
679 job->target = blk_new(BLK_PERM_WRITE,
680 BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
681 BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD);
682 ret = blk_insert_bs(job->target, target, errp);
683 if (ret < 0) {
684 goto error;
685 }
686
687 job->on_source_error = on_source_error;
688 job->on_target_error = on_target_error;
689 job->sync_mode = sync_mode;
690 job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ?
691 sync_bitmap : NULL;
692 job->compress = compress;
693
694
695 job->serialize_target_writes = bdrv_chain_contains(target, bs);
696
697
698
699
700 ret = bdrv_get_info(target, &bdi);
701 if (ret == -ENOTSUP && !target->backing) {
702
703 warn_report("The target block device doesn't provide "
704 "information about the block size and it doesn't have a "
705 "backing file. The default block size of %u bytes is "
706 "used. If the actual block size of the target exceeds "
707 "this default, the backup may be unusable",
708 BACKUP_CLUSTER_SIZE_DEFAULT);
709 job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
710 } else if (ret < 0 && !target->backing) {
711 error_setg_errno(errp, -ret,
712 "Couldn't determine the cluster size of the target image, "
713 "which has no backing file");
714 error_append_hint(errp,
715 "Aborting, since this may create an unusable destination image\n");
716 goto error;
717 } else if (ret < 0 && target->backing) {
718
719 job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
720 } else {
721 job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
722 }
723 job->use_copy_range = true;
724 job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk),
725 blk_get_max_transfer(job->target));
726 job->copy_range_size = MAX(job->cluster_size,
727 QEMU_ALIGN_UP(job->copy_range_size,
728 job->cluster_size));
729
730
731 block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
732 &error_abort);
733 job->len = len;
734
735 return &job->common;
736
737 error:
738 if (sync_bitmap) {
739 bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
740 }
741 if (job) {
742 backup_clean(&job->common.job);
743 job_early_fail(&job->common.job);
744 }
745
746 return NULL;
747}
748