1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include "qemu/osdep.h"
26
27#include "block/qdict.h"
28#include "sysemu/block-backend.h"
29#include "qemu/main-loop.h"
30#include "qemu/module.h"
31#include "qcow2.h"
32#include "qemu/error-report.h"
33#include "qapi/error.h"
34#include "qapi/qapi-events-block-core.h"
35#include "qapi/qmp/qdict.h"
36#include "qapi/qmp/qstring.h"
37#include "trace.h"
38#include "qemu/option_int.h"
39#include "qemu/cutils.h"
40#include "qemu/bswap.h"
41#include "qapi/qobject-input-visitor.h"
42#include "qapi/qapi-visit-block-core.h"
43#include "crypto.h"
44#include "block/aio_task.h"
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63typedef struct {
64 uint32_t magic;
65 uint32_t len;
66} QEMU_PACKED QCowExtension;
67
/* Header extension type identifiers (QCowExtension.magic). */
#define  QCOW2_EXT_MAGIC_END             0           /* end of extension area */
#define  QCOW2_EXT_MAGIC_BACKING_FORMAT  0xE2792ACA  /* backing format name */
#define  QCOW2_EXT_MAGIC_FEATURE_TABLE   0x6803f857  /* feature name table */
#define  QCOW2_EXT_MAGIC_CRYPTO_HEADER   0x0537be77  /* LUKS crypto header */
#define  QCOW2_EXT_MAGIC_BITMAPS         0x23852875  /* bitmaps directory */
#define  QCOW2_EXT_MAGIC_DATA_FILE       0x44415441  /* data file name ("DATA") */
74
75static int coroutine_fn
76qcow2_co_preadv_compressed(BlockDriverState *bs,
77 uint64_t file_cluster_offset,
78 uint64_t offset,
79 uint64_t bytes,
80 QEMUIOVector *qiov,
81 size_t qiov_offset);
82
83static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
84{
85 const QCowHeader *cow_header = (const void *)buf;
86
87 if (buf_size >= sizeof(QCowHeader) &&
88 be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
89 be32_to_cpu(cow_header->version) >= 2)
90 return 100;
91 else
92 return 0;
93}
94
95
96static ssize_t qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset,
97 uint8_t *buf, size_t buflen,
98 void *opaque, Error **errp)
99{
100 BlockDriverState *bs = opaque;
101 BDRVQcow2State *s = bs->opaque;
102 ssize_t ret;
103
104 if ((offset + buflen) > s->crypto_header.length) {
105 error_setg(errp, "Request for data outside of extension header");
106 return -1;
107 }
108
109 ret = bdrv_pread(bs->file,
110 s->crypto_header.offset + offset, buf, buflen);
111 if (ret < 0) {
112 error_setg_errno(errp, -ret, "Could not read encryption header");
113 return -1;
114 }
115 return ret;
116}
117
118
119static ssize_t qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen,
120 void *opaque, Error **errp)
121{
122 BlockDriverState *bs = opaque;
123 BDRVQcow2State *s = bs->opaque;
124 int64_t ret;
125 int64_t clusterlen;
126
127 ret = qcow2_alloc_clusters(bs, headerlen);
128 if (ret < 0) {
129 error_setg_errno(errp, -ret,
130 "Cannot allocate cluster for LUKS header size %zu",
131 headerlen);
132 return -1;
133 }
134
135 s->crypto_header.length = headerlen;
136 s->crypto_header.offset = ret;
137
138
139
140 clusterlen = size_to_clusters(s, headerlen) * s->cluster_size;
141 assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen, false) == 0);
142 ret = bdrv_pwrite_zeroes(bs->file,
143 ret + headerlen,
144 clusterlen - headerlen, 0);
145 if (ret < 0) {
146 error_setg_errno(errp, -ret, "Could not zero fill encryption header");
147 return -1;
148 }
149
150 return ret;
151}
152
153
154static ssize_t qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset,
155 const uint8_t *buf, size_t buflen,
156 void *opaque, Error **errp)
157{
158 BlockDriverState *bs = opaque;
159 BDRVQcow2State *s = bs->opaque;
160 ssize_t ret;
161
162 if ((offset + buflen) > s->crypto_header.length) {
163 error_setg(errp, "Request for data outside of extension header");
164 return -1;
165 }
166
167 ret = bdrv_pwrite(bs->file,
168 s->crypto_header.offset + offset, buf, buflen);
169 if (ret < 0) {
170 error_setg_errno(errp, -ret, "Could not read encryption header");
171 return -1;
172 }
173 return ret;
174}
175
176
177
178
179
180
181
182
183
184static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
185 uint64_t end_offset, void **p_feature_table,
186 int flags, bool *need_update_header,
187 Error **errp)
188{
189 BDRVQcow2State *s = bs->opaque;
190 QCowExtension ext;
191 uint64_t offset;
192 int ret;
193 Qcow2BitmapHeaderExt bitmaps_ext;
194
195 if (need_update_header != NULL) {
196 *need_update_header = false;
197 }
198
199#ifdef DEBUG_EXT
200 printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
201#endif
202 offset = start_offset;
203 while (offset < end_offset) {
204
205#ifdef DEBUG_EXT
206
207 if (offset > s->cluster_size)
208 printf("qcow2_read_extension: suspicious offset %lu\n", offset);
209
210 printf("attempting to read extended header in offset %lu\n", offset);
211#endif
212
213 ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext));
214 if (ret < 0) {
215 error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: "
216 "pread fail from offset %" PRIu64, offset);
217 return 1;
218 }
219 ext.magic = be32_to_cpu(ext.magic);
220 ext.len = be32_to_cpu(ext.len);
221 offset += sizeof(ext);
222#ifdef DEBUG_EXT
223 printf("ext.magic = 0x%x\n", ext.magic);
224#endif
225 if (offset > end_offset || ext.len > end_offset - offset) {
226 error_setg(errp, "Header extension too large");
227 return -EINVAL;
228 }
229
230 switch (ext.magic) {
231 case QCOW2_EXT_MAGIC_END:
232 return 0;
233
234 case QCOW2_EXT_MAGIC_BACKING_FORMAT:
235 if (ext.len >= sizeof(bs->backing_format)) {
236 error_setg(errp, "ERROR: ext_backing_format: len=%" PRIu32
237 " too large (>=%zu)", ext.len,
238 sizeof(bs->backing_format));
239 return 2;
240 }
241 ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len);
242 if (ret < 0) {
243 error_setg_errno(errp, -ret, "ERROR: ext_backing_format: "
244 "Could not read format name");
245 return 3;
246 }
247 bs->backing_format[ext.len] = '\0';
248 s->image_backing_format = g_strdup(bs->backing_format);
249#ifdef DEBUG_EXT
250 printf("Qcow2: Got format extension %s\n", bs->backing_format);
251#endif
252 break;
253
254 case QCOW2_EXT_MAGIC_FEATURE_TABLE:
255 if (p_feature_table != NULL) {
256 void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
257 ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
258 if (ret < 0) {
259 error_setg_errno(errp, -ret, "ERROR: ext_feature_table: "
260 "Could not read table");
261 return ret;
262 }
263
264 *p_feature_table = feature_table;
265 }
266 break;
267
268 case QCOW2_EXT_MAGIC_CRYPTO_HEADER: {
269 unsigned int cflags = 0;
270 if (s->crypt_method_header != QCOW_CRYPT_LUKS) {
271 error_setg(errp, "CRYPTO header extension only "
272 "expected with LUKS encryption method");
273 return -EINVAL;
274 }
275 if (ext.len != sizeof(Qcow2CryptoHeaderExtension)) {
276 error_setg(errp, "CRYPTO header extension size %u, "
277 "but expected size %zu", ext.len,
278 sizeof(Qcow2CryptoHeaderExtension));
279 return -EINVAL;
280 }
281
282 ret = bdrv_pread(bs->file, offset, &s->crypto_header, ext.len);
283 if (ret < 0) {
284 error_setg_errno(errp, -ret,
285 "Unable to read CRYPTO header extension");
286 return ret;
287 }
288 s->crypto_header.offset = be64_to_cpu(s->crypto_header.offset);
289 s->crypto_header.length = be64_to_cpu(s->crypto_header.length);
290
291 if ((s->crypto_header.offset % s->cluster_size) != 0) {
292 error_setg(errp, "Encryption header offset '%" PRIu64 "' is "
293 "not a multiple of cluster size '%u'",
294 s->crypto_header.offset, s->cluster_size);
295 return -EINVAL;
296 }
297
298 if (flags & BDRV_O_NO_IO) {
299 cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
300 }
301 s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
302 qcow2_crypto_hdr_read_func,
303 bs, cflags, QCOW2_MAX_THREADS, errp);
304 if (!s->crypto) {
305 return -EINVAL;
306 }
307 } break;
308
309 case QCOW2_EXT_MAGIC_BITMAPS:
310 if (ext.len != sizeof(bitmaps_ext)) {
311 error_setg_errno(errp, -ret, "bitmaps_ext: "
312 "Invalid extension length");
313 return -EINVAL;
314 }
315
316 if (!(s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS)) {
317 if (s->qcow_version < 3) {
318
319 warn_report("This qcow2 v2 image contains bitmaps, but "
320 "they may have been modified by a program "
321 "without persistent bitmap support; so now "
322 "they must all be considered inconsistent");
323 } else {
324 warn_report("a program lacking bitmap support "
325 "modified this file, so all bitmaps are now "
326 "considered inconsistent");
327 }
328 error_printf("Some clusters may be leaked, "
329 "run 'qemu-img check -r' on the image "
330 "file to fix.");
331 if (need_update_header != NULL) {
332
333 *need_update_header = true;
334 }
335 break;
336 }
337
338 ret = bdrv_pread(bs->file, offset, &bitmaps_ext, ext.len);
339 if (ret < 0) {
340 error_setg_errno(errp, -ret, "bitmaps_ext: "
341 "Could not read ext header");
342 return ret;
343 }
344
345 if (bitmaps_ext.reserved32 != 0) {
346 error_setg_errno(errp, -ret, "bitmaps_ext: "
347 "Reserved field is not zero");
348 return -EINVAL;
349 }
350
351 bitmaps_ext.nb_bitmaps = be32_to_cpu(bitmaps_ext.nb_bitmaps);
352 bitmaps_ext.bitmap_directory_size =
353 be64_to_cpu(bitmaps_ext.bitmap_directory_size);
354 bitmaps_ext.bitmap_directory_offset =
355 be64_to_cpu(bitmaps_ext.bitmap_directory_offset);
356
357 if (bitmaps_ext.nb_bitmaps > QCOW2_MAX_BITMAPS) {
358 error_setg(errp,
359 "bitmaps_ext: Image has %" PRIu32 " bitmaps, "
360 "exceeding the QEMU supported maximum of %d",
361 bitmaps_ext.nb_bitmaps, QCOW2_MAX_BITMAPS);
362 return -EINVAL;
363 }
364
365 if (bitmaps_ext.nb_bitmaps == 0) {
366 error_setg(errp, "found bitmaps extension with zero bitmaps");
367 return -EINVAL;
368 }
369
370 if (bitmaps_ext.bitmap_directory_offset & (s->cluster_size - 1)) {
371 error_setg(errp, "bitmaps_ext: "
372 "invalid bitmap directory offset");
373 return -EINVAL;
374 }
375
376 if (bitmaps_ext.bitmap_directory_size >
377 QCOW2_MAX_BITMAP_DIRECTORY_SIZE) {
378 error_setg(errp, "bitmaps_ext: "
379 "bitmap directory size (%" PRIu64 ") exceeds "
380 "the maximum supported size (%d)",
381 bitmaps_ext.bitmap_directory_size,
382 QCOW2_MAX_BITMAP_DIRECTORY_SIZE);
383 return -EINVAL;
384 }
385
386 s->nb_bitmaps = bitmaps_ext.nb_bitmaps;
387 s->bitmap_directory_offset =
388 bitmaps_ext.bitmap_directory_offset;
389 s->bitmap_directory_size =
390 bitmaps_ext.bitmap_directory_size;
391
392#ifdef DEBUG_EXT
393 printf("Qcow2: Got bitmaps extension: "
394 "offset=%" PRIu64 " nb_bitmaps=%" PRIu32 "\n",
395 s->bitmap_directory_offset, s->nb_bitmaps);
396#endif
397 break;
398
399 case QCOW2_EXT_MAGIC_DATA_FILE:
400 {
401 s->image_data_file = g_malloc0(ext.len + 1);
402 ret = bdrv_pread(bs->file, offset, s->image_data_file, ext.len);
403 if (ret < 0) {
404 error_setg_errno(errp, -ret,
405 "ERROR: Could not read data file name");
406 return ret;
407 }
408#ifdef DEBUG_EXT
409 printf("Qcow2: Got external data file %s\n", s->image_data_file);
410#endif
411 break;
412 }
413
414 default:
415
416
417
418 {
419 Qcow2UnknownHeaderExtension *uext;
420
421 uext = g_malloc0(sizeof(*uext) + ext.len);
422 uext->magic = ext.magic;
423 uext->len = ext.len;
424 QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
425
426 ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
427 if (ret < 0) {
428 error_setg_errno(errp, -ret, "ERROR: unknown extension: "
429 "Could not read data");
430 return ret;
431 }
432 }
433 break;
434 }
435
436 offset += ((ext.len + 7) & ~7);
437 }
438
439 return 0;
440}
441
442static void cleanup_unknown_header_ext(BlockDriverState *bs)
443{
444 BDRVQcow2State *s = bs->opaque;
445 Qcow2UnknownHeaderExtension *uext, *next;
446
447 QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) {
448 QLIST_REMOVE(uext, next);
449 g_free(uext);
450 }
451}
452
453static void report_unsupported_feature(Error **errp, Qcow2Feature *table,
454 uint64_t mask)
455{
456 char *features = g_strdup("");
457 char *old;
458
459 while (table && table->name[0] != '\0') {
460 if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) {
461 if (mask & (1ULL << table->bit)) {
462 old = features;
463 features = g_strdup_printf("%s%s%.46s", old, *old ? ", " : "",
464 table->name);
465 g_free(old);
466 mask &= ~(1ULL << table->bit);
467 }
468 }
469 table++;
470 }
471
472 if (mask) {
473 old = features;
474 features = g_strdup_printf("%s%sUnknown incompatible feature: %" PRIx64,
475 old, *old ? ", " : "", mask);
476 g_free(old);
477 }
478
479 error_setg(errp, "Unsupported qcow2 feature(s): %s", features);
480 g_free(features);
481}
482
483
484
485
486
487
488
489
490int qcow2_mark_dirty(BlockDriverState *bs)
491{
492 BDRVQcow2State *s = bs->opaque;
493 uint64_t val;
494 int ret;
495
496 assert(s->qcow_version >= 3);
497
498 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
499 return 0;
500 }
501
502 val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
503 ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
504 &val, sizeof(val));
505 if (ret < 0) {
506 return ret;
507 }
508 ret = bdrv_flush(bs->file->bs);
509 if (ret < 0) {
510 return ret;
511 }
512
513
514 s->incompatible_features |= QCOW2_INCOMPAT_DIRTY;
515 return 0;
516}
517
518
519
520
521
522
523static int qcow2_mark_clean(BlockDriverState *bs)
524{
525 BDRVQcow2State *s = bs->opaque;
526
527 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
528 int ret;
529
530 s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;
531
532 ret = qcow2_flush_caches(bs);
533 if (ret < 0) {
534 return ret;
535 }
536
537 return qcow2_update_header(bs);
538 }
539 return 0;
540}
541
542
543
544
545int qcow2_mark_corrupt(BlockDriverState *bs)
546{
547 BDRVQcow2State *s = bs->opaque;
548
549 s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT;
550 return qcow2_update_header(bs);
551}
552
553
554
555
556
557int qcow2_mark_consistent(BlockDriverState *bs)
558{
559 BDRVQcow2State *s = bs->opaque;
560
561 if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
562 int ret = qcow2_flush_caches(bs);
563 if (ret < 0) {
564 return ret;
565 }
566
567 s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT;
568 return qcow2_update_header(bs);
569 }
570 return 0;
571}
572
573static void qcow2_add_check_result(BdrvCheckResult *out,
574 const BdrvCheckResult *src,
575 bool set_allocation_info)
576{
577 out->corruptions += src->corruptions;
578 out->leaks += src->leaks;
579 out->check_errors += src->check_errors;
580 out->corruptions_fixed += src->corruptions_fixed;
581 out->leaks_fixed += src->leaks_fixed;
582
583 if (set_allocation_info) {
584 out->image_end_offset = src->image_end_offset;
585 out->bfi = src->bfi;
586 }
587}
588
589static int coroutine_fn qcow2_co_check_locked(BlockDriverState *bs,
590 BdrvCheckResult *result,
591 BdrvCheckMode fix)
592{
593 BdrvCheckResult snapshot_res = {};
594 BdrvCheckResult refcount_res = {};
595 int ret;
596
597 memset(result, 0, sizeof(*result));
598
599 ret = qcow2_check_read_snapshot_table(bs, &snapshot_res, fix);
600 if (ret < 0) {
601 qcow2_add_check_result(result, &snapshot_res, false);
602 return ret;
603 }
604
605 ret = qcow2_check_refcounts(bs, &refcount_res, fix);
606 qcow2_add_check_result(result, &refcount_res, true);
607 if (ret < 0) {
608 qcow2_add_check_result(result, &snapshot_res, false);
609 return ret;
610 }
611
612 ret = qcow2_check_fix_snapshot_table(bs, &snapshot_res, fix);
613 qcow2_add_check_result(result, &snapshot_res, false);
614 if (ret < 0) {
615 return ret;
616 }
617
618 if (fix && result->check_errors == 0 && result->corruptions == 0) {
619 ret = qcow2_mark_clean(bs);
620 if (ret < 0) {
621 return ret;
622 }
623 return qcow2_mark_consistent(bs);
624 }
625 return ret;
626}
627
628static int coroutine_fn qcow2_co_check(BlockDriverState *bs,
629 BdrvCheckResult *result,
630 BdrvCheckMode fix)
631{
632 BDRVQcow2State *s = bs->opaque;
633 int ret;
634
635 qemu_co_mutex_lock(&s->lock);
636 ret = qcow2_co_check_locked(bs, result, fix);
637 qemu_co_mutex_unlock(&s->lock);
638 return ret;
639}
640
641int qcow2_validate_table(BlockDriverState *bs, uint64_t offset,
642 uint64_t entries, size_t entry_len,
643 int64_t max_size_bytes, const char *table_name,
644 Error **errp)
645{
646 BDRVQcow2State *s = bs->opaque;
647
648 if (entries > max_size_bytes / entry_len) {
649 error_setg(errp, "%s too large", table_name);
650 return -EFBIG;
651 }
652
653
654
655 if ((INT64_MAX - entries * entry_len < offset) ||
656 (offset_into_cluster(s, offset) != 0)) {
657 error_setg(errp, "%s offset invalid", table_name);
658 return -EINVAL;
659 }
660
661 return 0;
662}
663
664static const char *const mutable_opts[] = {
665 QCOW2_OPT_LAZY_REFCOUNTS,
666 QCOW2_OPT_DISCARD_REQUEST,
667 QCOW2_OPT_DISCARD_SNAPSHOT,
668 QCOW2_OPT_DISCARD_OTHER,
669 QCOW2_OPT_OVERLAP,
670 QCOW2_OPT_OVERLAP_TEMPLATE,
671 QCOW2_OPT_OVERLAP_MAIN_HEADER,
672 QCOW2_OPT_OVERLAP_ACTIVE_L1,
673 QCOW2_OPT_OVERLAP_ACTIVE_L2,
674 QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
675 QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
676 QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
677 QCOW2_OPT_OVERLAP_INACTIVE_L1,
678 QCOW2_OPT_OVERLAP_INACTIVE_L2,
679 QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
680 QCOW2_OPT_CACHE_SIZE,
681 QCOW2_OPT_L2_CACHE_SIZE,
682 QCOW2_OPT_L2_CACHE_ENTRY_SIZE,
683 QCOW2_OPT_REFCOUNT_CACHE_SIZE,
684 QCOW2_OPT_CACHE_CLEAN_INTERVAL,
685 NULL
686};
687
688static QemuOptsList qcow2_runtime_opts = {
689 .name = "qcow2",
690 .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
691 .desc = {
692 {
693 .name = QCOW2_OPT_LAZY_REFCOUNTS,
694 .type = QEMU_OPT_BOOL,
695 .help = "Postpone refcount updates",
696 },
697 {
698 .name = QCOW2_OPT_DISCARD_REQUEST,
699 .type = QEMU_OPT_BOOL,
700 .help = "Pass guest discard requests to the layer below",
701 },
702 {
703 .name = QCOW2_OPT_DISCARD_SNAPSHOT,
704 .type = QEMU_OPT_BOOL,
705 .help = "Generate discard requests when snapshot related space "
706 "is freed",
707 },
708 {
709 .name = QCOW2_OPT_DISCARD_OTHER,
710 .type = QEMU_OPT_BOOL,
711 .help = "Generate discard requests when other clusters are freed",
712 },
713 {
714 .name = QCOW2_OPT_OVERLAP,
715 .type = QEMU_OPT_STRING,
716 .help = "Selects which overlap checks to perform from a range of "
717 "templates (none, constant, cached, all)",
718 },
719 {
720 .name = QCOW2_OPT_OVERLAP_TEMPLATE,
721 .type = QEMU_OPT_STRING,
722 .help = "Selects which overlap checks to perform from a range of "
723 "templates (none, constant, cached, all)",
724 },
725 {
726 .name = QCOW2_OPT_OVERLAP_MAIN_HEADER,
727 .type = QEMU_OPT_BOOL,
728 .help = "Check for unintended writes into the main qcow2 header",
729 },
730 {
731 .name = QCOW2_OPT_OVERLAP_ACTIVE_L1,
732 .type = QEMU_OPT_BOOL,
733 .help = "Check for unintended writes into the active L1 table",
734 },
735 {
736 .name = QCOW2_OPT_OVERLAP_ACTIVE_L2,
737 .type = QEMU_OPT_BOOL,
738 .help = "Check for unintended writes into an active L2 table",
739 },
740 {
741 .name = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
742 .type = QEMU_OPT_BOOL,
743 .help = "Check for unintended writes into the refcount table",
744 },
745 {
746 .name = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
747 .type = QEMU_OPT_BOOL,
748 .help = "Check for unintended writes into a refcount block",
749 },
750 {
751 .name = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
752 .type = QEMU_OPT_BOOL,
753 .help = "Check for unintended writes into the snapshot table",
754 },
755 {
756 .name = QCOW2_OPT_OVERLAP_INACTIVE_L1,
757 .type = QEMU_OPT_BOOL,
758 .help = "Check for unintended writes into an inactive L1 table",
759 },
760 {
761 .name = QCOW2_OPT_OVERLAP_INACTIVE_L2,
762 .type = QEMU_OPT_BOOL,
763 .help = "Check for unintended writes into an inactive L2 table",
764 },
765 {
766 .name = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
767 .type = QEMU_OPT_BOOL,
768 .help = "Check for unintended writes into the bitmap directory",
769 },
770 {
771 .name = QCOW2_OPT_CACHE_SIZE,
772 .type = QEMU_OPT_SIZE,
773 .help = "Maximum combined metadata (L2 tables and refcount blocks) "
774 "cache size",
775 },
776 {
777 .name = QCOW2_OPT_L2_CACHE_SIZE,
778 .type = QEMU_OPT_SIZE,
779 .help = "Maximum L2 table cache size",
780 },
781 {
782 .name = QCOW2_OPT_L2_CACHE_ENTRY_SIZE,
783 .type = QEMU_OPT_SIZE,
784 .help = "Size of each entry in the L2 cache",
785 },
786 {
787 .name = QCOW2_OPT_REFCOUNT_CACHE_SIZE,
788 .type = QEMU_OPT_SIZE,
789 .help = "Maximum refcount block cache size",
790 },
791 {
792 .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL,
793 .type = QEMU_OPT_NUMBER,
794 .help = "Clean unused cache entries after this time (in seconds)",
795 },
796 BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.",
797 "ID of secret providing qcow2 AES key or LUKS passphrase"),
798 { }
799 },
800};
801
802static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = {
803 [QCOW2_OL_MAIN_HEADER_BITNR] = QCOW2_OPT_OVERLAP_MAIN_HEADER,
804 [QCOW2_OL_ACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L1,
805 [QCOW2_OL_ACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L2,
806 [QCOW2_OL_REFCOUNT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
807 [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
808 [QCOW2_OL_SNAPSHOT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
809 [QCOW2_OL_INACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L1,
810 [QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2,
811 [QCOW2_OL_BITMAP_DIRECTORY_BITNR] = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
812};
813
814static void cache_clean_timer_cb(void *opaque)
815{
816 BlockDriverState *bs = opaque;
817 BDRVQcow2State *s = bs->opaque;
818 qcow2_cache_clean_unused(s->l2_table_cache);
819 qcow2_cache_clean_unused(s->refcount_block_cache);
820 timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
821 (int64_t) s->cache_clean_interval * 1000);
822}
823
824static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context)
825{
826 BDRVQcow2State *s = bs->opaque;
827 if (s->cache_clean_interval > 0) {
828 s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL,
829 SCALE_MS, cache_clean_timer_cb,
830 bs);
831 timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
832 (int64_t) s->cache_clean_interval * 1000);
833 }
834}
835
836static void cache_clean_timer_del(BlockDriverState *bs)
837{
838 BDRVQcow2State *s = bs->opaque;
839 if (s->cache_clean_timer) {
840 timer_del(s->cache_clean_timer);
841 timer_free(s->cache_clean_timer);
842 s->cache_clean_timer = NULL;
843 }
844}
845
846static void qcow2_detach_aio_context(BlockDriverState *bs)
847{
848 cache_clean_timer_del(bs);
849}
850
851static void qcow2_attach_aio_context(BlockDriverState *bs,
852 AioContext *new_context)
853{
854 cache_clean_timer_init(bs, new_context);
855}
856
857static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
858 uint64_t *l2_cache_size,
859 uint64_t *l2_cache_entry_size,
860 uint64_t *refcount_cache_size, Error **errp)
861{
862 BDRVQcow2State *s = bs->opaque;
863 uint64_t combined_cache_size, l2_cache_max_setting;
864 bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set;
865 bool l2_cache_entry_size_set;
866 int min_refcount_cache = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size;
867 uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
868 uint64_t max_l2_entries = DIV_ROUND_UP(virtual_disk_size, s->cluster_size);
869
870
871 uint64_t max_l2_cache = ROUND_UP(max_l2_entries * sizeof(uint64_t),
872 s->cluster_size);
873
874 combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE);
875 l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE);
876 refcount_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
877 l2_cache_entry_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE);
878
879 combined_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_CACHE_SIZE, 0);
880 l2_cache_max_setting = qemu_opt_get_size(opts, QCOW2_OPT_L2_CACHE_SIZE,
881 DEFAULT_L2_CACHE_MAX_SIZE);
882 *refcount_cache_size = qemu_opt_get_size(opts,
883 QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0);
884
885 *l2_cache_entry_size = qemu_opt_get_size(
886 opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE, s->cluster_size);
887
888 *l2_cache_size = MIN(max_l2_cache, l2_cache_max_setting);
889
890 if (combined_cache_size_set) {
891 if (l2_cache_size_set && refcount_cache_size_set) {
892 error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE
893 " and " QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not be set "
894 "at the same time");
895 return;
896 } else if (l2_cache_size_set &&
897 (l2_cache_max_setting > combined_cache_size)) {
898 error_setg(errp, QCOW2_OPT_L2_CACHE_SIZE " may not exceed "
899 QCOW2_OPT_CACHE_SIZE);
900 return;
901 } else if (*refcount_cache_size > combined_cache_size) {
902 error_setg(errp, QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not exceed "
903 QCOW2_OPT_CACHE_SIZE);
904 return;
905 }
906
907 if (l2_cache_size_set) {
908 *refcount_cache_size = combined_cache_size - *l2_cache_size;
909 } else if (refcount_cache_size_set) {
910 *l2_cache_size = combined_cache_size - *refcount_cache_size;
911 } else {
912
913
914 if (combined_cache_size >= max_l2_cache + min_refcount_cache) {
915 *l2_cache_size = max_l2_cache;
916 *refcount_cache_size = combined_cache_size - *l2_cache_size;
917 } else {
918 *refcount_cache_size =
919 MIN(combined_cache_size, min_refcount_cache);
920 *l2_cache_size = combined_cache_size - *refcount_cache_size;
921 }
922 }
923 }
924
925
926
927
928
929
930 if (*l2_cache_size < max_l2_cache && !l2_cache_entry_size_set) {
931 *l2_cache_entry_size = MIN(s->cluster_size, 4096);
932 }
933
934
935
936
937 if (*l2_cache_entry_size < (1 << MIN_CLUSTER_BITS) ||
938 *l2_cache_entry_size > s->cluster_size ||
939 !is_power_of_2(*l2_cache_entry_size)) {
940 error_setg(errp, "L2 cache entry size must be a power of two "
941 "between %d and the cluster size (%d)",
942 1 << MIN_CLUSTER_BITS, s->cluster_size);
943 return;
944 }
945}
946
947typedef struct Qcow2ReopenState {
948 Qcow2Cache *l2_table_cache;
949 Qcow2Cache *refcount_block_cache;
950 int l2_slice_size;
951 bool use_lazy_refcounts;
952 int overlap_check;
953 bool discard_passthrough[QCOW2_DISCARD_MAX];
954 uint64_t cache_clean_interval;
955 QCryptoBlockOpenOptions *crypto_opts;
956} Qcow2ReopenState;
957
958static int qcow2_update_options_prepare(BlockDriverState *bs,
959 Qcow2ReopenState *r,
960 QDict *options, int flags,
961 Error **errp)
962{
963 BDRVQcow2State *s = bs->opaque;
964 QemuOpts *opts = NULL;
965 const char *opt_overlap_check, *opt_overlap_check_template;
966 int overlap_check_template = 0;
967 uint64_t l2_cache_size, l2_cache_entry_size, refcount_cache_size;
968 int i;
969 const char *encryptfmt;
970 QDict *encryptopts = NULL;
971 Error *local_err = NULL;
972 int ret;
973
974 qdict_extract_subqdict(options, &encryptopts, "encrypt.");
975 encryptfmt = qdict_get_try_str(encryptopts, "format");
976
977 opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort);
978 qemu_opts_absorb_qdict(opts, options, &local_err);
979 if (local_err) {
980 error_propagate(errp, local_err);
981 ret = -EINVAL;
982 goto fail;
983 }
984
985
986 read_cache_sizes(bs, opts, &l2_cache_size, &l2_cache_entry_size,
987 &refcount_cache_size, &local_err);
988 if (local_err) {
989 error_propagate(errp, local_err);
990 ret = -EINVAL;
991 goto fail;
992 }
993
994 l2_cache_size /= l2_cache_entry_size;
995 if (l2_cache_size < MIN_L2_CACHE_SIZE) {
996 l2_cache_size = MIN_L2_CACHE_SIZE;
997 }
998 if (l2_cache_size > INT_MAX) {
999 error_setg(errp, "L2 cache size too big");
1000 ret = -EINVAL;
1001 goto fail;
1002 }
1003
1004 refcount_cache_size /= s->cluster_size;
1005 if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) {
1006 refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE;
1007 }
1008 if (refcount_cache_size > INT_MAX) {
1009 error_setg(errp, "Refcount cache size too big");
1010 ret = -EINVAL;
1011 goto fail;
1012 }
1013
1014
1015 if (s->l2_table_cache) {
1016 ret = qcow2_cache_flush(bs, s->l2_table_cache);
1017 if (ret) {
1018 error_setg_errno(errp, -ret, "Failed to flush the L2 table cache");
1019 goto fail;
1020 }
1021 }
1022
1023 if (s->refcount_block_cache) {
1024 ret = qcow2_cache_flush(bs, s->refcount_block_cache);
1025 if (ret) {
1026 error_setg_errno(errp, -ret,
1027 "Failed to flush the refcount block cache");
1028 goto fail;
1029 }
1030 }
1031
1032 r->l2_slice_size = l2_cache_entry_size / sizeof(uint64_t);
1033 r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size,
1034 l2_cache_entry_size);
1035 r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size,
1036 s->cluster_size);
1037 if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) {
1038 error_setg(errp, "Could not allocate metadata caches");
1039 ret = -ENOMEM;
1040 goto fail;
1041 }
1042
1043
1044 r->cache_clean_interval =
1045 qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL,
1046 DEFAULT_CACHE_CLEAN_INTERVAL);
1047#ifndef CONFIG_LINUX
1048 if (r->cache_clean_interval != 0) {
1049 error_setg(errp, QCOW2_OPT_CACHE_CLEAN_INTERVAL
1050 " not supported on this host");
1051 ret = -EINVAL;
1052 goto fail;
1053 }
1054#endif
1055 if (r->cache_clean_interval > UINT_MAX) {
1056 error_setg(errp, "Cache clean interval too big");
1057 ret = -EINVAL;
1058 goto fail;
1059 }
1060
1061
1062 r->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
1063 (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
1064 if (r->use_lazy_refcounts && s->qcow_version < 3) {
1065 error_setg(errp, "Lazy refcounts require a qcow2 image with at least "
1066 "qemu 1.1 compatibility level");
1067 ret = -EINVAL;
1068 goto fail;
1069 }
1070
1071 if (s->use_lazy_refcounts && !r->use_lazy_refcounts) {
1072 ret = qcow2_mark_clean(bs);
1073 if (ret < 0) {
1074 error_setg_errno(errp, -ret, "Failed to disable lazy refcounts");
1075 goto fail;
1076 }
1077 }
1078
1079
1080 opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP);
1081 opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE);
1082 if (opt_overlap_check_template && opt_overlap_check &&
1083 strcmp(opt_overlap_check_template, opt_overlap_check))
1084 {
1085 error_setg(errp, "Conflicting values for qcow2 options '"
1086 QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE
1087 "' ('%s')", opt_overlap_check, opt_overlap_check_template);
1088 ret = -EINVAL;
1089 goto fail;
1090 }
1091 if (!opt_overlap_check) {
1092 opt_overlap_check = opt_overlap_check_template ?: "cached";
1093 }
1094
1095 if (!strcmp(opt_overlap_check, "none")) {
1096 overlap_check_template = 0;
1097 } else if (!strcmp(opt_overlap_check, "constant")) {
1098 overlap_check_template = QCOW2_OL_CONSTANT;
1099 } else if (!strcmp(opt_overlap_check, "cached")) {
1100 overlap_check_template = QCOW2_OL_CACHED;
1101 } else if (!strcmp(opt_overlap_check, "all")) {
1102 overlap_check_template = QCOW2_OL_ALL;
1103 } else {
1104 error_setg(errp, "Unsupported value '%s' for qcow2 option "
1105 "'overlap-check'. Allowed are any of the following: "
1106 "none, constant, cached, all", opt_overlap_check);
1107 ret = -EINVAL;
1108 goto fail;
1109 }
1110
1111 r->overlap_check = 0;
1112 for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) {
1113
1114
1115 r->overlap_check |=
1116 qemu_opt_get_bool(opts, overlap_bool_option_names[i],
1117 overlap_check_template & (1 << i)) << i;
1118 }
1119
1120 r->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
1121 r->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
1122 r->discard_passthrough[QCOW2_DISCARD_REQUEST] =
1123 qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
1124 flags & BDRV_O_UNMAP);
1125 r->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
1126 qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
1127 r->discard_passthrough[QCOW2_DISCARD_OTHER] =
1128 qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
1129
1130 switch (s->crypt_method_header) {
1131 case QCOW_CRYPT_NONE:
1132 if (encryptfmt) {
1133 error_setg(errp, "No encryption in image header, but options "
1134 "specified format '%s'", encryptfmt);
1135 ret = -EINVAL;
1136 goto fail;
1137 }
1138 break;
1139
1140 case QCOW_CRYPT_AES:
1141 if (encryptfmt && !g_str_equal(encryptfmt, "aes")) {
1142 error_setg(errp,
1143 "Header reported 'aes' encryption format but "
1144 "options specify '%s'", encryptfmt);
1145 ret = -EINVAL;
1146 goto fail;
1147 }
1148 qdict_put_str(encryptopts, "format", "qcow");
1149 r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp);
1150 break;
1151
1152 case QCOW_CRYPT_LUKS:
1153 if (encryptfmt && !g_str_equal(encryptfmt, "luks")) {
1154 error_setg(errp,
1155 "Header reported 'luks' encryption format but "
1156 "options specify '%s'", encryptfmt);
1157 ret = -EINVAL;
1158 goto fail;
1159 }
1160 qdict_put_str(encryptopts, "format", "luks");
1161 r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp);
1162 break;
1163
1164 default:
1165 error_setg(errp, "Unsupported encryption method %d",
1166 s->crypt_method_header);
1167 break;
1168 }
1169 if (s->crypt_method_header != QCOW_CRYPT_NONE && !r->crypto_opts) {
1170 ret = -EINVAL;
1171 goto fail;
1172 }
1173
1174 ret = 0;
1175fail:
1176 qobject_unref(encryptopts);
1177 qemu_opts_del(opts);
1178 opts = NULL;
1179 return ret;
1180}
1181
1182static void qcow2_update_options_commit(BlockDriverState *bs,
1183 Qcow2ReopenState *r)
1184{
1185 BDRVQcow2State *s = bs->opaque;
1186 int i;
1187
1188 if (s->l2_table_cache) {
1189 qcow2_cache_destroy(s->l2_table_cache);
1190 }
1191 if (s->refcount_block_cache) {
1192 qcow2_cache_destroy(s->refcount_block_cache);
1193 }
1194 s->l2_table_cache = r->l2_table_cache;
1195 s->refcount_block_cache = r->refcount_block_cache;
1196 s->l2_slice_size = r->l2_slice_size;
1197
1198 s->overlap_check = r->overlap_check;
1199 s->use_lazy_refcounts = r->use_lazy_refcounts;
1200
1201 for (i = 0; i < QCOW2_DISCARD_MAX; i++) {
1202 s->discard_passthrough[i] = r->discard_passthrough[i];
1203 }
1204
1205 if (s->cache_clean_interval != r->cache_clean_interval) {
1206 cache_clean_timer_del(bs);
1207 s->cache_clean_interval = r->cache_clean_interval;
1208 cache_clean_timer_init(bs, bdrv_get_aio_context(bs));
1209 }
1210
1211 qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
1212 s->crypto_opts = r->crypto_opts;
1213}
1214
1215static void qcow2_update_options_abort(BlockDriverState *bs,
1216 Qcow2ReopenState *r)
1217{
1218 if (r->l2_table_cache) {
1219 qcow2_cache_destroy(r->l2_table_cache);
1220 }
1221 if (r->refcount_block_cache) {
1222 qcow2_cache_destroy(r->refcount_block_cache);
1223 }
1224 qapi_free_QCryptoBlockOpenOptions(r->crypto_opts);
1225}
1226
1227static int qcow2_update_options(BlockDriverState *bs, QDict *options,
1228 int flags, Error **errp)
1229{
1230 Qcow2ReopenState r = {};
1231 int ret;
1232
1233 ret = qcow2_update_options_prepare(bs, &r, options, flags, errp);
1234 if (ret >= 0) {
1235 qcow2_update_options_commit(bs, &r);
1236 } else {
1237 qcow2_update_options_abort(bs, &r);
1238 }
1239
1240 return ret;
1241}
1242
1243
1244static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
1245 int flags, Error **errp)
1246{
1247 BDRVQcow2State *s = bs->opaque;
1248 unsigned int len, i;
1249 int ret = 0;
1250 QCowHeader header;
1251 Error *local_err = NULL;
1252 uint64_t ext_end;
1253 uint64_t l1_vm_state_index;
1254 bool update_header = false;
1255
1256 ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
1257 if (ret < 0) {
1258 error_setg_errno(errp, -ret, "Could not read qcow2 header");
1259 goto fail;
1260 }
1261 header.magic = be32_to_cpu(header.magic);
1262 header.version = be32_to_cpu(header.version);
1263 header.backing_file_offset = be64_to_cpu(header.backing_file_offset);
1264 header.backing_file_size = be32_to_cpu(header.backing_file_size);
1265 header.size = be64_to_cpu(header.size);
1266 header.cluster_bits = be32_to_cpu(header.cluster_bits);
1267 header.crypt_method = be32_to_cpu(header.crypt_method);
1268 header.l1_table_offset = be64_to_cpu(header.l1_table_offset);
1269 header.l1_size = be32_to_cpu(header.l1_size);
1270 header.refcount_table_offset = be64_to_cpu(header.refcount_table_offset);
1271 header.refcount_table_clusters =
1272 be32_to_cpu(header.refcount_table_clusters);
1273 header.snapshots_offset = be64_to_cpu(header.snapshots_offset);
1274 header.nb_snapshots = be32_to_cpu(header.nb_snapshots);
1275
1276 if (header.magic != QCOW_MAGIC) {
1277 error_setg(errp, "Image is not in qcow2 format");
1278 ret = -EINVAL;
1279 goto fail;
1280 }
1281 if (header.version < 2 || header.version > 3) {
1282 error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version);
1283 ret = -ENOTSUP;
1284 goto fail;
1285 }
1286
1287 s->qcow_version = header.version;
1288
1289
1290 if (header.cluster_bits < MIN_CLUSTER_BITS ||
1291 header.cluster_bits > MAX_CLUSTER_BITS) {
1292 error_setg(errp, "Unsupported cluster size: 2^%" PRIu32,
1293 header.cluster_bits);
1294 ret = -EINVAL;
1295 goto fail;
1296 }
1297
1298 s->cluster_bits = header.cluster_bits;
1299 s->cluster_size = 1 << s->cluster_bits;
1300
1301
1302 if (header.version == 2) {
1303 header.incompatible_features = 0;
1304 header.compatible_features = 0;
1305 header.autoclear_features = 0;
1306 header.refcount_order = 4;
1307 header.header_length = 72;
1308 } else {
1309 header.incompatible_features =
1310 be64_to_cpu(header.incompatible_features);
1311 header.compatible_features = be64_to_cpu(header.compatible_features);
1312 header.autoclear_features = be64_to_cpu(header.autoclear_features);
1313 header.refcount_order = be32_to_cpu(header.refcount_order);
1314 header.header_length = be32_to_cpu(header.header_length);
1315
1316 if (header.header_length < 104) {
1317 error_setg(errp, "qcow2 header too short");
1318 ret = -EINVAL;
1319 goto fail;
1320 }
1321 }
1322
1323 if (header.header_length > s->cluster_size) {
1324 error_setg(errp, "qcow2 header exceeds cluster size");
1325 ret = -EINVAL;
1326 goto fail;
1327 }
1328
1329 if (header.header_length > sizeof(header)) {
1330 s->unknown_header_fields_size = header.header_length - sizeof(header);
1331 s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
1332 ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
1333 s->unknown_header_fields_size);
1334 if (ret < 0) {
1335 error_setg_errno(errp, -ret, "Could not read unknown qcow2 header "
1336 "fields");
1337 goto fail;
1338 }
1339 }
1340
1341 if (header.backing_file_offset > s->cluster_size) {
1342 error_setg(errp, "Invalid backing file offset");
1343 ret = -EINVAL;
1344 goto fail;
1345 }
1346
1347 if (header.backing_file_offset) {
1348 ext_end = header.backing_file_offset;
1349 } else {
1350 ext_end = 1 << header.cluster_bits;
1351 }
1352
1353
1354 s->incompatible_features = header.incompatible_features;
1355 s->compatible_features = header.compatible_features;
1356 s->autoclear_features = header.autoclear_features;
1357
1358 if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) {
1359 void *feature_table = NULL;
1360 qcow2_read_extensions(bs, header.header_length, ext_end,
1361 &feature_table, flags, NULL, NULL);
1362 report_unsupported_feature(errp, feature_table,
1363 s->incompatible_features &
1364 ~QCOW2_INCOMPAT_MASK);
1365 ret = -ENOTSUP;
1366 g_free(feature_table);
1367 goto fail;
1368 }
1369
1370 if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
1371
1372
1373 if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
1374 error_setg(errp, "qcow2: Image is corrupt; cannot be opened "
1375 "read/write");
1376 ret = -EACCES;
1377 goto fail;
1378 }
1379 }
1380
1381
1382 if (header.refcount_order > 6) {
1383 error_setg(errp, "Reference count entry width too large; may not "
1384 "exceed 64 bits");
1385 ret = -EINVAL;
1386 goto fail;
1387 }
1388 s->refcount_order = header.refcount_order;
1389 s->refcount_bits = 1 << s->refcount_order;
1390 s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
1391 s->refcount_max += s->refcount_max - 1;
1392
1393 s->crypt_method_header = header.crypt_method;
1394 if (s->crypt_method_header) {
1395 if (bdrv_uses_whitelist() &&
1396 s->crypt_method_header == QCOW_CRYPT_AES) {
1397 error_setg(errp,
1398 "Use of AES-CBC encrypted qcow2 images is no longer "
1399 "supported in system emulators");
1400 error_append_hint(errp,
1401 "You can use 'qemu-img convert' to convert your "
1402 "image to an alternative supported format, such "
1403 "as unencrypted qcow2, or raw with the LUKS "
1404 "format instead.\n");
1405 ret = -ENOSYS;
1406 goto fail;
1407 }
1408
1409 if (s->crypt_method_header == QCOW_CRYPT_AES) {
1410 s->crypt_physical_offset = false;
1411 } else {
1412
1413
1414
1415 s->crypt_physical_offset = true;
1416 }
1417
1418 bs->encrypted = true;
1419 }
1420
1421 s->l2_bits = s->cluster_bits - 3;
1422 s->l2_size = 1 << s->l2_bits;
1423
1424 s->refcount_block_bits = s->cluster_bits - (s->refcount_order - 3);
1425 s->refcount_block_size = 1 << s->refcount_block_bits;
1426 bs->total_sectors = header.size / BDRV_SECTOR_SIZE;
1427 s->csize_shift = (62 - (s->cluster_bits - 8));
1428 s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
1429 s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
1430
1431 s->refcount_table_offset = header.refcount_table_offset;
1432 s->refcount_table_size =
1433 header.refcount_table_clusters << (s->cluster_bits - 3);
1434
1435 if (header.refcount_table_clusters == 0 && !(flags & BDRV_O_CHECK)) {
1436 error_setg(errp, "Image does not contain a reference count table");
1437 ret = -EINVAL;
1438 goto fail;
1439 }
1440
1441 ret = qcow2_validate_table(bs, s->refcount_table_offset,
1442 header.refcount_table_clusters,
1443 s->cluster_size, QCOW_MAX_REFTABLE_SIZE,
1444 "Reference count table", errp);
1445 if (ret < 0) {
1446 goto fail;
1447 }
1448
1449 if (!(flags & BDRV_O_CHECK)) {
1450
1451
1452
1453
1454
1455
1456 ret = qcow2_validate_table(bs, header.snapshots_offset,
1457 header.nb_snapshots,
1458 sizeof(QCowSnapshotHeader),
1459 sizeof(QCowSnapshotHeader) *
1460 QCOW_MAX_SNAPSHOTS,
1461 "Snapshot table", errp);
1462 if (ret < 0) {
1463 goto fail;
1464 }
1465 }
1466
1467
1468 ret = qcow2_validate_table(bs, header.l1_table_offset,
1469 header.l1_size, sizeof(uint64_t),
1470 QCOW_MAX_L1_SIZE, "Active L1 table", errp);
1471 if (ret < 0) {
1472 goto fail;
1473 }
1474 s->l1_size = header.l1_size;
1475 s->l1_table_offset = header.l1_table_offset;
1476
1477 l1_vm_state_index = size_to_l1(s, header.size);
1478 if (l1_vm_state_index > INT_MAX) {
1479 error_setg(errp, "Image is too big");
1480 ret = -EFBIG;
1481 goto fail;
1482 }
1483 s->l1_vm_state_index = l1_vm_state_index;
1484
1485
1486
1487 if (s->l1_size < s->l1_vm_state_index) {
1488 error_setg(errp, "L1 table is too small");
1489 ret = -EINVAL;
1490 goto fail;
1491 }
1492
1493 if (s->l1_size > 0) {
1494 s->l1_table = qemu_try_blockalign(bs->file->bs,
1495 ROUND_UP(s->l1_size * sizeof(uint64_t), 512));
1496 if (s->l1_table == NULL) {
1497 error_setg(errp, "Could not allocate L1 table");
1498 ret = -ENOMEM;
1499 goto fail;
1500 }
1501 ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
1502 s->l1_size * sizeof(uint64_t));
1503 if (ret < 0) {
1504 error_setg_errno(errp, -ret, "Could not read L1 table");
1505 goto fail;
1506 }
1507 for(i = 0;i < s->l1_size; i++) {
1508 s->l1_table[i] = be64_to_cpu(s->l1_table[i]);
1509 }
1510 }
1511
1512
1513 ret = qcow2_update_options(bs, options, flags, errp);
1514 if (ret < 0) {
1515 goto fail;
1516 }
1517
1518 s->flags = flags;
1519
1520 ret = qcow2_refcount_init(bs);
1521 if (ret != 0) {
1522 error_setg_errno(errp, -ret, "Could not initialize refcount handling");
1523 goto fail;
1524 }
1525
1526 QLIST_INIT(&s->cluster_allocs);
1527 QTAILQ_INIT(&s->discards);
1528
1529
1530 if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL,
1531 flags, &update_header, &local_err)) {
1532 error_propagate(errp, local_err);
1533 ret = -EINVAL;
1534 goto fail;
1535 }
1536
1537
1538 s->data_file = bdrv_open_child(NULL, options, "data-file", bs, &child_file,
1539 true, &local_err);
1540 if (local_err) {
1541 error_propagate(errp, local_err);
1542 ret = -EINVAL;
1543 goto fail;
1544 }
1545
1546 if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) {
1547 if (!s->data_file && s->image_data_file) {
1548 s->data_file = bdrv_open_child(s->image_data_file, options,
1549 "data-file", bs, &child_file,
1550 false, errp);
1551 if (!s->data_file) {
1552 ret = -EINVAL;
1553 goto fail;
1554 }
1555 }
1556 if (!s->data_file) {
1557 error_setg(errp, "'data-file' is required for this image");
1558 ret = -EINVAL;
1559 goto fail;
1560 }
1561 } else {
1562 if (s->data_file) {
1563 error_setg(errp, "'data-file' can only be set for images with an "
1564 "external data file");
1565 ret = -EINVAL;
1566 goto fail;
1567 }
1568
1569 s->data_file = bs->file;
1570
1571 if (data_file_is_raw(bs)) {
1572 error_setg(errp, "data-file-raw requires a data file");
1573 ret = -EINVAL;
1574 goto fail;
1575 }
1576 }
1577
1578
1579
1580
1581
1582 if (s->crypt_method_header && !s->crypto) {
1583 if (s->crypt_method_header == QCOW_CRYPT_AES) {
1584 unsigned int cflags = 0;
1585 if (flags & BDRV_O_NO_IO) {
1586 cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
1587 }
1588 s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
1589 NULL, NULL, cflags,
1590 QCOW2_MAX_THREADS, errp);
1591 if (!s->crypto) {
1592 ret = -EINVAL;
1593 goto fail;
1594 }
1595 } else if (!(flags & BDRV_O_NO_IO)) {
1596 error_setg(errp, "Missing CRYPTO header for crypt method %d",
1597 s->crypt_method_header);
1598 ret = -EINVAL;
1599 goto fail;
1600 }
1601 }
1602
1603
1604 if (header.backing_file_offset != 0) {
1605 len = header.backing_file_size;
1606 if (len > MIN(1023, s->cluster_size - header.backing_file_offset) ||
1607 len >= sizeof(bs->backing_file)) {
1608 error_setg(errp, "Backing file name too long");
1609 ret = -EINVAL;
1610 goto fail;
1611 }
1612 ret = bdrv_pread(bs->file, header.backing_file_offset,
1613 bs->auto_backing_file, len);
1614 if (ret < 0) {
1615 error_setg_errno(errp, -ret, "Could not read backing file name");
1616 goto fail;
1617 }
1618 bs->auto_backing_file[len] = '\0';
1619 pstrcpy(bs->backing_file, sizeof(bs->backing_file),
1620 bs->auto_backing_file);
1621 s->image_backing_file = g_strdup(bs->auto_backing_file);
1622 }
1623
1624
1625
1626
1627
1628
1629 if (!(flags & BDRV_O_CHECK)) {
1630 s->snapshots_offset = header.snapshots_offset;
1631 s->nb_snapshots = header.nb_snapshots;
1632
1633 ret = qcow2_read_snapshots(bs, errp);
1634 if (ret < 0) {
1635 goto fail;
1636 }
1637 }
1638
1639
1640 update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK;
1641 update_header =
1642 update_header && !bs->read_only && !(flags & BDRV_O_INACTIVE);
1643 if (update_header) {
1644 s->autoclear_features &= QCOW2_AUTOCLEAR_MASK;
1645 }
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705 if (!(bdrv_get_flags(bs) & BDRV_O_INACTIVE)) {
1706
1707 bool header_updated = qcow2_load_dirty_bitmaps(bs, &local_err);
1708
1709 update_header = update_header && !header_updated;
1710 }
1711 if (local_err != NULL) {
1712 error_propagate(errp, local_err);
1713 ret = -EINVAL;
1714 goto fail;
1715 }
1716
1717 if (update_header) {
1718 ret = qcow2_update_header(bs);
1719 if (ret < 0) {
1720 error_setg_errno(errp, -ret, "Could not update qcow2 header");
1721 goto fail;
1722 }
1723 }
1724
1725 bs->supported_zero_flags = header.version >= 3 ? BDRV_REQ_MAY_UNMAP : 0;
1726
1727
1728 if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only &&
1729 (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
1730 BdrvCheckResult result = {0};
1731
1732 ret = qcow2_co_check_locked(bs, &result,
1733 BDRV_FIX_ERRORS | BDRV_FIX_LEAKS);
1734 if (ret < 0 || result.check_errors) {
1735 if (ret >= 0) {
1736 ret = -EIO;
1737 }
1738 error_setg_errno(errp, -ret, "Could not repair dirty image");
1739 goto fail;
1740 }
1741 }
1742
1743#ifdef DEBUG_ALLOC
1744 {
1745 BdrvCheckResult result = {0};
1746 qcow2_check_refcounts(bs, &result, 0);
1747 }
1748#endif
1749
1750 qemu_co_queue_init(&s->thread_task_queue);
1751
1752 return ret;
1753
1754 fail:
1755 g_free(s->image_data_file);
1756 if (has_data_file(bs)) {
1757 bdrv_unref_child(bs, s->data_file);
1758 }
1759 g_free(s->unknown_header_fields);
1760 cleanup_unknown_header_ext(bs);
1761 qcow2_free_snapshots(bs);
1762 qcow2_refcount_close(bs);
1763 qemu_vfree(s->l1_table);
1764
1765 s->l1_table = NULL;
1766 cache_clean_timer_del(bs);
1767 if (s->l2_table_cache) {
1768 qcow2_cache_destroy(s->l2_table_cache);
1769 }
1770 if (s->refcount_block_cache) {
1771 qcow2_cache_destroy(s->refcount_block_cache);
1772 }
1773 qcrypto_block_free(s->crypto);
1774 qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
1775 return ret;
1776}
1777
1778typedef struct QCow2OpenCo {
1779 BlockDriverState *bs;
1780 QDict *options;
1781 int flags;
1782 Error **errp;
1783 int ret;
1784} QCow2OpenCo;
1785
1786static void coroutine_fn qcow2_open_entry(void *opaque)
1787{
1788 QCow2OpenCo *qoc = opaque;
1789 BDRVQcow2State *s = qoc->bs->opaque;
1790
1791 qemu_co_mutex_lock(&s->lock);
1792 qoc->ret = qcow2_do_open(qoc->bs, qoc->options, qoc->flags, qoc->errp);
1793 qemu_co_mutex_unlock(&s->lock);
1794}
1795
1796static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
1797 Error **errp)
1798{
1799 BDRVQcow2State *s = bs->opaque;
1800 QCow2OpenCo qoc = {
1801 .bs = bs,
1802 .options = options,
1803 .flags = flags,
1804 .errp = errp,
1805 .ret = -EINPROGRESS
1806 };
1807
1808 bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
1809 false, errp);
1810 if (!bs->file) {
1811 return -EINVAL;
1812 }
1813
1814
1815 qemu_co_mutex_init(&s->lock);
1816
1817 if (qemu_in_coroutine()) {
1818
1819 qcow2_open_entry(&qoc);
1820 } else {
1821 assert(qemu_get_current_aio_context() == qemu_get_aio_context());
1822 qemu_coroutine_enter(qemu_coroutine_create(qcow2_open_entry, &qoc));
1823 BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS);
1824 }
1825 return qoc.ret;
1826}
1827
1828static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
1829{
1830 BDRVQcow2State *s = bs->opaque;
1831
1832 if (bs->encrypted) {
1833
1834 bs->bl.request_alignment = qcrypto_block_get_sector_size(s->crypto);
1835 }
1836 bs->bl.pwrite_zeroes_alignment = s->cluster_size;
1837 bs->bl.pdiscard_alignment = s->cluster_size;
1838}
1839
1840static int qcow2_reopen_prepare(BDRVReopenState *state,
1841 BlockReopenQueue *queue, Error **errp)
1842{
1843 Qcow2ReopenState *r;
1844 int ret;
1845
1846 r = g_new0(Qcow2ReopenState, 1);
1847 state->opaque = r;
1848
1849 ret = qcow2_update_options_prepare(state->bs, r, state->options,
1850 state->flags, errp);
1851 if (ret < 0) {
1852 goto fail;
1853 }
1854
1855
1856 if ((state->flags & BDRV_O_RDWR) == 0) {
1857 ret = qcow2_reopen_bitmaps_ro(state->bs, errp);
1858 if (ret < 0) {
1859 goto fail;
1860 }
1861
1862 ret = bdrv_flush(state->bs);
1863 if (ret < 0) {
1864 goto fail;
1865 }
1866
1867 ret = qcow2_mark_clean(state->bs);
1868 if (ret < 0) {
1869 goto fail;
1870 }
1871 }
1872
1873 return 0;
1874
1875fail:
1876 qcow2_update_options_abort(state->bs, r);
1877 g_free(r);
1878 return ret;
1879}
1880
1881static void qcow2_reopen_commit(BDRVReopenState *state)
1882{
1883 qcow2_update_options_commit(state->bs, state->opaque);
1884 if (state->flags & BDRV_O_RDWR) {
1885 Error *local_err = NULL;
1886
1887 if (qcow2_reopen_bitmaps_rw(state->bs, &local_err) < 0) {
1888
1889
1890
1891
1892
1893 error_reportf_err(local_err,
1894 "%s: Failed to make dirty bitmaps writable: ",
1895 bdrv_get_node_name(state->bs));
1896 }
1897 }
1898 g_free(state->opaque);
1899}
1900
1901static void qcow2_reopen_abort(BDRVReopenState *state)
1902{
1903 qcow2_update_options_abort(state->bs, state->opaque);
1904 g_free(state->opaque);
1905}
1906
1907static void qcow2_join_options(QDict *options, QDict *old_options)
1908{
1909 bool has_new_overlap_template =
1910 qdict_haskey(options, QCOW2_OPT_OVERLAP) ||
1911 qdict_haskey(options, QCOW2_OPT_OVERLAP_TEMPLATE);
1912 bool has_new_total_cache_size =
1913 qdict_haskey(options, QCOW2_OPT_CACHE_SIZE);
1914 bool has_all_cache_options;
1915
1916
1917 if (has_new_overlap_template) {
1918 qdict_del(old_options, QCOW2_OPT_OVERLAP);
1919 qdict_del(old_options, QCOW2_OPT_OVERLAP_TEMPLATE);
1920 qdict_del(old_options, QCOW2_OPT_OVERLAP_MAIN_HEADER);
1921 qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L1);
1922 qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L2);
1923 qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_TABLE);
1924 qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK);
1925 qdict_del(old_options, QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE);
1926 qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L1);
1927 qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L2);
1928 }
1929
1930
1931 if (qdict_haskey(options, QCOW2_OPT_CACHE_SIZE)) {
1932 qdict_del(old_options, QCOW2_OPT_L2_CACHE_SIZE);
1933 qdict_del(old_options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
1934 }
1935
1936 qdict_join(options, old_options, false);
1937
1938
1939
1940
1941
1942
1943 has_all_cache_options =
1944 qdict_haskey(options, QCOW2_OPT_CACHE_SIZE) ||
1945 qdict_haskey(options, QCOW2_OPT_L2_CACHE_SIZE) ||
1946 qdict_haskey(options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
1947
1948 if (has_all_cache_options && !has_new_total_cache_size) {
1949 qdict_del(options, QCOW2_OPT_CACHE_SIZE);
1950 }
1951}
1952
1953static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs,
1954 bool want_zero,
1955 int64_t offset, int64_t count,
1956 int64_t *pnum, int64_t *map,
1957 BlockDriverState **file)
1958{
1959 BDRVQcow2State *s = bs->opaque;
1960 uint64_t cluster_offset;
1961 int index_in_cluster, ret;
1962 unsigned int bytes;
1963 int status = 0;
1964
1965 qemu_co_mutex_lock(&s->lock);
1966
1967 if (!s->metadata_preallocation_checked) {
1968 ret = qcow2_detect_metadata_preallocation(bs);
1969 s->metadata_preallocation = (ret == 1);
1970 s->metadata_preallocation_checked = true;
1971 }
1972
1973 bytes = MIN(INT_MAX, count);
1974 ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset);
1975 qemu_co_mutex_unlock(&s->lock);
1976 if (ret < 0) {
1977 return ret;
1978 }
1979
1980 *pnum = bytes;
1981
1982 if ((ret == QCOW2_CLUSTER_NORMAL || ret == QCOW2_CLUSTER_ZERO_ALLOC) &&
1983 !s->crypto) {
1984 index_in_cluster = offset & (s->cluster_size - 1);
1985 *map = cluster_offset | index_in_cluster;
1986 *file = s->data_file->bs;
1987 status |= BDRV_BLOCK_OFFSET_VALID;
1988 }
1989 if (ret == QCOW2_CLUSTER_ZERO_PLAIN || ret == QCOW2_CLUSTER_ZERO_ALLOC) {
1990 status |= BDRV_BLOCK_ZERO;
1991 } else if (ret != QCOW2_CLUSTER_UNALLOCATED) {
1992 status |= BDRV_BLOCK_DATA;
1993 }
1994 if (s->metadata_preallocation && (status & BDRV_BLOCK_DATA) &&
1995 (status & BDRV_BLOCK_OFFSET_VALID))
1996 {
1997 status |= BDRV_BLOCK_RECURSE;
1998 }
1999 return status;
2000}
2001
2002static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs,
2003 QCowL2Meta **pl2meta,
2004 bool link_l2)
2005{
2006 int ret = 0;
2007 QCowL2Meta *l2meta = *pl2meta;
2008
2009 while (l2meta != NULL) {
2010 QCowL2Meta *next;
2011
2012 if (link_l2) {
2013 ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
2014 if (ret) {
2015 goto out;
2016 }
2017 } else {
2018 qcow2_alloc_cluster_abort(bs, l2meta);
2019 }
2020
2021
2022 if (l2meta->nb_clusters != 0) {
2023 QLIST_REMOVE(l2meta, next_in_flight);
2024 }
2025
2026 qemu_co_queue_restart_all(&l2meta->dependent_requests);
2027
2028 next = l2meta->next;
2029 g_free(l2meta);
2030 l2meta = next;
2031 }
2032out:
2033 *pl2meta = l2meta;
2034 return ret;
2035}
2036
2037static coroutine_fn int
2038qcow2_co_preadv_encrypted(BlockDriverState *bs,
2039 uint64_t file_cluster_offset,
2040 uint64_t offset,
2041 uint64_t bytes,
2042 QEMUIOVector *qiov,
2043 uint64_t qiov_offset)
2044{
2045 int ret;
2046 BDRVQcow2State *s = bs->opaque;
2047 uint8_t *buf;
2048
2049 assert(bs->encrypted && s->crypto);
2050 assert(bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060 buf = qemu_try_blockalign(s->data_file->bs, bytes);
2061 if (buf == NULL) {
2062 return -ENOMEM;
2063 }
2064
2065 BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
2066 ret = bdrv_co_pread(s->data_file,
2067 file_cluster_offset + offset_into_cluster(s, offset),
2068 bytes, buf, 0);
2069 if (ret < 0) {
2070 goto fail;
2071 }
2072
2073 assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
2074 assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
2075 if (qcow2_co_decrypt(bs,
2076 file_cluster_offset + offset_into_cluster(s, offset),
2077 offset, buf, bytes) < 0)
2078 {
2079 ret = -EIO;
2080 goto fail;
2081 }
2082 qemu_iovec_from_buf(qiov, qiov_offset, buf, bytes);
2083
2084fail:
2085 qemu_vfree(buf);
2086
2087 return ret;
2088}
2089
2090typedef struct Qcow2AioTask {
2091 AioTask task;
2092
2093 BlockDriverState *bs;
2094 QCow2ClusterType cluster_type;
2095 uint64_t file_cluster_offset;
2096 uint64_t offset;
2097 uint64_t bytes;
2098 QEMUIOVector *qiov;
2099 uint64_t qiov_offset;
2100 QCowL2Meta *l2meta;
2101} Qcow2AioTask;
2102
2103static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task);
2104static coroutine_fn int qcow2_add_task(BlockDriverState *bs,
2105 AioTaskPool *pool,
2106 AioTaskFunc func,
2107 QCow2ClusterType cluster_type,
2108 uint64_t file_cluster_offset,
2109 uint64_t offset,
2110 uint64_t bytes,
2111 QEMUIOVector *qiov,
2112 size_t qiov_offset,
2113 QCowL2Meta *l2meta)
2114{
2115 Qcow2AioTask local_task;
2116 Qcow2AioTask *task = pool ? g_new(Qcow2AioTask, 1) : &local_task;
2117
2118 *task = (Qcow2AioTask) {
2119 .task.func = func,
2120 .bs = bs,
2121 .cluster_type = cluster_type,
2122 .qiov = qiov,
2123 .file_cluster_offset = file_cluster_offset,
2124 .offset = offset,
2125 .bytes = bytes,
2126 .qiov_offset = qiov_offset,
2127 .l2meta = l2meta,
2128 };
2129
2130 trace_qcow2_add_task(qemu_coroutine_self(), bs, pool,
2131 func == qcow2_co_preadv_task_entry ? "read" : "write",
2132 cluster_type, file_cluster_offset, offset, bytes,
2133 qiov, qiov_offset);
2134
2135 if (!pool) {
2136 return func(&task->task);
2137 }
2138
2139 aio_task_pool_start_task(pool, &task->task);
2140
2141 return 0;
2142}
2143
2144static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs,
2145 QCow2ClusterType cluster_type,
2146 uint64_t file_cluster_offset,
2147 uint64_t offset, uint64_t bytes,
2148 QEMUIOVector *qiov,
2149 size_t qiov_offset)
2150{
2151 BDRVQcow2State *s = bs->opaque;
2152 int offset_in_cluster = offset_into_cluster(s, offset);
2153
2154 switch (cluster_type) {
2155 case QCOW2_CLUSTER_ZERO_PLAIN:
2156 case QCOW2_CLUSTER_ZERO_ALLOC:
2157
2158 g_assert_not_reached();
2159
2160 case QCOW2_CLUSTER_UNALLOCATED:
2161 assert(bs->backing);
2162
2163 BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
2164 return bdrv_co_preadv_part(bs->backing, offset, bytes,
2165 qiov, qiov_offset, 0);
2166
2167 case QCOW2_CLUSTER_COMPRESSED:
2168 return qcow2_co_preadv_compressed(bs, file_cluster_offset,
2169 offset, bytes, qiov, qiov_offset);
2170
2171 case QCOW2_CLUSTER_NORMAL:
2172 if ((file_cluster_offset & 511) != 0) {
2173 return -EIO;
2174 }
2175
2176 if (bs->encrypted) {
2177 return qcow2_co_preadv_encrypted(bs, file_cluster_offset,
2178 offset, bytes, qiov, qiov_offset);
2179 }
2180
2181 BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
2182 return bdrv_co_preadv_part(s->data_file,
2183 file_cluster_offset + offset_in_cluster,
2184 bytes, qiov, qiov_offset, 0);
2185
2186 default:
2187 g_assert_not_reached();
2188 }
2189
2190 g_assert_not_reached();
2191}
2192
2193static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task)
2194{
2195 Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
2196
2197 assert(!t->l2meta);
2198
2199 return qcow2_co_preadv_task(t->bs, t->cluster_type, t->file_cluster_offset,
2200 t->offset, t->bytes, t->qiov, t->qiov_offset);
2201}
2202
2203static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs,
2204 uint64_t offset, uint64_t bytes,
2205 QEMUIOVector *qiov,
2206 size_t qiov_offset, int flags)
2207{
2208 BDRVQcow2State *s = bs->opaque;
2209 int ret = 0;
2210 unsigned int cur_bytes;
2211 uint64_t cluster_offset = 0;
2212 AioTaskPool *aio = NULL;
2213
2214 while (bytes != 0 && aio_task_pool_status(aio) == 0) {
2215
2216 cur_bytes = MIN(bytes, INT_MAX);
2217 if (s->crypto) {
2218 cur_bytes = MIN(cur_bytes,
2219 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
2220 }
2221
2222 qemu_co_mutex_lock(&s->lock);
2223 ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset);
2224 qemu_co_mutex_unlock(&s->lock);
2225 if (ret < 0) {
2226 goto out;
2227 }
2228
2229 if (ret == QCOW2_CLUSTER_ZERO_PLAIN ||
2230 ret == QCOW2_CLUSTER_ZERO_ALLOC ||
2231 (ret == QCOW2_CLUSTER_UNALLOCATED && !bs->backing))
2232 {
2233 qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes);
2234 } else {
2235 if (!aio && cur_bytes != bytes) {
2236 aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
2237 }
2238 ret = qcow2_add_task(bs, aio, qcow2_co_preadv_task_entry, ret,
2239 cluster_offset, offset, cur_bytes,
2240 qiov, qiov_offset, NULL);
2241 if (ret < 0) {
2242 goto out;
2243 }
2244 }
2245
2246 bytes -= cur_bytes;
2247 offset += cur_bytes;
2248 qiov_offset += cur_bytes;
2249 }
2250
2251out:
2252 if (aio) {
2253 aio_task_pool_wait_all(aio);
2254 if (ret == 0) {
2255 ret = aio_task_pool_status(aio);
2256 }
2257 g_free(aio);
2258 }
2259
2260 return ret;
2261}
2262
2263
2264
2265static bool merge_cow(uint64_t offset, unsigned bytes,
2266 QEMUIOVector *qiov, size_t qiov_offset,
2267 QCowL2Meta *l2meta)
2268{
2269 QCowL2Meta *m;
2270
2271 for (m = l2meta; m != NULL; m = m->next) {
2272
2273 if (m->cow_start.nb_bytes == 0 && m->cow_end.nb_bytes == 0) {
2274 continue;
2275 }
2276
2277
2278 if (m->skip_cow) {
2279 continue;
2280 }
2281
2282
2283
2284 if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) {
2285 continue;
2286 }
2287
2288
2289
2290 if (m->offset + m->cow_end.offset != offset + bytes) {
2291 continue;
2292 }
2293
2294
2295
2296 if (qemu_iovec_subvec_niov(qiov, qiov_offset, bytes) > IOV_MAX - 2) {
2297 continue;
2298 }
2299
2300 m->data_qiov = qiov;
2301 m->data_qiov_offset = qiov_offset;
2302 return true;
2303 }
2304
2305 return false;
2306}
2307
2308static bool is_unallocated(BlockDriverState *bs, int64_t offset, int64_t bytes)
2309{
2310 int64_t nr;
2311 return !bytes ||
2312 (!bdrv_is_allocated_above(bs, NULL, false, offset, bytes, &nr) &&
2313 nr == bytes);
2314}
2315
2316static bool is_zero_cow(BlockDriverState *bs, QCowL2Meta *m)
2317{
2318
2319
2320
2321
2322
2323
2324 return is_unallocated(bs, m->offset + m->cow_start.offset,
2325 m->cow_start.nb_bytes) &&
2326 is_unallocated(bs, m->offset + m->cow_end.offset,
2327 m->cow_end.nb_bytes);
2328}
2329
2330static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta)
2331{
2332 BDRVQcow2State *s = bs->opaque;
2333 QCowL2Meta *m;
2334
2335 if (!(s->data_file->bs->supported_zero_flags & BDRV_REQ_NO_FALLBACK)) {
2336 return 0;
2337 }
2338
2339 if (bs->encrypted) {
2340 return 0;
2341 }
2342
2343 for (m = l2meta; m != NULL; m = m->next) {
2344 int ret;
2345
2346 if (!m->cow_start.nb_bytes && !m->cow_end.nb_bytes) {
2347 continue;
2348 }
2349
2350 if (!is_zero_cow(bs, m)) {
2351 continue;
2352 }
2353
2354
2355
2356
2357
2358
2359 ret = qcow2_pre_write_overlap_check(bs, 0, m->alloc_offset,
2360 m->nb_clusters * s->cluster_size,
2361 true);
2362 if (ret < 0) {
2363 return ret;
2364 }
2365
2366 BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_SPACE);
2367 ret = bdrv_co_pwrite_zeroes(s->data_file, m->alloc_offset,
2368 m->nb_clusters * s->cluster_size,
2369 BDRV_REQ_NO_FALLBACK);
2370 if (ret < 0) {
2371 if (ret != -ENOTSUP && ret != -EAGAIN) {
2372 return ret;
2373 }
2374 continue;
2375 }
2376
2377 trace_qcow2_skip_cow(qemu_coroutine_self(), m->offset, m->nb_clusters);
2378 m->skip_cow = true;
2379 }
2380 return 0;
2381}
2382
2383
2384
2385
2386
2387
2388
2389static coroutine_fn int qcow2_co_pwritev_task(BlockDriverState *bs,
2390 uint64_t file_cluster_offset,
2391 uint64_t offset, uint64_t bytes,
2392 QEMUIOVector *qiov,
2393 uint64_t qiov_offset,
2394 QCowL2Meta *l2meta)
2395{
2396 int ret;
2397 BDRVQcow2State *s = bs->opaque;
2398 void *crypt_buf = NULL;
2399 int offset_in_cluster = offset_into_cluster(s, offset);
2400 QEMUIOVector encrypted_qiov;
2401
2402 if (bs->encrypted) {
2403 assert(s->crypto);
2404 assert(bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
2405 crypt_buf = qemu_try_blockalign(bs->file->bs, bytes);
2406 if (crypt_buf == NULL) {
2407 ret = -ENOMEM;
2408 goto out_unlocked;
2409 }
2410 qemu_iovec_to_buf(qiov, qiov_offset, crypt_buf, bytes);
2411
2412 if (qcow2_co_encrypt(bs, file_cluster_offset + offset_in_cluster,
2413 offset, crypt_buf, bytes) < 0)
2414 {
2415 ret = -EIO;
2416 goto out_unlocked;
2417 }
2418
2419 qemu_iovec_init_buf(&encrypted_qiov, crypt_buf, bytes);
2420 qiov = &encrypted_qiov;
2421 qiov_offset = 0;
2422 }
2423
2424
2425 ret = handle_alloc_space(bs, l2meta);
2426 if (ret < 0) {
2427 goto out_unlocked;
2428 }
2429
2430
2431
2432
2433
2434
2435
2436 if (!merge_cow(offset, bytes, qiov, qiov_offset, l2meta)) {
2437 BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
2438 trace_qcow2_writev_data(qemu_coroutine_self(),
2439 file_cluster_offset + offset_in_cluster);
2440 ret = bdrv_co_pwritev_part(s->data_file,
2441 file_cluster_offset + offset_in_cluster,
2442 bytes, qiov, qiov_offset, 0);
2443 if (ret < 0) {
2444 goto out_unlocked;
2445 }
2446 }
2447
2448 qemu_co_mutex_lock(&s->lock);
2449
2450 ret = qcow2_handle_l2meta(bs, &l2meta, true);
2451 goto out_locked;
2452
2453out_unlocked:
2454 qemu_co_mutex_lock(&s->lock);
2455
2456out_locked:
2457 qcow2_handle_l2meta(bs, &l2meta, false);
2458 qemu_co_mutex_unlock(&s->lock);
2459
2460 qemu_vfree(crypt_buf);
2461
2462 return ret;
2463}
2464
2465static coroutine_fn int qcow2_co_pwritev_task_entry(AioTask *task)
2466{
2467 Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
2468
2469 assert(!t->cluster_type);
2470
2471 return qcow2_co_pwritev_task(t->bs, t->file_cluster_offset,
2472 t->offset, t->bytes, t->qiov, t->qiov_offset,
2473 t->l2meta);
2474}
2475
2476static coroutine_fn int qcow2_co_pwritev_part(
2477 BlockDriverState *bs, uint64_t offset, uint64_t bytes,
2478 QEMUIOVector *qiov, size_t qiov_offset, int flags)
2479{
2480 BDRVQcow2State *s = bs->opaque;
2481 int offset_in_cluster;
2482 int ret;
2483 unsigned int cur_bytes;
2484 uint64_t cluster_offset;
2485 QCowL2Meta *l2meta = NULL;
2486 AioTaskPool *aio = NULL;
2487
2488 trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);
2489
2490 while (bytes != 0 && aio_task_pool_status(aio) == 0) {
2491
2492 l2meta = NULL;
2493
2494 trace_qcow2_writev_start_part(qemu_coroutine_self());
2495 offset_in_cluster = offset_into_cluster(s, offset);
2496 cur_bytes = MIN(bytes, INT_MAX);
2497 if (bs->encrypted) {
2498 cur_bytes = MIN(cur_bytes,
2499 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
2500 - offset_in_cluster);
2501 }
2502
2503 qemu_co_mutex_lock(&s->lock);
2504
2505 ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
2506 &cluster_offset, &l2meta);
2507 if (ret < 0) {
2508 goto out_locked;
2509 }
2510
2511 assert((cluster_offset & 511) == 0);
2512
2513 ret = qcow2_pre_write_overlap_check(bs, 0,
2514 cluster_offset + offset_in_cluster,
2515 cur_bytes, true);
2516 if (ret < 0) {
2517 goto out_locked;
2518 }
2519
2520 qemu_co_mutex_unlock(&s->lock);
2521
2522 if (!aio && cur_bytes != bytes) {
2523 aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
2524 }
2525 ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_task_entry, 0,
2526 cluster_offset, offset, cur_bytes,
2527 qiov, qiov_offset, l2meta);
2528 l2meta = NULL;
2529 if (ret < 0) {
2530 goto fail_nometa;
2531 }
2532
2533 bytes -= cur_bytes;
2534 offset += cur_bytes;
2535 qiov_offset += cur_bytes;
2536 trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes);
2537 }
2538 ret = 0;
2539
2540 qemu_co_mutex_lock(&s->lock);
2541
2542out_locked:
2543 qcow2_handle_l2meta(bs, &l2meta, false);
2544
2545 qemu_co_mutex_unlock(&s->lock);
2546
2547fail_nometa:
2548 if (aio) {
2549 aio_task_pool_wait_all(aio);
2550 if (ret == 0) {
2551 ret = aio_task_pool_status(aio);
2552 }
2553 g_free(aio);
2554 }
2555
2556 trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
2557
2558 return ret;
2559}
2560
2561static int qcow2_inactivate(BlockDriverState *bs)
2562{
2563 BDRVQcow2State *s = bs->opaque;
2564 int ret, result = 0;
2565 Error *local_err = NULL;
2566
2567 qcow2_store_persistent_dirty_bitmaps(bs, true, &local_err);
2568 if (local_err != NULL) {
2569 result = -EINVAL;
2570 error_reportf_err(local_err, "Lost persistent bitmaps during "
2571 "inactivation of node '%s': ",
2572 bdrv_get_device_or_node_name(bs));
2573 }
2574
2575 ret = qcow2_cache_flush(bs, s->l2_table_cache);
2576 if (ret) {
2577 result = ret;
2578 error_report("Failed to flush the L2 table cache: %s",
2579 strerror(-ret));
2580 }
2581
2582 ret = qcow2_cache_flush(bs, s->refcount_block_cache);
2583 if (ret) {
2584 result = ret;
2585 error_report("Failed to flush the refcount block cache: %s",
2586 strerror(-ret));
2587 }
2588
2589 if (result == 0) {
2590 qcow2_mark_clean(bs);
2591 }
2592
2593 return result;
2594}
2595
2596static void qcow2_close(BlockDriverState *bs)
2597{
2598 BDRVQcow2State *s = bs->opaque;
2599 qemu_vfree(s->l1_table);
2600
2601 s->l1_table = NULL;
2602
2603 if (!(s->flags & BDRV_O_INACTIVE)) {
2604 qcow2_inactivate(bs);
2605 }
2606
2607 cache_clean_timer_del(bs);
2608 qcow2_cache_destroy(s->l2_table_cache);
2609 qcow2_cache_destroy(s->refcount_block_cache);
2610
2611 qcrypto_block_free(s->crypto);
2612 s->crypto = NULL;
2613
2614 g_free(s->unknown_header_fields);
2615 cleanup_unknown_header_ext(bs);
2616
2617 g_free(s->image_data_file);
2618 g_free(s->image_backing_file);
2619 g_free(s->image_backing_format);
2620
2621 if (has_data_file(bs)) {
2622 bdrv_unref_child(bs, s->data_file);
2623 }
2624
2625 qcow2_refcount_close(bs);
2626 qcow2_free_snapshots(bs);
2627}
2628
2629static void coroutine_fn qcow2_co_invalidate_cache(BlockDriverState *bs,
2630 Error **errp)
2631{
2632 BDRVQcow2State *s = bs->opaque;
2633 int flags = s->flags;
2634 QCryptoBlock *crypto = NULL;
2635 QDict *options;
2636 Error *local_err = NULL;
2637 int ret;
2638
2639
2640
2641
2642
2643
2644 crypto = s->crypto;
2645 s->crypto = NULL;
2646
2647 qcow2_close(bs);
2648
2649 memset(s, 0, sizeof(BDRVQcow2State));
2650 options = qdict_clone_shallow(bs->options);
2651
2652 flags &= ~BDRV_O_INACTIVE;
2653 qemu_co_mutex_lock(&s->lock);
2654 ret = qcow2_do_open(bs, options, flags, &local_err);
2655 qemu_co_mutex_unlock(&s->lock);
2656 qobject_unref(options);
2657 if (local_err) {
2658 error_propagate_prepend(errp, local_err,
2659 "Could not reopen qcow2 layer: ");
2660 bs->drv = NULL;
2661 return;
2662 } else if (ret < 0) {
2663 error_setg_errno(errp, -ret, "Could not reopen qcow2 layer");
2664 bs->drv = NULL;
2665 return;
2666 }
2667
2668 s->crypto = crypto;
2669}
2670
2671static size_t header_ext_add(char *buf, uint32_t magic, const void *s,
2672 size_t len, size_t buflen)
2673{
2674 QCowExtension *ext_backing_fmt = (QCowExtension*) buf;
2675 size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7);
2676
2677 if (buflen < ext_len) {
2678 return -ENOSPC;
2679 }
2680
2681 *ext_backing_fmt = (QCowExtension) {
2682 .magic = cpu_to_be32(magic),
2683 .len = cpu_to_be32(len),
2684 };
2685
2686 if (len) {
2687 memcpy(buf + sizeof(QCowExtension), s, len);
2688 }
2689
2690 return ext_len;
2691}
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701int qcow2_update_header(BlockDriverState *bs)
2702{
2703 BDRVQcow2State *s = bs->opaque;
2704 QCowHeader *header;
2705 char *buf;
2706 size_t buflen = s->cluster_size;
2707 int ret;
2708 uint64_t total_size;
2709 uint32_t refcount_table_clusters;
2710 size_t header_length;
2711 Qcow2UnknownHeaderExtension *uext;
2712
2713 buf = qemu_blockalign(bs, buflen);
2714
2715
2716 header = (QCowHeader*) buf;
2717
2718 if (buflen < sizeof(*header)) {
2719 ret = -ENOSPC;
2720 goto fail;
2721 }
2722
2723 header_length = sizeof(*header) + s->unknown_header_fields_size;
2724 total_size = bs->total_sectors * BDRV_SECTOR_SIZE;
2725 refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
2726
2727 *header = (QCowHeader) {
2728
2729 .magic = cpu_to_be32(QCOW_MAGIC),
2730 .version = cpu_to_be32(s->qcow_version),
2731 .backing_file_offset = 0,
2732 .backing_file_size = 0,
2733 .cluster_bits = cpu_to_be32(s->cluster_bits),
2734 .size = cpu_to_be64(total_size),
2735 .crypt_method = cpu_to_be32(s->crypt_method_header),
2736 .l1_size = cpu_to_be32(s->l1_size),
2737 .l1_table_offset = cpu_to_be64(s->l1_table_offset),
2738 .refcount_table_offset = cpu_to_be64(s->refcount_table_offset),
2739 .refcount_table_clusters = cpu_to_be32(refcount_table_clusters),
2740 .nb_snapshots = cpu_to_be32(s->nb_snapshots),
2741 .snapshots_offset = cpu_to_be64(s->snapshots_offset),
2742
2743
2744 .incompatible_features = cpu_to_be64(s->incompatible_features),
2745 .compatible_features = cpu_to_be64(s->compatible_features),
2746 .autoclear_features = cpu_to_be64(s->autoclear_features),
2747 .refcount_order = cpu_to_be32(s->refcount_order),
2748 .header_length = cpu_to_be32(header_length),
2749 };
2750
2751
2752 switch (s->qcow_version) {
2753 case 2:
2754 ret = offsetof(QCowHeader, incompatible_features);
2755 break;
2756 case 3:
2757 ret = sizeof(*header);
2758 break;
2759 default:
2760 ret = -EINVAL;
2761 goto fail;
2762 }
2763
2764 buf += ret;
2765 buflen -= ret;
2766 memset(buf, 0, buflen);
2767
2768
2769 if (s->unknown_header_fields_size) {
2770 if (buflen < s->unknown_header_fields_size) {
2771 ret = -ENOSPC;
2772 goto fail;
2773 }
2774
2775 memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size);
2776 buf += s->unknown_header_fields_size;
2777 buflen -= s->unknown_header_fields_size;
2778 }
2779
2780
2781 if (s->image_backing_format) {
2782 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
2783 s->image_backing_format,
2784 strlen(s->image_backing_format),
2785 buflen);
2786 if (ret < 0) {
2787 goto fail;
2788 }
2789
2790 buf += ret;
2791 buflen -= ret;
2792 }
2793
2794
2795 if (has_data_file(bs) && s->image_data_file) {
2796 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_DATA_FILE,
2797 s->image_data_file, strlen(s->image_data_file),
2798 buflen);
2799 if (ret < 0) {
2800 goto fail;
2801 }
2802
2803 buf += ret;
2804 buflen -= ret;
2805 }
2806
2807
2808 if (s->crypto_header.offset != 0) {
2809 s->crypto_header.offset = cpu_to_be64(s->crypto_header.offset);
2810 s->crypto_header.length = cpu_to_be64(s->crypto_header.length);
2811 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_CRYPTO_HEADER,
2812 &s->crypto_header, sizeof(s->crypto_header),
2813 buflen);
2814 s->crypto_header.offset = be64_to_cpu(s->crypto_header.offset);
2815 s->crypto_header.length = be64_to_cpu(s->crypto_header.length);
2816 if (ret < 0) {
2817 goto fail;
2818 }
2819 buf += ret;
2820 buflen -= ret;
2821 }
2822
2823
2824 if (s->qcow_version >= 3) {
2825 static const Qcow2Feature features[] = {
2826 {
2827 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
2828 .bit = QCOW2_INCOMPAT_DIRTY_BITNR,
2829 .name = "dirty bit",
2830 },
2831 {
2832 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
2833 .bit = QCOW2_INCOMPAT_CORRUPT_BITNR,
2834 .name = "corrupt bit",
2835 },
2836 {
2837 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
2838 .bit = QCOW2_INCOMPAT_DATA_FILE_BITNR,
2839 .name = "external data file",
2840 },
2841 {
2842 .type = QCOW2_FEAT_TYPE_COMPATIBLE,
2843 .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
2844 .name = "lazy refcounts",
2845 },
2846 {
2847 .type = QCOW2_FEAT_TYPE_AUTOCLEAR,
2848 .bit = QCOW2_AUTOCLEAR_BITMAPS_BITNR,
2849 .name = "bitmaps",
2850 },
2851 {
2852 .type = QCOW2_FEAT_TYPE_AUTOCLEAR,
2853 .bit = QCOW2_AUTOCLEAR_DATA_FILE_RAW_BITNR,
2854 .name = "raw external data",
2855 },
2856 };
2857
2858 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
2859 features, sizeof(features), buflen);
2860 if (ret < 0) {
2861 goto fail;
2862 }
2863 buf += ret;
2864 buflen -= ret;
2865 }
2866
2867
2868 if (s->nb_bitmaps > 0) {
2869 Qcow2BitmapHeaderExt bitmaps_header = {
2870 .nb_bitmaps = cpu_to_be32(s->nb_bitmaps),
2871 .bitmap_directory_size =
2872 cpu_to_be64(s->bitmap_directory_size),
2873 .bitmap_directory_offset =
2874 cpu_to_be64(s->bitmap_directory_offset)
2875 };
2876 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BITMAPS,
2877 &bitmaps_header, sizeof(bitmaps_header),
2878 buflen);
2879 if (ret < 0) {
2880 goto fail;
2881 }
2882 buf += ret;
2883 buflen -= ret;
2884 }
2885
2886
2887 QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
2888 ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
2889 if (ret < 0) {
2890 goto fail;
2891 }
2892
2893 buf += ret;
2894 buflen -= ret;
2895 }
2896
2897
2898 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen);
2899 if (ret < 0) {
2900 goto fail;
2901 }
2902
2903 buf += ret;
2904 buflen -= ret;
2905
2906
2907 if (s->image_backing_file) {
2908 size_t backing_file_len = strlen(s->image_backing_file);
2909
2910 if (buflen < backing_file_len) {
2911 ret = -ENOSPC;
2912 goto fail;
2913 }
2914
2915
2916 strncpy(buf, s->image_backing_file, buflen);
2917
2918 header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
2919 header->backing_file_size = cpu_to_be32(backing_file_len);
2920 }
2921
2922
2923 ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
2924 if (ret < 0) {
2925 goto fail;
2926 }
2927
2928 ret = 0;
2929fail:
2930 qemu_vfree(header);
2931 return ret;
2932}
2933
2934static int qcow2_change_backing_file(BlockDriverState *bs,
2935 const char *backing_file, const char *backing_fmt)
2936{
2937 BDRVQcow2State *s = bs->opaque;
2938
2939
2940
2941 if (backing_file && data_file_is_raw(bs)) {
2942 return -EINVAL;
2943 }
2944
2945 if (backing_file && strlen(backing_file) > 1023) {
2946 return -EINVAL;
2947 }
2948
2949 pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
2950 backing_file ?: "");
2951 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2952 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2953
2954 g_free(s->image_backing_file);
2955 g_free(s->image_backing_format);
2956
2957 s->image_backing_file = backing_file ? g_strdup(bs->backing_file) : NULL;
2958 s->image_backing_format = backing_fmt ? g_strdup(bs->backing_format) : NULL;
2959
2960 return qcow2_update_header(bs);
2961}
2962
2963static int qcow2_crypt_method_from_format(const char *encryptfmt)
2964{
2965 if (g_str_equal(encryptfmt, "luks")) {
2966 return QCOW_CRYPT_LUKS;
2967 } else if (g_str_equal(encryptfmt, "aes")) {
2968 return QCOW_CRYPT_AES;
2969 } else {
2970 return -EINVAL;
2971 }
2972}
2973
2974static int qcow2_set_up_encryption(BlockDriverState *bs,
2975 QCryptoBlockCreateOptions *cryptoopts,
2976 Error **errp)
2977{
2978 BDRVQcow2State *s = bs->opaque;
2979 QCryptoBlock *crypto = NULL;
2980 int fmt, ret;
2981
2982 switch (cryptoopts->format) {
2983 case Q_CRYPTO_BLOCK_FORMAT_LUKS:
2984 fmt = QCOW_CRYPT_LUKS;
2985 break;
2986 case Q_CRYPTO_BLOCK_FORMAT_QCOW:
2987 fmt = QCOW_CRYPT_AES;
2988 break;
2989 default:
2990 error_setg(errp, "Crypto format not supported in qcow2");
2991 return -EINVAL;
2992 }
2993
2994 s->crypt_method_header = fmt;
2995
2996 crypto = qcrypto_block_create(cryptoopts, "encrypt.",
2997 qcow2_crypto_hdr_init_func,
2998 qcow2_crypto_hdr_write_func,
2999 bs, errp);
3000 if (!crypto) {
3001 return -EINVAL;
3002 }
3003
3004 ret = qcow2_update_header(bs);
3005 if (ret < 0) {
3006 error_setg_errno(errp, -ret, "Could not write encryption header");
3007 goto out;
3008 }
3009
3010 ret = 0;
3011 out:
3012 qcrypto_block_free(crypto);
3013 return ret;
3014}
3015
3016
3017
3018
3019
3020
3021
3022
3023static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset,
3024 uint64_t new_length, PreallocMode mode,
3025 Error **errp)
3026{
3027 BDRVQcow2State *s = bs->opaque;
3028 uint64_t bytes;
3029 uint64_t host_offset = 0;
3030 int64_t file_length;
3031 unsigned int cur_bytes;
3032 int ret;
3033 QCowL2Meta *meta;
3034
3035 assert(offset <= new_length);
3036 bytes = new_length - offset;
3037
3038 while (bytes) {
3039 cur_bytes = MIN(bytes, QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size));
3040 ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
3041 &host_offset, &meta);
3042 if (ret < 0) {
3043 error_setg_errno(errp, -ret, "Allocating clusters failed");
3044 return ret;
3045 }
3046
3047 while (meta) {
3048 QCowL2Meta *next = meta->next;
3049
3050 ret = qcow2_alloc_cluster_link_l2(bs, meta);
3051 if (ret < 0) {
3052 error_setg_errno(errp, -ret, "Mapping clusters failed");
3053 qcow2_free_any_clusters(bs, meta->alloc_offset,
3054 meta->nb_clusters, QCOW2_DISCARD_NEVER);
3055 return ret;
3056 }
3057
3058
3059
3060 QLIST_REMOVE(meta, next_in_flight);
3061
3062 g_free(meta);
3063 meta = next;
3064 }
3065
3066
3067
3068 bytes -= cur_bytes;
3069 offset += cur_bytes;
3070 }
3071
3072
3073
3074
3075
3076
3077 file_length = bdrv_getlength(s->data_file->bs);
3078 if (file_length < 0) {
3079 error_setg_errno(errp, -file_length, "Could not get file size");
3080 return file_length;
3081 }
3082
3083 if (host_offset + cur_bytes > file_length) {
3084 if (mode == PREALLOC_MODE_METADATA) {
3085 mode = PREALLOC_MODE_OFF;
3086 }
3087 ret = bdrv_co_truncate(s->data_file, host_offset + cur_bytes, false,
3088 mode, errp);
3089 if (ret < 0) {
3090 return ret;
3091 }
3092 }
3093
3094 return 0;
3095}
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
/*
 * Compute how many bytes of refcount metadata (refcount blocks plus
 * refcount table clusters) are needed for an image of @clusters clusters,
 * taking into account that the refcount metadata itself occupies clusters
 * that need refcounts too.
 *
 * @clusters:          number of clusters to refcount (not counting the
 *                     refcount metadata itself)
 * @cluster_size:      size of a cluster in bytes
 * @refcount_order:    log2 of the refcount width in bits
 * @generous_increase: if true, once the computation has converged, half
 *                     of the refcount table clusters (rounded up) are
 *                     added to @clusters and the computation is run to
 *                     convergence one more time
 * @refblock_count:    if not NULL, receives the number of refcount blocks
 *
 * Returns the combined size of refcount blocks and table in bytes.
 */
int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size,
                                     int refcount_order, bool generous_increase,
                                     uint64_t *refblock_count)
{
    /* A table cluster holds one 64-bit entry per refcount block */
    const int64_t table_entries_per_cluster = cluster_size / sizeof(uint64_t);
    const int64_t refcounts_per_block =
        cluster_size * 8 / (1 << refcount_order);
    int64_t table = 0;  /* number of refcount table clusters */
    int64_t blocks = 0; /* number of refcount block clusters */
    int64_t total = 0;  /* clusters + blocks + table of this round */
    int64_t prev_total;

    /*
     * Fixed-point iteration: every round re-derives the number of blocks
     * and table clusters from the previous round's totals, until the
     * overall cluster count no longer changes.
     */
    do {
        prev_total = total;
        blocks = DIV_ROUND_UP(clusters + table + blocks, refcounts_per_block);
        table = DIV_ROUND_UP(blocks, table_entries_per_cluster);
        total = clusters + blocks + table;

        if (generous_increase && total == prev_total) {
            /* Converged once: add the head-room and start over */
            clusters += DIV_ROUND_UP(table, 2);
            generous_increase = false;
            total = 0;
        }
    } while (total != prev_total);

    if (refblock_count) {
        *refblock_count = blocks;
    }

    return (blocks + table) * cluster_size;
}
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
/*
 * Estimate the file size of a fully allocated image with a guest size of
 * @total_size bytes: the guest data rounded up to whole clusters, plus
 * one header cluster, the L2 tables, the L1 table and the refcount
 * metadata.
 *
 * @cluster_size is the cluster size in bytes, @refcount_order the log2 of
 * the refcount width in bits.
 */
static int64_t qcow2_calc_prealloc_size(int64_t total_size,
                                        size_t cluster_size,
                                        int refcount_order)
{
    const int64_t data_size = ROUND_UP(total_size, cluster_size);
    int64_t meta_size;
    uint64_t l2_entries;
    uint64_t l1_entries;

    /* header: 1 cluster */
    meta_size = cluster_size;

    /* L2 tables: one entry per data cluster, in whole table clusters */
    l2_entries = data_size / cluster_size;
    l2_entries = ROUND_UP(l2_entries, cluster_size / sizeof(uint64_t));
    meta_size += l2_entries * sizeof(uint64_t);

    /* L1 table: one entry per L2 table, in whole clusters */
    l1_entries = l2_entries * sizeof(uint64_t) / cluster_size;
    l1_entries = ROUND_UP(l1_entries, cluster_size / sizeof(uint64_t));
    meta_size += l1_entries * sizeof(uint64_t);

    /* refcount table and blocks covering everything counted so far */
    meta_size += qcow2_refcount_metadata_size(
            (meta_size + data_size) / cluster_size,
            cluster_size, refcount_order, false, NULL);

    return meta_size + data_size;
}
3183
3184static bool validate_cluster_size(size_t cluster_size, Error **errp)
3185{
3186 int cluster_bits = ctz32(cluster_size);
3187 if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS ||
3188 (1 << cluster_bits) != cluster_size)
3189 {
3190 error_setg(errp, "Cluster size must be a power of two between %d and "
3191 "%dk", 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10));
3192 return false;
3193 }
3194 return true;
3195}
3196
3197static size_t qcow2_opt_get_cluster_size_del(QemuOpts *opts, Error **errp)
3198{
3199 size_t cluster_size;
3200
3201 cluster_size = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE,
3202 DEFAULT_CLUSTER_SIZE);
3203 if (!validate_cluster_size(cluster_size, errp)) {
3204 return 0;
3205 }
3206 return cluster_size;
3207}
3208
3209static int qcow2_opt_get_version_del(QemuOpts *opts, Error **errp)
3210{
3211 char *buf;
3212 int ret;
3213
3214 buf = qemu_opt_get_del(opts, BLOCK_OPT_COMPAT_LEVEL);
3215 if (!buf) {
3216 ret = 3;
3217 } else if (!strcmp(buf, "0.10")) {
3218 ret = 2;
3219 } else if (!strcmp(buf, "1.1")) {
3220 ret = 3;
3221 } else {
3222 error_setg(errp, "Invalid compatibility level: '%s'", buf);
3223 ret = -EINVAL;
3224 }
3225 g_free(buf);
3226 return ret;
3227}
3228
3229static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version,
3230 Error **errp)
3231{
3232 uint64_t refcount_bits;
3233
3234 refcount_bits = qemu_opt_get_number_del(opts, BLOCK_OPT_REFCOUNT_BITS, 16);
3235 if (refcount_bits > 64 || !is_power_of_2(refcount_bits)) {
3236 error_setg(errp, "Refcount width must be a power of two and may not "
3237 "exceed 64 bits");
3238 return 0;
3239 }
3240
3241 if (version < 3 && refcount_bits != 16) {
3242 error_setg(errp, "Different refcount widths than 16 bits require "
3243 "compatibility level 1.1 or above (use compat=1.1 or "
3244 "greater)");
3245 return 0;
3246 }
3247
3248 return refcount_bits;
3249}
3250
3251static int coroutine_fn
3252qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
3253{
3254 BlockdevCreateOptionsQcow2 *qcow2_opts;
3255 QDict *options;
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269 BlockBackend *blk = NULL;
3270 BlockDriverState *bs = NULL;
3271 BlockDriverState *data_bs = NULL;
3272 QCowHeader *header;
3273 size_t cluster_size;
3274 int version;
3275 int refcount_order;
3276 uint64_t* refcount_table;
3277 Error *local_err = NULL;
3278 int ret;
3279
3280 assert(create_options->driver == BLOCKDEV_DRIVER_QCOW2);
3281 qcow2_opts = &create_options->u.qcow2;
3282
3283 bs = bdrv_open_blockdev_ref(qcow2_opts->file, errp);
3284 if (bs == NULL) {
3285 return -EIO;
3286 }
3287
3288
3289 if (!QEMU_IS_ALIGNED(qcow2_opts->size, BDRV_SECTOR_SIZE)) {
3290 error_setg(errp, "Image size must be a multiple of 512 bytes");
3291 ret = -EINVAL;
3292 goto out;
3293 }
3294
3295 if (qcow2_opts->has_version) {
3296 switch (qcow2_opts->version) {
3297 case BLOCKDEV_QCOW2_VERSION_V2:
3298 version = 2;
3299 break;
3300 case BLOCKDEV_QCOW2_VERSION_V3:
3301 version = 3;
3302 break;
3303 default:
3304 g_assert_not_reached();
3305 }
3306 } else {
3307 version = 3;
3308 }
3309
3310 if (qcow2_opts->has_cluster_size) {
3311 cluster_size = qcow2_opts->cluster_size;
3312 } else {
3313 cluster_size = DEFAULT_CLUSTER_SIZE;
3314 }
3315
3316 if (!validate_cluster_size(cluster_size, errp)) {
3317 ret = -EINVAL;
3318 goto out;
3319 }
3320
3321 if (!qcow2_opts->has_preallocation) {
3322 qcow2_opts->preallocation = PREALLOC_MODE_OFF;
3323 }
3324 if (qcow2_opts->has_backing_file &&
3325 qcow2_opts->preallocation != PREALLOC_MODE_OFF)
3326 {
3327 error_setg(errp, "Backing file and preallocation cannot be used at "
3328 "the same time");
3329 ret = -EINVAL;
3330 goto out;
3331 }
3332 if (qcow2_opts->has_backing_fmt && !qcow2_opts->has_backing_file) {
3333 error_setg(errp, "Backing format cannot be used without backing file");
3334 ret = -EINVAL;
3335 goto out;
3336 }
3337
3338 if (!qcow2_opts->has_lazy_refcounts) {
3339 qcow2_opts->lazy_refcounts = false;
3340 }
3341 if (version < 3 && qcow2_opts->lazy_refcounts) {
3342 error_setg(errp, "Lazy refcounts only supported with compatibility "
3343 "level 1.1 and above (use version=v3 or greater)");
3344 ret = -EINVAL;
3345 goto out;
3346 }
3347
3348 if (!qcow2_opts->has_refcount_bits) {
3349 qcow2_opts->refcount_bits = 16;
3350 }
3351 if (qcow2_opts->refcount_bits > 64 ||
3352 !is_power_of_2(qcow2_opts->refcount_bits))
3353 {
3354 error_setg(errp, "Refcount width must be a power of two and may not "
3355 "exceed 64 bits");
3356 ret = -EINVAL;
3357 goto out;
3358 }
3359 if (version < 3 && qcow2_opts->refcount_bits != 16) {
3360 error_setg(errp, "Different refcount widths than 16 bits require "
3361 "compatibility level 1.1 or above (use version=v3 or "
3362 "greater)");
3363 ret = -EINVAL;
3364 goto out;
3365 }
3366 refcount_order = ctz32(qcow2_opts->refcount_bits);
3367
3368 if (qcow2_opts->data_file_raw && !qcow2_opts->data_file) {
3369 error_setg(errp, "data-file-raw requires data-file");
3370 ret = -EINVAL;
3371 goto out;
3372 }
3373 if (qcow2_opts->data_file_raw && qcow2_opts->has_backing_file) {
3374 error_setg(errp, "Backing file and data-file-raw cannot be used at "
3375 "the same time");
3376 ret = -EINVAL;
3377 goto out;
3378 }
3379
3380 if (qcow2_opts->data_file) {
3381 if (version < 3) {
3382 error_setg(errp, "External data files are only supported with "
3383 "compatibility level 1.1 and above (use version=v3 or "
3384 "greater)");
3385 ret = -EINVAL;
3386 goto out;
3387 }
3388 data_bs = bdrv_open_blockdev_ref(qcow2_opts->data_file, errp);
3389 if (data_bs == NULL) {
3390 ret = -EIO;
3391 goto out;
3392 }
3393 }
3394
3395
3396 blk = blk_new(bdrv_get_aio_context(bs),
3397 BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
3398 ret = blk_insert_bs(blk, bs, errp);
3399 if (ret < 0) {
3400 goto out;
3401 }
3402 blk_set_allow_write_beyond_eof(blk, true);
3403
3404
3405 QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header));
3406 header = g_malloc0(cluster_size);
3407 *header = (QCowHeader) {
3408 .magic = cpu_to_be32(QCOW_MAGIC),
3409 .version = cpu_to_be32(version),
3410 .cluster_bits = cpu_to_be32(ctz32(cluster_size)),
3411 .size = cpu_to_be64(0),
3412 .l1_table_offset = cpu_to_be64(0),
3413 .l1_size = cpu_to_be32(0),
3414 .refcount_table_offset = cpu_to_be64(cluster_size),
3415 .refcount_table_clusters = cpu_to_be32(1),
3416 .refcount_order = cpu_to_be32(refcount_order),
3417 .header_length = cpu_to_be32(sizeof(*header)),
3418 };
3419
3420
3421 header->crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
3422
3423 if (qcow2_opts->lazy_refcounts) {
3424 header->compatible_features |=
3425 cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
3426 }
3427 if (data_bs) {
3428 header->incompatible_features |=
3429 cpu_to_be64(QCOW2_INCOMPAT_DATA_FILE);
3430 }
3431 if (qcow2_opts->data_file_raw) {
3432 header->autoclear_features |=
3433 cpu_to_be64(QCOW2_AUTOCLEAR_DATA_FILE_RAW);
3434 }
3435
3436 ret = blk_pwrite(blk, 0, header, cluster_size, 0);
3437 g_free(header);
3438 if (ret < 0) {
3439 error_setg_errno(errp, -ret, "Could not write qcow2 header");
3440 goto out;
3441 }
3442
3443
3444 refcount_table = g_malloc0(2 * cluster_size);
3445 refcount_table[0] = cpu_to_be64(2 * cluster_size);
3446 ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size, 0);
3447 g_free(refcount_table);
3448
3449 if (ret < 0) {
3450 error_setg_errno(errp, -ret, "Could not write refcount table");
3451 goto out;
3452 }
3453
3454 blk_unref(blk);
3455 blk = NULL;
3456
3457
3458
3459
3460
3461
3462 options = qdict_new();
3463 qdict_put_str(options, "driver", "qcow2");
3464 qdict_put_str(options, "file", bs->node_name);
3465 if (data_bs) {
3466 qdict_put_str(options, "data-file", data_bs->node_name);
3467 }
3468 blk = blk_new_open(NULL, NULL, options,
3469 BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
3470 &local_err);
3471 if (blk == NULL) {
3472 error_propagate(errp, local_err);
3473 ret = -EIO;
3474 goto out;
3475 }
3476
3477 ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size);
3478 if (ret < 0) {
3479 error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
3480 "header and refcount table");
3481 goto out;
3482
3483 } else if (ret != 0) {
3484 error_report("Huh, first cluster in empty image is already in use?");
3485 abort();
3486 }
3487
3488
3489 if (data_bs) {
3490 BDRVQcow2State *s = blk_bs(blk)->opaque;
3491 s->image_data_file = g_strdup(data_bs->filename);
3492 }
3493
3494
3495 ret = qcow2_update_header(blk_bs(blk));
3496 if (ret < 0) {
3497 error_setg_errno(errp, -ret, "Could not update qcow2 header");
3498 goto out;
3499 }
3500
3501
3502 ret = blk_truncate(blk, qcow2_opts->size, false, qcow2_opts->preallocation,
3503 errp);
3504 if (ret < 0) {
3505 error_prepend(errp, "Could not resize image: ");
3506 goto out;
3507 }
3508
3509
3510 if (qcow2_opts->has_backing_file) {
3511 const char *backing_format = NULL;
3512
3513 if (qcow2_opts->has_backing_fmt) {
3514 backing_format = BlockdevDriver_str(qcow2_opts->backing_fmt);
3515 }
3516
3517 ret = bdrv_change_backing_file(blk_bs(blk), qcow2_opts->backing_file,
3518 backing_format);
3519 if (ret < 0) {
3520 error_setg_errno(errp, -ret, "Could not assign backing file '%s' "
3521 "with format '%s'", qcow2_opts->backing_file,
3522 backing_format);
3523 goto out;
3524 }
3525 }
3526
3527
3528 if (qcow2_opts->has_encrypt) {
3529 ret = qcow2_set_up_encryption(blk_bs(blk), qcow2_opts->encrypt, errp);
3530 if (ret < 0) {
3531 goto out;
3532 }
3533 }
3534
3535 blk_unref(blk);
3536 blk = NULL;
3537
3538
3539
3540
3541
3542
3543
3544 options = qdict_new();
3545 qdict_put_str(options, "driver", "qcow2");
3546 qdict_put_str(options, "file", bs->node_name);
3547 if (data_bs) {
3548 qdict_put_str(options, "data-file", data_bs->node_name);
3549 }
3550 blk = blk_new_open(NULL, NULL, options,
3551 BDRV_O_RDWR | BDRV_O_NO_BACKING | BDRV_O_NO_IO,
3552 &local_err);
3553 if (blk == NULL) {
3554 error_propagate(errp, local_err);
3555 ret = -EIO;
3556 goto out;
3557 }
3558
3559 ret = 0;
3560out:
3561 blk_unref(blk);
3562 bdrv_unref(bs);
3563 bdrv_unref(data_bs);
3564 return ret;
3565}
3566
3567static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opts,
3568 Error **errp)
3569{
3570 BlockdevCreateOptions *create_options = NULL;
3571 QDict *qdict;
3572 Visitor *v;
3573 BlockDriverState *bs = NULL;
3574 BlockDriverState *data_bs = NULL;
3575 Error *local_err = NULL;
3576 const char *val;
3577 int ret;
3578
3579
3580
3581
3582
3583 qdict = qemu_opts_to_qdict_filtered(opts, NULL, bdrv_qcow2.create_opts,
3584 true);
3585
3586
3587 val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT);
3588 if (val && !strcmp(val, "on")) {
3589 qdict_put_str(qdict, BLOCK_OPT_ENCRYPT, "qcow");
3590 } else if (val && !strcmp(val, "off")) {
3591 qdict_del(qdict, BLOCK_OPT_ENCRYPT);
3592 }
3593
3594 val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT);
3595 if (val && !strcmp(val, "aes")) {
3596 qdict_put_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT, "qcow");
3597 }
3598
3599
3600
3601 val = qdict_get_try_str(qdict, BLOCK_OPT_COMPAT_LEVEL);
3602 if (val && !strcmp(val, "0.10")) {
3603 qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v2");
3604 } else if (val && !strcmp(val, "1.1")) {
3605 qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v3");
3606 }
3607
3608
3609 static const QDictRenames opt_renames[] = {
3610 { BLOCK_OPT_BACKING_FILE, "backing-file" },
3611 { BLOCK_OPT_BACKING_FMT, "backing-fmt" },
3612 { BLOCK_OPT_CLUSTER_SIZE, "cluster-size" },
3613 { BLOCK_OPT_LAZY_REFCOUNTS, "lazy-refcounts" },
3614 { BLOCK_OPT_REFCOUNT_BITS, "refcount-bits" },
3615 { BLOCK_OPT_ENCRYPT, BLOCK_OPT_ENCRYPT_FORMAT },
3616 { BLOCK_OPT_COMPAT_LEVEL, "version" },
3617 { BLOCK_OPT_DATA_FILE_RAW, "data-file-raw" },
3618 { NULL, NULL },
3619 };
3620
3621 if (!qdict_rename_keys(qdict, opt_renames, errp)) {
3622 ret = -EINVAL;
3623 goto finish;
3624 }
3625
3626
3627 ret = bdrv_create_file(filename, opts, errp);
3628 if (ret < 0) {
3629 goto finish;
3630 }
3631
3632 bs = bdrv_open(filename, NULL, NULL,
3633 BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
3634 if (bs == NULL) {
3635 ret = -EIO;
3636 goto finish;
3637 }
3638
3639
3640 val = qdict_get_try_str(qdict, BLOCK_OPT_DATA_FILE);
3641 if (val) {
3642 ret = bdrv_create_file(val, opts, errp);
3643 if (ret < 0) {
3644 goto finish;
3645 }
3646
3647 data_bs = bdrv_open(val, NULL, NULL,
3648 BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
3649 errp);
3650 if (data_bs == NULL) {
3651 ret = -EIO;
3652 goto finish;
3653 }
3654
3655 qdict_del(qdict, BLOCK_OPT_DATA_FILE);
3656 qdict_put_str(qdict, "data-file", data_bs->node_name);
3657 }
3658
3659
3660 qdict_put_str(qdict, "driver", "qcow2");
3661 qdict_put_str(qdict, "file", bs->node_name);
3662
3663
3664 v = qobject_input_visitor_new_flat_confused(qdict, errp);
3665 if (!v) {
3666 ret = -EINVAL;
3667 goto finish;
3668 }
3669
3670 visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err);
3671 visit_free(v);
3672
3673 if (local_err) {
3674 error_propagate(errp, local_err);
3675 ret = -EINVAL;
3676 goto finish;
3677 }
3678
3679
3680 create_options->u.qcow2.size = ROUND_UP(create_options->u.qcow2.size,
3681 BDRV_SECTOR_SIZE);
3682
3683
3684 ret = qcow2_co_create(create_options, errp);
3685 if (ret < 0) {
3686 goto finish;
3687 }
3688
3689 ret = 0;
3690finish:
3691 qobject_unref(qdict);
3692 bdrv_unref(bs);
3693 bdrv_unref(data_bs);
3694 qapi_free_BlockdevCreateOptions(create_options);
3695 return ret;
3696}
3697
3698
3699static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes)
3700{
3701 int64_t nr;
3702 int res;
3703
3704
3705 if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) {
3706 bytes = bs->total_sectors * BDRV_SECTOR_SIZE - offset;
3707 }
3708
3709 if (!bytes) {
3710 return true;
3711 }
3712 res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL);
3713 return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == bytes;
3714}
3715
3716static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
3717 int64_t offset, int bytes, BdrvRequestFlags flags)
3718{
3719 int ret;
3720 BDRVQcow2State *s = bs->opaque;
3721
3722 uint32_t head = offset % s->cluster_size;
3723 uint32_t tail = (offset + bytes) % s->cluster_size;
3724
3725 trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, bytes);
3726 if (offset + bytes == bs->total_sectors * BDRV_SECTOR_SIZE) {
3727 tail = 0;
3728 }
3729
3730 if (head || tail) {
3731 uint64_t off;
3732 unsigned int nr;
3733
3734 assert(head + bytes <= s->cluster_size);
3735
3736
3737 if (!(is_zero(bs, offset - head, head) &&
3738 is_zero(bs, offset + bytes,
3739 tail ? s->cluster_size - tail : 0))) {
3740 return -ENOTSUP;
3741 }
3742
3743 qemu_co_mutex_lock(&s->lock);
3744
3745 offset = QEMU_ALIGN_DOWN(offset, s->cluster_size);
3746 bytes = s->cluster_size;
3747 nr = s->cluster_size;
3748 ret = qcow2_get_cluster_offset(bs, offset, &nr, &off);
3749 if (ret != QCOW2_CLUSTER_UNALLOCATED &&
3750 ret != QCOW2_CLUSTER_ZERO_PLAIN &&
3751 ret != QCOW2_CLUSTER_ZERO_ALLOC) {
3752 qemu_co_mutex_unlock(&s->lock);
3753 return -ENOTSUP;
3754 }
3755 } else {
3756 qemu_co_mutex_lock(&s->lock);
3757 }
3758
3759 trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, bytes);
3760
3761
3762 ret = qcow2_cluster_zeroize(bs, offset, bytes, flags);
3763 qemu_co_mutex_unlock(&s->lock);
3764
3765 return ret;
3766}
3767
3768static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
3769 int64_t offset, int bytes)
3770{
3771 int ret;
3772 BDRVQcow2State *s = bs->opaque;
3773
3774 if (!QEMU_IS_ALIGNED(offset | bytes, s->cluster_size)) {
3775 assert(bytes < s->cluster_size);
3776
3777
3778 if (!QEMU_IS_ALIGNED(offset, s->cluster_size) ||
3779 offset + bytes != bs->total_sectors * BDRV_SECTOR_SIZE) {
3780 return -ENOTSUP;
3781 }
3782 }
3783
3784 qemu_co_mutex_lock(&s->lock);
3785 ret = qcow2_cluster_discard(bs, offset, bytes, QCOW2_DISCARD_REQUEST,
3786 false);
3787 qemu_co_mutex_unlock(&s->lock);
3788 return ret;
3789}
3790
3791static int coroutine_fn
3792qcow2_co_copy_range_from(BlockDriverState *bs,
3793 BdrvChild *src, uint64_t src_offset,
3794 BdrvChild *dst, uint64_t dst_offset,
3795 uint64_t bytes, BdrvRequestFlags read_flags,
3796 BdrvRequestFlags write_flags)
3797{
3798 BDRVQcow2State *s = bs->opaque;
3799 int ret;
3800 unsigned int cur_bytes;
3801 BdrvChild *child = NULL;
3802 BdrvRequestFlags cur_write_flags;
3803
3804 assert(!bs->encrypted);
3805 qemu_co_mutex_lock(&s->lock);
3806
3807 while (bytes != 0) {
3808 uint64_t copy_offset = 0;
3809
3810 cur_bytes = MIN(bytes, INT_MAX);
3811 cur_write_flags = write_flags;
3812
3813 ret = qcow2_get_cluster_offset(bs, src_offset, &cur_bytes, ©_offset);
3814 if (ret < 0) {
3815 goto out;
3816 }
3817
3818 switch (ret) {
3819 case QCOW2_CLUSTER_UNALLOCATED:
3820 if (bs->backing && bs->backing->bs) {
3821 int64_t backing_length = bdrv_getlength(bs->backing->bs);
3822 if (src_offset >= backing_length) {
3823 cur_write_flags |= BDRV_REQ_ZERO_WRITE;
3824 } else {
3825 child = bs->backing;
3826 cur_bytes = MIN(cur_bytes, backing_length - src_offset);
3827 copy_offset = src_offset;
3828 }
3829 } else {
3830 cur_write_flags |= BDRV_REQ_ZERO_WRITE;
3831 }
3832 break;
3833
3834 case QCOW2_CLUSTER_ZERO_PLAIN:
3835 case QCOW2_CLUSTER_ZERO_ALLOC:
3836 cur_write_flags |= BDRV_REQ_ZERO_WRITE;
3837 break;
3838
3839 case QCOW2_CLUSTER_COMPRESSED:
3840 ret = -ENOTSUP;
3841 goto out;
3842
3843 case QCOW2_CLUSTER_NORMAL:
3844 child = s->data_file;
3845 copy_offset += offset_into_cluster(s, src_offset);
3846 if ((copy_offset & 511) != 0) {
3847 ret = -EIO;
3848 goto out;
3849 }
3850 break;
3851
3852 default:
3853 abort();
3854 }
3855 qemu_co_mutex_unlock(&s->lock);
3856 ret = bdrv_co_copy_range_from(child,
3857 copy_offset,
3858 dst, dst_offset,
3859 cur_bytes, read_flags, cur_write_flags);
3860 qemu_co_mutex_lock(&s->lock);
3861 if (ret < 0) {
3862 goto out;
3863 }
3864
3865 bytes -= cur_bytes;
3866 src_offset += cur_bytes;
3867 dst_offset += cur_bytes;
3868 }
3869 ret = 0;
3870
3871out:
3872 qemu_co_mutex_unlock(&s->lock);
3873 return ret;
3874}
3875
3876static int coroutine_fn
3877qcow2_co_copy_range_to(BlockDriverState *bs,
3878 BdrvChild *src, uint64_t src_offset,
3879 BdrvChild *dst, uint64_t dst_offset,
3880 uint64_t bytes, BdrvRequestFlags read_flags,
3881 BdrvRequestFlags write_flags)
3882{
3883 BDRVQcow2State *s = bs->opaque;
3884 int offset_in_cluster;
3885 int ret;
3886 unsigned int cur_bytes;
3887 uint64_t cluster_offset;
3888 QCowL2Meta *l2meta = NULL;
3889
3890 assert(!bs->encrypted);
3891
3892 qemu_co_mutex_lock(&s->lock);
3893
3894 while (bytes != 0) {
3895
3896 l2meta = NULL;
3897
3898 offset_in_cluster = offset_into_cluster(s, dst_offset);
3899 cur_bytes = MIN(bytes, INT_MAX);
3900
3901
3902
3903
3904
3905 ret = qcow2_alloc_cluster_offset(bs, dst_offset, &cur_bytes,
3906 &cluster_offset, &l2meta);
3907 if (ret < 0) {
3908 goto fail;
3909 }
3910
3911 assert((cluster_offset & 511) == 0);
3912
3913 ret = qcow2_pre_write_overlap_check(bs, 0,
3914 cluster_offset + offset_in_cluster, cur_bytes, true);
3915 if (ret < 0) {
3916 goto fail;
3917 }
3918
3919 qemu_co_mutex_unlock(&s->lock);
3920 ret = bdrv_co_copy_range_to(src, src_offset,
3921 s->data_file,
3922 cluster_offset + offset_in_cluster,
3923 cur_bytes, read_flags, write_flags);
3924 qemu_co_mutex_lock(&s->lock);
3925 if (ret < 0) {
3926 goto fail;
3927 }
3928
3929 ret = qcow2_handle_l2meta(bs, &l2meta, true);
3930 if (ret) {
3931 goto fail;
3932 }
3933
3934 bytes -= cur_bytes;
3935 src_offset += cur_bytes;
3936 dst_offset += cur_bytes;
3937 }
3938 ret = 0;
3939
3940fail:
3941 qcow2_handle_l2meta(bs, &l2meta, false);
3942
3943 qemu_co_mutex_unlock(&s->lock);
3944
3945 trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
3946
3947 return ret;
3948}
3949
/*
 * Resize the virtual disk of @bs to @offset bytes.
 *
 * @offset must be a multiple of 512.  Images with snapshots cannot be
 * resized, and qcow2_truncate_bitmaps_check() must accept the image.
 *
 * Shrinking discards the clusters beyond the new end, shrinks the L1 table
 * and the refcount table, and then tries to cut off the unused tail of the
 * image file (a failure of that last step only produces a warning).
 * Preallocation must be off when shrinking.
 *
 * Growing enlarges the L1 table and then handles @prealloc:
 *  - OFF:      only an external data file, if any, is resized;
 *  - METADATA: delegated to preallocate_co();
 *  - FALLOC / FULL: with an external data file delegated to
 *    preallocate_co(); otherwise the new data clusters are allocated at the
 *    end of the image file, the file is grown with the requested
 *    preallocation mode and the clusters are linked into the L2 tables.
 *
 * Finally the new size is written to the image header and the driver
 * options are re-applied.
 *
 * @exact is only forwarded to bdrv_co_truncate() for the external data file
 * in the PREALLOC_MODE_OFF case.
 *
 * Returns 0 on success, or a negative errno with @errp set on failure.
 */
static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
                                          bool exact, PreallocMode prealloc,
                                          Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t old_length;
    int64_t new_l1_size;
    int ret;
    QDict *options;

    if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_METADATA &&
        prealloc != PREALLOC_MODE_FALLOC && prealloc != PREALLOC_MODE_FULL)
    {
        error_setg(errp, "Unsupported preallocation mode '%s'",
                   PreallocMode_str(prealloc));
        return -ENOTSUP;
    }

    if (offset & 511) {
        error_setg(errp, "The new size must be a multiple of 512");
        return -EINVAL;
    }

    qemu_co_mutex_lock(&s->lock);

    /* cannot proceed if image has snapshots */
    if (s->nb_snapshots) {
        error_setg(errp, "Can't resize an image which has snapshots");
        ret = -ENOTSUP;
        goto fail;
    }

    /* a non-zero result from the bitmap check also blocks the resize */
    if (qcow2_truncate_bitmaps_check(bs, errp)) {
        ret = -ENOTSUP;
        goto fail;
    }

    old_length = bs->total_sectors * BDRV_SECTOR_SIZE;
    new_l1_size = size_to_l1(s, offset);

    if (offset < old_length) {
        /* Shrinking the image */
        int64_t last_cluster, old_file_size;
        if (prealloc != PREALLOC_MODE_OFF) {
            error_setg(errp,
                       "Preallocation can't be used for shrinking an image");
            ret = -EINVAL;
            goto fail;
        }

        /* Drop every whole cluster that lies beyond the new end */
        ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size),
                                    old_length - ROUND_UP(offset,
                                                          s->cluster_size),
                                    QCOW2_DISCARD_ALWAYS, true);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to discard cropped clusters");
            goto fail;
        }

        ret = qcow2_shrink_l1_table(bs, new_l1_size);
        if (ret < 0) {
            error_setg_errno(errp, -ret,
                             "Failed to reduce the number of L2 tables");
            goto fail;
        }

        ret = qcow2_shrink_reftable(bs);
        if (ret < 0) {
            error_setg_errno(errp, -ret,
                             "Failed to discard unused refblocks");
            goto fail;
        }

        old_file_size = bdrv_getlength(bs->file->bs);
        if (old_file_size < 0) {
            error_setg_errno(errp, -old_file_size,
                             "Failed to inquire current file length");
            ret = old_file_size;
            goto fail;
        }
        last_cluster = qcow2_get_last_cluster(bs, old_file_size);
        if (last_cluster < 0) {
            error_setg_errno(errp, -last_cluster,
                             "Failed to find the last cluster");
            ret = last_cluster;
            goto fail;
        }
        if ((last_cluster + 1) * s->cluster_size < old_file_size) {
            Error *local_err = NULL;

            /*
             * Cut the image file right after the last cluster reported by
             * qcow2_get_last_cluster().  The return value is deliberately
             * not checked: a failure here is only reported as a warning and
             * does not fail the resize.
             */
            bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size,
                             false, PREALLOC_MODE_OFF, &local_err);
            if (local_err) {
                warn_reportf_err(local_err,
                                 "Failed to truncate the tail of the image: ");
            }
        }
    } else {
        /* Growing (or keeping) the image: make room in the L1 table */
        ret = qcow2_grow_l1_table(bs, new_l1_size, true);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to grow the L1 table");
            goto fail;
        }
    }

    switch (prealloc) {
    case PREALLOC_MODE_OFF:
        if (has_data_file(bs)) {
            /*
             * With an external data file, resize that file to the new
             * virtual size as well; this is the only place where @exact is
             * passed on.
             */
            ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, errp);
            if (ret < 0) {
                goto fail;
            }
        }
        break;

    case PREALLOC_MODE_METADATA:
        ret = preallocate_co(bs, old_length, offset, prealloc, errp);
        if (ret < 0) {
            goto fail;
        }
        break;

    case PREALLOC_MODE_FALLOC:
    case PREALLOC_MODE_FULL:
    {
        int64_t allocation_start, host_offset, guest_offset;
        int64_t clusters_allocated;
        int64_t old_file_size, new_file_size;
        uint64_t nb_new_data_clusters, nb_new_l2_tables;

        /*
         * With an external data file the generic preallocate_co() path is
         * used instead of the allocation scheme below.
         */
        if (has_data_file(bs)) {
            ret = preallocate_co(bs, old_length, offset, prealloc, errp);
            if (ret < 0) {
                goto fail;
            }
            break;
        }

        old_file_size = bdrv_getlength(bs->file->bs);
        if (old_file_size < 0) {
            error_setg_errno(errp, -old_file_size,
                             "Failed to inquire current file length");
            ret = old_file_size;
            goto fail;
        }
        old_file_size = ROUND_UP(old_file_size, s->cluster_size);

        nb_new_data_clusters = DIV_ROUND_UP(offset - old_length,
                                            s->cluster_size);

        /*
         * Number of L2 tables needed to map the new data clusters, with
         * cluster_size / sizeof(uint64_t) entries per table.
         * NOTE(review): this appears to be an upper bound that ignores
         * L2 tables which may already exist - confirm.
         */
        nb_new_l2_tables = DIV_ROUND_UP(nb_new_data_clusters,
                                        s->cluster_size / sizeof(uint64_t));

        /*
         * One extra table; presumably because the new range need not start
         * on an L2 table boundary - TODO confirm.
         */
        nb_new_l2_tables++;

        /* Extend the refcount structures to cover the area to be added */
        allocation_start = qcow2_refcount_area(bs, old_file_size,
                                               nb_new_data_clusters +
                                               nb_new_l2_tables,
                                               true, 0, 0);
        if (allocation_start < 0) {
            error_setg_errno(errp, -allocation_start,
                             "Failed to resize refcount structures");
            ret = allocation_start;
            goto fail;
        }

        clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start,
                                                     nb_new_data_clusters);
        if (clusters_allocated < 0) {
            error_setg_errno(errp, -clusters_allocated,
                             "Failed to allocate data clusters");
            ret = clusters_allocated;
            goto fail;
        }

        assert(clusters_allocated == nb_new_data_clusters);

        /* Allocate the data area */
        new_file_size = allocation_start +
                        nb_new_data_clusters * s->cluster_size;
        /* The image file is grown using the caller's preallocation mode */
        ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, errp);
        if (ret < 0) {
            error_prepend(errp, "Failed to resize underlying file: ");
            qcow2_free_clusters(bs, allocation_start,
                                nb_new_data_clusters * s->cluster_size,
                                QCOW2_DISCARD_OTHER);
            goto fail;
        }

        /* Create the necessary L2 entries */
        host_offset = allocation_start;
        guest_offset = old_length;
        while (nb_new_data_clusters) {
            /* Link at most up to the end of the current L2 slice per pass */
            int64_t nb_clusters = MIN(
                nb_new_data_clusters,
                s->l2_slice_size - offset_to_l2_slice_index(s, guest_offset));
            QCowL2Meta allocation = {
                .offset       = guest_offset,
                .alloc_offset = host_offset,
                .nb_clusters  = nb_clusters,
            };
            qemu_co_queue_init(&allocation.dependent_requests);

            ret = qcow2_alloc_cluster_link_l2(bs, &allocation);
            if (ret < 0) {
                error_setg_errno(errp, -ret, "Failed to update L2 tables");
                /* Release the clusters that have not been linked yet */
                qcow2_free_clusters(bs, host_offset,
                                    nb_new_data_clusters * s->cluster_size,
                                    QCOW2_DISCARD_OTHER);
                goto fail;
            }

            guest_offset += nb_clusters * s->cluster_size;
            host_offset += nb_clusters * s->cluster_size;
            nb_new_data_clusters -= nb_clusters;
        }
        break;
    }

    default:
        g_assert_not_reached();
    }

    if (prealloc != PREALLOC_MODE_OFF) {
        /* Flush metadata before actually changing the image size */
        ret = qcow2_write_caches(bs);
        if (ret < 0) {
            error_setg_errno(errp, -ret,
                             "Failed to flush the preallocated area to disk");
            goto fail;
        }
    }

    bs->total_sectors = offset / BDRV_SECTOR_SIZE;

    /* write updated header.size (stored big-endian; offset is reused) */
    offset = cpu_to_be64(offset);
    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
                           &offset, sizeof(uint64_t));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Failed to update the image size");
        goto fail;
    }

    s->l1_vm_state_index = new_l1_size;

    /* Re-apply the current options on a copy of bs->options */
    options = qdict_clone_shallow(bs->options);
    ret = qcow2_update_options(bs, options, s->flags, errp);
    qobject_unref(options);
    if (ret < 0) {
        goto fail;
    }
    ret = 0;
fail:
    qemu_co_mutex_unlock(&s->lock);
    return ret;
}
4234
4235
4236
4237static coroutine_fn int
4238qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
4239 uint64_t offset, uint64_t bytes,
4240 QEMUIOVector *qiov, size_t qiov_offset)
4241{
4242 BDRVQcow2State *s = bs->opaque;
4243 int ret;
4244 ssize_t out_len;
4245 uint8_t *buf, *out_buf;
4246 uint64_t cluster_offset;
4247
4248 if (has_data_file(bs)) {
4249 return -ENOTSUP;
4250 }
4251
4252 if (bytes == 0) {
4253
4254
4255 int64_t len = bdrv_getlength(bs->file->bs);
4256 if (len < 0) {
4257 return len;
4258 }
4259 return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, NULL);
4260 }
4261
4262 if (offset_into_cluster(s, offset)) {
4263 return -EINVAL;
4264 }
4265
4266 buf = qemu_blockalign(bs, s->cluster_size);
4267 if (bytes != s->cluster_size) {
4268 if (bytes > s->cluster_size ||
4269 offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS)
4270 {
4271 qemu_vfree(buf);
4272 return -EINVAL;
4273 }
4274
4275 memset(buf + bytes, 0, s->cluster_size - bytes);
4276 }
4277 qemu_iovec_to_buf(qiov, qiov_offset, buf, bytes);
4278
4279 out_buf = g_malloc(s->cluster_size);
4280
4281 out_len = qcow2_co_compress(bs, out_buf, s->cluster_size - 1,
4282 buf, s->cluster_size);
4283 if (out_len == -ENOMEM) {
4284
4285 ret = qcow2_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset, 0);
4286 if (ret < 0) {
4287 goto fail;
4288 }
4289 goto success;
4290 } else if (out_len < 0) {
4291 ret = -EINVAL;
4292 goto fail;
4293 }
4294
4295 qemu_co_mutex_lock(&s->lock);
4296 ret = qcow2_alloc_compressed_cluster_offset(bs, offset, out_len,
4297 &cluster_offset);
4298 if (ret < 0) {
4299 qemu_co_mutex_unlock(&s->lock);
4300 goto fail;
4301 }
4302
4303 ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len, true);
4304 qemu_co_mutex_unlock(&s->lock);
4305 if (ret < 0) {
4306 goto fail;
4307 }
4308
4309 BLKDBG_EVENT(s->data_file, BLKDBG_WRITE_COMPRESSED);
4310 ret = bdrv_co_pwrite(s->data_file, cluster_offset, out_len, out_buf, 0);
4311 if (ret < 0) {
4312 goto fail;
4313 }
4314success:
4315 ret = 0;
4316fail:
4317 qemu_vfree(buf);
4318 g_free(out_buf);
4319 return ret;
4320}
4321
4322static int coroutine_fn
4323qcow2_co_preadv_compressed(BlockDriverState *bs,
4324 uint64_t file_cluster_offset,
4325 uint64_t offset,
4326 uint64_t bytes,
4327 QEMUIOVector *qiov,
4328 size_t qiov_offset)
4329{
4330 BDRVQcow2State *s = bs->opaque;
4331 int ret = 0, csize, nb_csectors;
4332 uint64_t coffset;
4333 uint8_t *buf, *out_buf;
4334 int offset_in_cluster = offset_into_cluster(s, offset);
4335
4336 coffset = file_cluster_offset & s->cluster_offset_mask;
4337 nb_csectors = ((file_cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
4338 csize = nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE -
4339 (coffset & ~QCOW2_COMPRESSED_SECTOR_MASK);
4340
4341 buf = g_try_malloc(csize);
4342 if (!buf) {
4343 return -ENOMEM;
4344 }
4345
4346 out_buf = qemu_blockalign(bs, s->cluster_size);
4347
4348 BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
4349 ret = bdrv_co_pread(bs->file, coffset, csize, buf, 0);
4350 if (ret < 0) {
4351 goto fail;
4352 }
4353
4354 if (qcow2_co_decompress(bs, out_buf, s->cluster_size, buf, csize) < 0) {
4355 ret = -EIO;
4356 goto fail;
4357 }
4358
4359 qemu_iovec_from_buf(qiov, qiov_offset, out_buf + offset_in_cluster, bytes);
4360
4361fail:
4362 qemu_vfree(out_buf);
4363 g_free(buf);
4364
4365 return ret;
4366}
4367
/*
 * Empty the image by rebuilding its metadata from scratch instead of
 * discarding clusters one by one.
 *
 * The resulting layout, as established by the writes below, is: refcount
 * table in the cluster at offset cluster_size, a single refcount block at
 * 2 * cluster_size, and the L1 table starting at 3 * cluster_size.  The
 * image file is finally truncated to (3 + l1_clusters) clusters.
 *
 * Failures after the on-disk L1 table has been touched jump to
 * fail_broken_refcounts, which clears bs->drv.
 *
 * Returns 0 on success or a negative errno.
 */
static int make_completely_empty(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;
    Error *local_err = NULL;
    int ret, l1_clusters;
    int64_t offset;
    uint64_t *new_reftable = NULL;
    uint64_t rt_entry, l1_size2;
    /*
     * Image of the header fields rewritten in one go below.
     * NOTE(review): assumes l1_table_offset, refcount_table_offset and
     * refcount_table_clusters are laid out contiguously in QCowHeader in
     * this order - confirm against the header definition.
     */
    struct {
        uint64_t l1_offset;
        uint64_t reftable_offset;
        uint32_t reftable_clusters;
    } QEMU_PACKED l1_ofs_rt_ofs_cls;

    ret = qcow2_cache_empty(bs, s->l2_table_cache);
    if (ret < 0) {
        goto fail;
    }

    ret = qcow2_cache_empty(bs, s->refcount_block_cache);
    if (ret < 0) {
        goto fail;
    }

    /* Refcounts will be broken utterly */
    ret = qcow2_mark_dirty(bs);
    if (ret < 0) {
        goto fail;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);

    l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
    l1_size2 = (uint64_t)s->l1_size * sizeof(uint64_t);

    /*
     * Zero the current L1 table, first on disk and then in memory, so that
     * no guest cluster is mapped any more.  From here on the refcounts no
     * longer match the references, hence the different failure label.
     */
    ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset,
                             l1_clusters * s->cluster_size, 0);
    if (ret < 0) {
        goto fail_broken_refcounts;
    }
    memset(s->l1_table, 0, l1_size2);

    BLKDBG_EVENT(bs->file, BLKDBG_EMPTY_IMAGE_PREPARE);

    /*
     * Zero the area that will hold the new metadata: starting at the second
     * cluster, (2 + l1_clusters) clusters, i.e. the future refcount table,
     * refcount block and L1 table.
     */
    ret = bdrv_pwrite_zeroes(bs->file, s->cluster_size,
                             (2 + l1_clusters) * s->cluster_size, 0);
    if (ret < 0) {
        goto fail_broken_refcounts;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
    BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE);

    /*
     * Point the header at the new L1 table and the new one-cluster
     * refcount table with a single synchronous write (values are stored
     * big-endian).
     */
    l1_ofs_rt_ofs_cls.l1_offset = cpu_to_be64(3 * s->cluster_size);
    l1_ofs_rt_ofs_cls.reftable_offset = cpu_to_be64(s->cluster_size);
    l1_ofs_rt_ofs_cls.reftable_clusters = cpu_to_be32(1);
    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset),
                           &l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls));
    if (ret < 0) {
        goto fail_broken_refcounts;
    }

    s->l1_table_offset = 3 * s->cluster_size;

    new_reftable = g_try_new0(uint64_t, s->cluster_size / sizeof(uint64_t));
    if (!new_reftable) {
        ret = -ENOMEM;
        goto fail_broken_refcounts;
    }

    s->refcount_table_offset = s->cluster_size;
    s->refcount_table_size   = s->cluster_size / sizeof(uint64_t);
    s->max_refcount_table_index = 0;

    /* Replace the in-memory refcount table; ownership moves to s */
    g_free(s->refcount_table);
    s->refcount_table = new_reftable;
    new_reftable = NULL;

    /*
     * At this point the on-disk refcount table is all zeroes (it was zeroed
     * above), matching the freshly allocated in-memory table.
     */
    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);

    /* Enter the first refblock into the reftable, on disk then in memory */
    rt_entry = cpu_to_be64(2 * s->cluster_size);
    ret = bdrv_pwrite_sync(bs->file, s->cluster_size,
                           &rt_entry, sizeof(rt_entry));
    if (ret < 0) {
        goto fail_broken_refcounts;
    }
    s->refcount_table[0] = 2 * s->cluster_size;

    /*
     * Allocate the header cluster, the refcount table, the refcount block
     * and the L1 table in one piece; the allocation is required to land at
     * offset 0.
     */
    s->free_cluster_index = 0;
    assert(3 + l1_clusters <= s->refcount_block_size);
    offset = qcow2_alloc_clusters(bs, 3 * s->cluster_size + l1_size2);
    if (offset < 0) {
        ret = offset;
        goto fail_broken_refcounts;
    } else if (offset > 0) {
        error_report("First cluster in emptied image is in use");
        abort();
    }

    /*
     * The metadata is consistent again, so the dirty flag set earlier can
     * be cleared.
     */
    ret = qcow2_mark_clean(bs);
    if (ret < 0) {
        goto fail;
    }

    /* Cut off everything behind the new metadata */
    ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size, false,
                        PREALLOC_MODE_OFF, &local_err);
    if (ret < 0) {
        error_report_err(local_err);
        goto fail;
    }

    return 0;

fail_broken_refcounts:
    /*
     * The image state no longer matches its refcounts.  Clearing bs->drv
     * presumably makes the node unusable for further requests
     * - TODO confirm against the generic block layer.
     */
    bs->drv = NULL;

fail:
    g_free(new_reftable);
    return ret;
}
4518
4519static int qcow2_make_empty(BlockDriverState *bs)
4520{
4521 BDRVQcow2State *s = bs->opaque;
4522 uint64_t offset, end_offset;
4523 int step = QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size);
4524 int l1_clusters, ret = 0;
4525
4526 l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
4527
4528 if (s->qcow_version >= 3 && !s->snapshots && !s->nb_bitmaps &&
4529 3 + l1_clusters <= s->refcount_block_size &&
4530 s->crypt_method_header != QCOW_CRYPT_LUKS &&
4531 !has_data_file(bs)) {
4532
4533
4534
4535
4536
4537
4538
4539
4540
4541 return make_completely_empty(bs);
4542 }
4543
4544
4545
4546 end_offset = bs->total_sectors * BDRV_SECTOR_SIZE;
4547 for (offset = 0; offset < end_offset; offset += step) {
4548
4549
4550
4551
4552
4553 ret = qcow2_cluster_discard(bs, offset, MIN(step, end_offset - offset),
4554 QCOW2_DISCARD_SNAPSHOT, true);
4555 if (ret < 0) {
4556 break;
4557 }
4558 }
4559
4560 return ret;
4561}
4562
4563static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
4564{
4565 BDRVQcow2State *s = bs->opaque;
4566 int ret;
4567
4568 qemu_co_mutex_lock(&s->lock);
4569 ret = qcow2_write_caches(bs);
4570 qemu_co_mutex_unlock(&s->lock);
4571
4572 return ret;
4573}
4574
4575static ssize_t qcow2_measure_crypto_hdr_init_func(QCryptoBlock *block,
4576 size_t headerlen, void *opaque, Error **errp)
4577{
4578 size_t *headerlenp = opaque;
4579
4580
4581 *headerlenp = headerlen;
4582 return 0;
4583}
4584
4585static ssize_t qcow2_measure_crypto_hdr_write_func(QCryptoBlock *block,
4586 size_t offset, const uint8_t *buf, size_t buflen,
4587 void *opaque, Error **errp)
4588{
4589
4590 return buflen;
4591}
4592
4593
4594static bool qcow2_measure_luks_headerlen(QemuOpts *opts, size_t *len,
4595 Error **errp)
4596{
4597 QDict *opts_qdict;
4598 QDict *cryptoopts_qdict;
4599 QCryptoBlockCreateOptions *cryptoopts;
4600 QCryptoBlock *crypto;
4601
4602
4603 opts_qdict = qemu_opts_to_qdict(opts, NULL);
4604 qdict_extract_subqdict(opts_qdict, &cryptoopts_qdict, "encrypt.");
4605 qobject_unref(opts_qdict);
4606
4607
4608 qdict_put_str(cryptoopts_qdict, "format", "luks");
4609 cryptoopts = block_crypto_create_opts_init(cryptoopts_qdict, errp);
4610 qobject_unref(cryptoopts_qdict);
4611 if (!cryptoopts) {
4612 return false;
4613 }
4614
4615
4616 crypto = qcrypto_block_create(cryptoopts, "encrypt.",
4617 qcow2_measure_crypto_hdr_init_func,
4618 qcow2_measure_crypto_hdr_write_func,
4619 len, errp);
4620 qapi_free_QCryptoBlockCreateOptions(cryptoopts);
4621 if (!crypto) {
4622 return false;
4623 }
4624
4625 qcrypto_block_free(crypto);
4626 return true;
4627}
4628
4629static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
4630 Error **errp)
4631{
4632 Error *local_err = NULL;
4633 BlockMeasureInfo *info;
4634 uint64_t required = 0;
4635 uint64_t virtual_size;
4636 uint64_t refcount_bits;
4637 uint64_t l2_tables;
4638 uint64_t luks_payload_size = 0;
4639 size_t cluster_size;
4640 int version;
4641 char *optstr;
4642 PreallocMode prealloc;
4643 bool has_backing_file;
4644 bool has_luks;
4645
4646
4647 cluster_size = qcow2_opt_get_cluster_size_del(opts, &local_err);
4648 if (local_err) {
4649 goto err;
4650 }
4651
4652 version = qcow2_opt_get_version_del(opts, &local_err);
4653 if (local_err) {
4654 goto err;
4655 }
4656
4657 refcount_bits = qcow2_opt_get_refcount_bits_del(opts, version, &local_err);
4658 if (local_err) {
4659 goto err;
4660 }
4661
4662 optstr = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
4663 prealloc = qapi_enum_parse(&PreallocMode_lookup, optstr,
4664 PREALLOC_MODE_OFF, &local_err);
4665 g_free(optstr);
4666 if (local_err) {
4667 goto err;
4668 }
4669
4670 optstr = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
4671 has_backing_file = !!optstr;
4672 g_free(optstr);
4673
4674 optstr = qemu_opt_get_del(opts, BLOCK_OPT_ENCRYPT_FORMAT);
4675 has_luks = optstr && strcmp(optstr, "luks") == 0;
4676 g_free(optstr);
4677
4678 if (has_luks) {
4679 size_t headerlen;
4680
4681 if (!qcow2_measure_luks_headerlen(opts, &headerlen, &local_err)) {
4682 goto err;
4683 }
4684
4685 luks_payload_size = ROUND_UP(headerlen, cluster_size);
4686 }
4687
4688 virtual_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
4689 virtual_size = ROUND_UP(virtual_size, cluster_size);
4690
4691
4692 l2_tables = DIV_ROUND_UP(virtual_size / cluster_size,
4693 cluster_size / sizeof(uint64_t));
4694 if (l2_tables * sizeof(uint64_t) > QCOW_MAX_L1_SIZE) {
4695 error_setg(&local_err, "The image size is too large "
4696 "(try using a larger cluster size)");
4697 goto err;
4698 }
4699
4700
4701 if (in_bs) {
4702 int64_t ssize = bdrv_getlength(in_bs);
4703 if (ssize < 0) {
4704 error_setg_errno(&local_err, -ssize,
4705 "Unable to get image virtual_size");
4706 goto err;
4707 }
4708
4709 virtual_size = ROUND_UP(ssize, cluster_size);
4710
4711 if (has_backing_file) {
4712
4713
4714
4715
4716
4717 required = virtual_size;
4718 } else {
4719 int64_t offset;
4720 int64_t pnum = 0;
4721
4722 for (offset = 0; offset < ssize; offset += pnum) {
4723 int ret;
4724
4725 ret = bdrv_block_status_above(in_bs, NULL, offset,
4726 ssize - offset, &pnum, NULL,
4727 NULL);
4728 if (ret < 0) {
4729 error_setg_errno(&local_err, -ret,
4730 "Unable to get block status");
4731 goto err;
4732 }
4733
4734 if (ret & BDRV_BLOCK_ZERO) {
4735
4736 } else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) ==
4737 (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) {
4738
4739 pnum = ROUND_UP(offset + pnum, cluster_size) - offset;
4740
4741
4742 required += offset % cluster_size + pnum;
4743 }
4744 }
4745 }
4746 }
4747
4748
4749
4750
4751 if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) {
4752 required = virtual_size;
4753 }
4754
4755 info = g_new(BlockMeasureInfo, 1);
4756 info->fully_allocated =
4757 qcow2_calc_prealloc_size(virtual_size, cluster_size,
4758 ctz32(refcount_bits)) + luks_payload_size;
4759
4760
4761
4762
4763
4764 info->required = info->fully_allocated - virtual_size + required;
4765 return info;
4766
4767err:
4768 error_propagate(errp, local_err);
4769 return NULL;
4770}
4771
4772static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4773{
4774 BDRVQcow2State *s = bs->opaque;
4775 bdi->unallocated_blocks_are_zero = true;
4776 bdi->cluster_size = s->cluster_size;
4777 bdi->vm_state_offset = qcow2_vm_state_offset(s);
4778 return 0;
4779}
4780
4781static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs,
4782 Error **errp)
4783{
4784 BDRVQcow2State *s = bs->opaque;
4785 ImageInfoSpecific *spec_info;
4786 QCryptoBlockInfo *encrypt_info = NULL;
4787 Error *local_err = NULL;
4788
4789 if (s->crypto != NULL) {
4790 encrypt_info = qcrypto_block_get_info(s->crypto, &local_err);
4791 if (local_err) {
4792 error_propagate(errp, local_err);
4793 return NULL;
4794 }
4795 }
4796
4797 spec_info = g_new(ImageInfoSpecific, 1);
4798 *spec_info = (ImageInfoSpecific){
4799 .type = IMAGE_INFO_SPECIFIC_KIND_QCOW2,
4800 .u.qcow2.data = g_new0(ImageInfoSpecificQCow2, 1),
4801 };
4802 if (s->qcow_version == 2) {
4803 *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
4804 .compat = g_strdup("0.10"),
4805 .refcount_bits = s->refcount_bits,
4806 };
4807 } else if (s->qcow_version == 3) {
4808 Qcow2BitmapInfoList *bitmaps;
4809 bitmaps = qcow2_get_bitmap_info_list(bs, &local_err);
4810 if (local_err) {
4811 error_propagate(errp, local_err);
4812 qapi_free_ImageInfoSpecific(spec_info);
4813 qapi_free_QCryptoBlockInfo(encrypt_info);
4814 return NULL;
4815 }
4816 *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
4817 .compat = g_strdup("1.1"),
4818 .lazy_refcounts = s->compatible_features &
4819 QCOW2_COMPAT_LAZY_REFCOUNTS,
4820 .has_lazy_refcounts = true,
4821 .corrupt = s->incompatible_features &
4822 QCOW2_INCOMPAT_CORRUPT,
4823 .has_corrupt = true,
4824 .refcount_bits = s->refcount_bits,
4825 .has_bitmaps = !!bitmaps,
4826 .bitmaps = bitmaps,
4827 .has_data_file = !!s->image_data_file,
4828 .data_file = g_strdup(s->image_data_file),
4829 .has_data_file_raw = has_data_file(bs),
4830 .data_file_raw = data_file_is_raw(bs),
4831 };
4832 } else {
4833
4834
4835 assert(false);
4836 }
4837
4838 if (encrypt_info) {
4839 ImageInfoSpecificQCow2Encryption *qencrypt =
4840 g_new(ImageInfoSpecificQCow2Encryption, 1);
4841 switch (encrypt_info->format) {
4842 case Q_CRYPTO_BLOCK_FORMAT_QCOW:
4843 qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_AES;
4844 break;
4845 case Q_CRYPTO_BLOCK_FORMAT_LUKS:
4846 qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_LUKS;
4847 qencrypt->u.luks = encrypt_info->u.luks;
4848 break;
4849 default:
4850 abort();
4851 }
4852
4853
4854 memset(&encrypt_info->u, 0, sizeof(encrypt_info->u));
4855 qapi_free_QCryptoBlockInfo(encrypt_info);
4856
4857 spec_info->u.qcow2.data->has_encrypt = true;
4858 spec_info->u.qcow2.data->encrypt = qencrypt;
4859 }
4860
4861 return spec_info;
4862}
4863
4864static int qcow2_has_zero_init(BlockDriverState *bs)
4865{
4866 BDRVQcow2State *s = bs->opaque;
4867 bool preallocated;
4868
4869 if (qemu_in_coroutine()) {
4870 qemu_co_mutex_lock(&s->lock);
4871 }
4872
4873
4874
4875
4876
4877 preallocated = s->l1_size > 0 && s->l1_table[0] != 0;
4878 if (qemu_in_coroutine()) {
4879 qemu_co_mutex_unlock(&s->lock);
4880 }
4881
4882 if (!preallocated) {
4883 return 1;
4884 } else if (bs->encrypted) {
4885 return 0;
4886 } else {
4887 return bdrv_has_zero_init(s->data_file->bs);
4888 }
4889}
4890
4891static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
4892 int64_t pos)
4893{
4894 BDRVQcow2State *s = bs->opaque;
4895
4896 BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
4897 return bs->drv->bdrv_co_pwritev_part(bs, qcow2_vm_state_offset(s) + pos,
4898 qiov->size, qiov, 0, 0);
4899}
4900
4901static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
4902 int64_t pos)
4903{
4904 BDRVQcow2State *s = bs->opaque;
4905
4906 BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
4907 return bs->drv->bdrv_co_preadv_part(bs, qcow2_vm_state_offset(s) + pos,
4908 qiov->size, qiov, 0, 0);
4909}
4910
4911
4912
4913
4914
4915static int qcow2_downgrade(BlockDriverState *bs, int target_version,
4916 BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
4917 Error **errp)
4918{
4919 BDRVQcow2State *s = bs->opaque;
4920 int current_version = s->qcow_version;
4921 int ret;
4922
4923
4924 assert(target_version < current_version);
4925
4926
4927 assert(target_version == 2);
4928
4929 if (s->refcount_order != 4) {
4930 error_setg(errp, "compat=0.10 requires refcount_bits=16");
4931 return -ENOTSUP;
4932 }
4933
4934 if (has_data_file(bs)) {
4935 error_setg(errp, "Cannot downgrade an image with a data file");
4936 return -ENOTSUP;
4937 }
4938
4939
4940 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
4941 ret = qcow2_mark_clean(bs);
4942 if (ret < 0) {
4943 error_setg_errno(errp, -ret, "Failed to make the image clean");
4944 return ret;
4945 }
4946 }
4947
4948
4949
4950
4951
4952 if (s->incompatible_features) {
4953 error_setg(errp, "Cannot downgrade an image with incompatible features "
4954 "%#" PRIx64 " set", s->incompatible_features);
4955 return -ENOTSUP;
4956 }
4957
4958
4959 s->compatible_features = 0;
4960
4961
4962
4963
4964 s->autoclear_features = 0;
4965
4966 ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque);
4967 if (ret < 0) {
4968 error_setg_errno(errp, -ret, "Failed to turn zero into data clusters");
4969 return ret;
4970 }
4971
4972 s->qcow_version = target_version;
4973 ret = qcow2_update_header(bs);
4974 if (ret < 0) {
4975 s->qcow_version = current_version;
4976 error_setg_errno(errp, -ret, "Failed to update the image header");
4977 return ret;
4978 }
4979 return 0;
4980}
4981
4982
4983
4984
4985
4986
4987static int qcow2_upgrade(BlockDriverState *bs, int target_version,
4988 BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
4989 Error **errp)
4990{
4991 BDRVQcow2State *s = bs->opaque;
4992 bool need_snapshot_update;
4993 int current_version = s->qcow_version;
4994 int i;
4995 int ret;
4996
4997
4998 assert(target_version > current_version);
4999
5000
5001 assert(target_version == 3);
5002
5003 status_cb(bs, 0, 2, cb_opaque);
5004
5005
5006
5007
5008
5009
5010
5011
5012 need_snapshot_update = false;
5013 for (i = 0; i < s->nb_snapshots; i++) {
5014 if (s->snapshots[i].extra_data_size <
5015 sizeof_field(QCowSnapshotExtraData, vm_state_size_large) +
5016 sizeof_field(QCowSnapshotExtraData, disk_size))
5017 {
5018 need_snapshot_update = true;
5019 break;
5020 }
5021 }
5022 if (need_snapshot_update) {
5023 ret = qcow2_write_snapshots(bs);
5024 if (ret < 0) {
5025 error_setg_errno(errp, -ret, "Failed to update the snapshot table");
5026 return ret;
5027 }
5028 }
5029 status_cb(bs, 1, 2, cb_opaque);
5030
5031 s->qcow_version = target_version;
5032 ret = qcow2_update_header(bs);
5033 if (ret < 0) {
5034 s->qcow_version = current_version;
5035 error_setg_errno(errp, -ret, "Failed to update the image header");
5036 return ret;
5037 }
5038 status_cb(bs, 2, 2, cb_opaque);
5039
5040 return 0;
5041}
5042
/* Identifies the amend step that is currently reporting progress */
typedef enum Qcow2AmendOperation {
    /*
     * Value a zero-initialized Qcow2AmendHelperCBInfo::last_operation has;
     * qcow2_amend_helper_cb() uses it to tell its very first invocation
     * apart from a switch between two operations.
     */
    QCOW2_NO_OPERATION = 0,

    /* qcow2_upgrade() is running */
    QCOW2_UPGRADING,
    /* qcow2_change_refcount_order() is running */
    QCOW2_CHANGING_REFCOUNT_ORDER,
    /* qcow2_downgrade() is running */
    QCOW2_DOWNGRADING,
} Qcow2AmendOperation;
5053
5054typedef struct Qcow2AmendHelperCBInfo {
5055
5056
5057 BlockDriverAmendStatusCB *original_status_cb;
5058 void *original_cb_opaque;
5059
5060 Qcow2AmendOperation current_operation;
5061
5062
5063 int total_operations;
5064
5065
5066
5067
5068 int operations_completed;
5069
5070
5071 int64_t offset_completed;
5072
5073 Qcow2AmendOperation last_operation;
5074 int64_t last_work_size;
5075} Qcow2AmendHelperCBInfo;
5076
5077static void qcow2_amend_helper_cb(BlockDriverState *bs,
5078 int64_t operation_offset,
5079 int64_t operation_work_size, void *opaque)
5080{
5081 Qcow2AmendHelperCBInfo *info = opaque;
5082 int64_t current_work_size;
5083 int64_t projected_work_size;
5084
5085 if (info->current_operation != info->last_operation) {
5086 if (info->last_operation != QCOW2_NO_OPERATION) {
5087 info->offset_completed += info->last_work_size;
5088 info->operations_completed++;
5089 }
5090
5091 info->last_operation = info->current_operation;
5092 }
5093
5094 assert(info->total_operations > 0);
5095 assert(info->operations_completed < info->total_operations);
5096
5097 info->last_work_size = operation_work_size;
5098
5099 current_work_size = info->offset_completed + operation_work_size;
5100
5101
5102
5103
5104
5105 projected_work_size = current_work_size * (info->total_operations -
5106 info->operations_completed - 1)
5107 / (info->operations_completed + 1);
5108
5109 info->original_status_cb(bs, info->offset_completed + operation_offset,
5110 current_work_size + projected_work_size,
5111 info->original_cb_opaque);
5112}
5113
5114static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
5115 BlockDriverAmendStatusCB *status_cb,
5116 void *cb_opaque,
5117 Error **errp)
5118{
5119 BDRVQcow2State *s = bs->opaque;
5120 int old_version = s->qcow_version, new_version = old_version;
5121 uint64_t new_size = 0;
5122 const char *backing_file = NULL, *backing_format = NULL, *data_file = NULL;
5123 bool lazy_refcounts = s->use_lazy_refcounts;
5124 bool data_file_raw = data_file_is_raw(bs);
5125 const char *compat = NULL;
5126 uint64_t cluster_size = s->cluster_size;
5127 bool encrypt;
5128 int encformat;
5129 int refcount_bits = s->refcount_bits;
5130 int ret;
5131 QemuOptDesc *desc = opts->list->desc;
5132 Qcow2AmendHelperCBInfo helper_cb_info;
5133
5134 while (desc && desc->name) {
5135 if (!qemu_opt_find(opts, desc->name)) {
5136
5137 desc++;
5138 continue;
5139 }
5140
5141 if (!strcmp(desc->name, BLOCK_OPT_COMPAT_LEVEL)) {
5142 compat = qemu_opt_get(opts, BLOCK_OPT_COMPAT_LEVEL);
5143 if (!compat) {
5144
5145 } else if (!strcmp(compat, "0.10") || !strcmp(compat, "v2")) {
5146 new_version = 2;
5147 } else if (!strcmp(compat, "1.1") || !strcmp(compat, "v3")) {
5148 new_version = 3;
5149 } else {
5150 error_setg(errp, "Unknown compatibility level %s", compat);
5151 return -EINVAL;
5152 }
5153 } else if (!strcmp(desc->name, BLOCK_OPT_PREALLOC)) {
5154 error_setg(errp, "Cannot change preallocation mode");
5155 return -ENOTSUP;
5156 } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) {
5157 new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
5158 } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FILE)) {
5159 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
5160 } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FMT)) {
5161 backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
5162 } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT)) {
5163 encrypt = qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT,
5164 !!s->crypto);
5165
5166 if (encrypt != !!s->crypto) {
5167 error_setg(errp,
5168 "Changing the encryption flag is not supported");
5169 return -ENOTSUP;
5170 }
5171 } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT_FORMAT)) {
5172 encformat = qcow2_crypt_method_from_format(
5173 qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT));
5174
5175 if (encformat != s->crypt_method_header) {
5176 error_setg(errp,
5177 "Changing the encryption format is not supported");
5178 return -ENOTSUP;
5179 }
5180 } else if (g_str_has_prefix(desc->name, "encrypt.")) {
5181 error_setg(errp,
5182 "Changing the encryption parameters is not supported");
5183 return -ENOTSUP;
5184 } else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) {
5185 cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE,
5186 cluster_size);
5187 if (cluster_size != s->cluster_size) {
5188 error_setg(errp, "Changing the cluster size is not supported");
5189 return -ENOTSUP;
5190 }
5191 } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
5192 lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS,
5193 lazy_refcounts);
5194 } else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) {
5195 refcount_bits = qemu_opt_get_number(opts, BLOCK_OPT_REFCOUNT_BITS,
5196 refcount_bits);
5197
5198 if (refcount_bits <= 0 || refcount_bits > 64 ||
5199 !is_power_of_2(refcount_bits))
5200 {
5201 error_setg(errp, "Refcount width must be a power of two and "
5202 "may not exceed 64 bits");
5203 return -EINVAL;
5204 }
5205 } else if (!strcmp(desc->name, BLOCK_OPT_DATA_FILE)) {
5206 data_file = qemu_opt_get(opts, BLOCK_OPT_DATA_FILE);
5207 if (data_file && !has_data_file(bs)) {
5208 error_setg(errp, "data-file can only be set for images that "
5209 "use an external data file");
5210 return -EINVAL;
5211 }
5212 } else if (!strcmp(desc->name, BLOCK_OPT_DATA_FILE_RAW)) {
5213 data_file_raw = qemu_opt_get_bool(opts, BLOCK_OPT_DATA_FILE_RAW,
5214 data_file_raw);
5215 if (data_file_raw && !data_file_is_raw(bs)) {
5216 error_setg(errp, "data-file-raw cannot be set on existing "
5217 "images");
5218 return -EINVAL;
5219 }
5220 } else {
5221
5222
5223 abort();
5224 }
5225
5226 desc++;
5227 }
5228
5229 helper_cb_info = (Qcow2AmendHelperCBInfo){
5230 .original_status_cb = status_cb,
5231 .original_cb_opaque = cb_opaque,
5232 .total_operations = (new_version != old_version)
5233 + (s->refcount_bits != refcount_bits)
5234 };
5235
5236
5237 if (new_version > old_version) {
5238 helper_cb_info.current_operation = QCOW2_UPGRADING;
5239 ret = qcow2_upgrade(bs, new_version, &qcow2_amend_helper_cb,
5240 &helper_cb_info, errp);
5241 if (ret < 0) {
5242 return ret;
5243 }
5244 }
5245
5246 if (s->refcount_bits != refcount_bits) {
5247 int refcount_order = ctz32(refcount_bits);
5248
5249 if (new_version < 3 && refcount_bits != 16) {
5250 error_setg(errp, "Refcount widths other than 16 bits require "
5251 "compatibility level 1.1 or above (use compat=1.1 or "
5252 "greater)");
5253 return -EINVAL;
5254 }
5255
5256 helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER;
5257 ret = qcow2_change_refcount_order(bs, refcount_order,
5258 &qcow2_amend_helper_cb,
5259 &helper_cb_info, errp);
5260 if (ret < 0) {
5261 return ret;
5262 }
5263 }
5264
5265
5266 if (data_file_raw) {
5267 s->autoclear_features |= QCOW2_AUTOCLEAR_DATA_FILE_RAW;
5268 } else {
5269 s->autoclear_features &= ~QCOW2_AUTOCLEAR_DATA_FILE_RAW;
5270 }
5271
5272 if (data_file) {
5273 g_free(s->image_data_file);
5274 s->image_data_file = *data_file ? g_strdup(data_file) : NULL;
5275 }
5276
5277 ret = qcow2_update_header(bs);
5278 if (ret < 0) {
5279 error_setg_errno(errp, -ret, "Failed to update the image header");
5280 return ret;
5281 }
5282
5283 if (backing_file || backing_format) {
5284 ret = qcow2_change_backing_file(bs,
5285 backing_file ?: s->image_backing_file,
5286 backing_format ?: s->image_backing_format);
5287 if (ret < 0) {
5288 error_setg_errno(errp, -ret, "Failed to change the backing file");
5289 return ret;
5290 }
5291 }
5292
5293 if (s->use_lazy_refcounts != lazy_refcounts) {
5294 if (lazy_refcounts) {
5295 if (new_version < 3) {
5296 error_setg(errp, "Lazy refcounts only supported with "
5297 "compatibility level 1.1 and above (use compat=1.1 "
5298 "or greater)");
5299 return -EINVAL;
5300 }
5301 s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
5302 ret = qcow2_update_header(bs);
5303 if (ret < 0) {
5304 s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
5305 error_setg_errno(errp, -ret, "Failed to update the image header");
5306 return ret;
5307 }
5308 s->use_lazy_refcounts = true;
5309 } else {
5310
5311 ret = qcow2_mark_clean(bs);
5312 if (ret < 0) {
5313 error_setg_errno(errp, -ret, "Failed to make the image clean");
5314 return ret;
5315 }
5316
5317 s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
5318 ret = qcow2_update_header(bs);
5319 if (ret < 0) {
5320 s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
5321 error_setg_errno(errp, -ret, "Failed to update the image header");
5322 return ret;
5323 }
5324 s->use_lazy_refcounts = false;
5325 }
5326 }
5327
5328 if (new_size) {
5329 BlockBackend *blk = blk_new(bdrv_get_aio_context(bs),
5330 BLK_PERM_RESIZE, BLK_PERM_ALL);
5331 ret = blk_insert_bs(blk, bs, errp);
5332 if (ret < 0) {
5333 blk_unref(blk);
5334 return ret;
5335 }
5336
5337
5338
5339
5340
5341 ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, errp);
5342 blk_unref(blk);
5343 if (ret < 0) {
5344 return ret;
5345 }
5346 }
5347
5348
5349 if (new_version < old_version) {
5350 helper_cb_info.current_operation = QCOW2_DOWNGRADING;
5351 ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb,
5352 &helper_cb_info, errp);
5353 if (ret < 0) {
5354 return ret;
5355 }
5356 }
5357
5358 return 0;
5359}
5360
5361
5362
5363
5364
5365
5366
5367void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
5368 int64_t size, const char *message_format, ...)
5369{
5370 BDRVQcow2State *s = bs->opaque;
5371 const char *node_name;
5372 char *message;
5373 va_list ap;
5374
5375 fatal = fatal && bdrv_is_writable(bs);
5376
5377 if (s->signaled_corruption &&
5378 (!fatal || (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT)))
5379 {
5380 return;
5381 }
5382
5383 va_start(ap, message_format);
5384 message = g_strdup_vprintf(message_format, ap);
5385 va_end(ap);
5386
5387 if (fatal) {
5388 fprintf(stderr, "qcow2: Marking image as corrupt: %s; further "
5389 "corruption events will be suppressed\n", message);
5390 } else {
5391 fprintf(stderr, "qcow2: Image is corrupt: %s; further non-fatal "
5392 "corruption events will be suppressed\n", message);
5393 }
5394
5395 node_name = bdrv_get_node_name(bs);
5396 qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs),
5397 *node_name != '\0', node_name,
5398 message, offset >= 0, offset,
5399 size >= 0, size,
5400 fatal);
5401 g_free(message);
5402
5403 if (fatal) {
5404 qcow2_mark_corrupt(bs);
5405 bs->drv = NULL;
5406 }
5407
5408 s->signaled_corruption = true;
5409}
5410
5411static QemuOptsList qcow2_create_opts = {
5412 .name = "qcow2-create-opts",
5413 .head = QTAILQ_HEAD_INITIALIZER(qcow2_create_opts.head),
5414 .desc = {
5415 {
5416 .name = BLOCK_OPT_SIZE,
5417 .type = QEMU_OPT_SIZE,
5418 .help = "Virtual disk size"
5419 },
5420 {
5421 .name = BLOCK_OPT_COMPAT_LEVEL,
5422 .type = QEMU_OPT_STRING,
5423 .help = "Compatibility level (v2 [0.10] or v3 [1.1])"
5424 },
5425 {
5426 .name = BLOCK_OPT_BACKING_FILE,
5427 .type = QEMU_OPT_STRING,
5428 .help = "File name of a base image"
5429 },
5430 {
5431 .name = BLOCK_OPT_BACKING_FMT,
5432 .type = QEMU_OPT_STRING,
5433 .help = "Image format of the base image"
5434 },
5435 {
5436 .name = BLOCK_OPT_DATA_FILE,
5437 .type = QEMU_OPT_STRING,
5438 .help = "File name of an external data file"
5439 },
5440 {
5441 .name = BLOCK_OPT_DATA_FILE_RAW,
5442 .type = QEMU_OPT_BOOL,
5443 .help = "The external data file must stay valid as a raw image"
5444 },
5445 {
5446 .name = BLOCK_OPT_ENCRYPT,
5447 .type = QEMU_OPT_BOOL,
5448 .help = "Encrypt the image with format 'aes'. (Deprecated "
5449 "in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)",
5450 },
5451 {
5452 .name = BLOCK_OPT_ENCRYPT_FORMAT,
5453 .type = QEMU_OPT_STRING,
5454 .help = "Encrypt the image, format choices: 'aes', 'luks'",
5455 },
5456 BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.",
5457 "ID of secret providing qcow AES key or LUKS passphrase"),
5458 BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG("encrypt."),
5459 BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE("encrypt."),
5460 BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG("encrypt."),
5461 BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG("encrypt."),
5462 BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG("encrypt."),
5463 BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME("encrypt."),
5464 {
5465 .name = BLOCK_OPT_CLUSTER_SIZE,
5466 .type = QEMU_OPT_SIZE,
5467 .help = "qcow2 cluster size",
5468 .def_value_str = stringify(DEFAULT_CLUSTER_SIZE)
5469 },
5470 {
5471 .name = BLOCK_OPT_PREALLOC,
5472 .type = QEMU_OPT_STRING,
5473 .help = "Preallocation mode (allowed values: off, metadata, "
5474 "falloc, full)"
5475 },
5476 {
5477 .name = BLOCK_OPT_LAZY_REFCOUNTS,
5478 .type = QEMU_OPT_BOOL,
5479 .help = "Postpone refcount updates",
5480 .def_value_str = "off"
5481 },
5482 {
5483 .name = BLOCK_OPT_REFCOUNT_BITS,
5484 .type = QEMU_OPT_NUMBER,
5485 .help = "Width of a reference count entry in bits",
5486 .def_value_str = "16"
5487 },
5488 { }
5489 }
5490};
5491
5492static const char *const qcow2_strong_runtime_opts[] = {
5493 "encrypt." BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET,
5494
5495 NULL
5496};
5497
5498BlockDriver bdrv_qcow2 = {
5499 .format_name = "qcow2",
5500 .instance_size = sizeof(BDRVQcow2State),
5501 .bdrv_probe = qcow2_probe,
5502 .bdrv_open = qcow2_open,
5503 .bdrv_close = qcow2_close,
5504 .bdrv_reopen_prepare = qcow2_reopen_prepare,
5505 .bdrv_reopen_commit = qcow2_reopen_commit,
5506 .bdrv_reopen_abort = qcow2_reopen_abort,
5507 .bdrv_join_options = qcow2_join_options,
5508 .bdrv_child_perm = bdrv_format_default_perms,
5509 .bdrv_co_create_opts = qcow2_co_create_opts,
5510 .bdrv_co_create = qcow2_co_create,
5511 .bdrv_has_zero_init = qcow2_has_zero_init,
5512 .bdrv_has_zero_init_truncate = bdrv_has_zero_init_1,
5513 .bdrv_co_block_status = qcow2_co_block_status,
5514
5515 .bdrv_co_preadv_part = qcow2_co_preadv_part,
5516 .bdrv_co_pwritev_part = qcow2_co_pwritev_part,
5517 .bdrv_co_flush_to_os = qcow2_co_flush_to_os,
5518
5519 .bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes,
5520 .bdrv_co_pdiscard = qcow2_co_pdiscard,
5521 .bdrv_co_copy_range_from = qcow2_co_copy_range_from,
5522 .bdrv_co_copy_range_to = qcow2_co_copy_range_to,
5523 .bdrv_co_truncate = qcow2_co_truncate,
5524 .bdrv_co_pwritev_compressed_part = qcow2_co_pwritev_compressed_part,
5525 .bdrv_make_empty = qcow2_make_empty,
5526
5527 .bdrv_snapshot_create = qcow2_snapshot_create,
5528 .bdrv_snapshot_goto = qcow2_snapshot_goto,
5529 .bdrv_snapshot_delete = qcow2_snapshot_delete,
5530 .bdrv_snapshot_list = qcow2_snapshot_list,
5531 .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp,
5532 .bdrv_measure = qcow2_measure,
5533 .bdrv_get_info = qcow2_get_info,
5534 .bdrv_get_specific_info = qcow2_get_specific_info,
5535
5536 .bdrv_save_vmstate = qcow2_save_vmstate,
5537 .bdrv_load_vmstate = qcow2_load_vmstate,
5538
5539 .supports_backing = true,
5540 .bdrv_change_backing_file = qcow2_change_backing_file,
5541
5542 .bdrv_refresh_limits = qcow2_refresh_limits,
5543 .bdrv_co_invalidate_cache = qcow2_co_invalidate_cache,
5544 .bdrv_inactivate = qcow2_inactivate,
5545
5546 .create_opts = &qcow2_create_opts,
5547 .strong_runtime_opts = qcow2_strong_runtime_opts,
5548 .mutable_opts = mutable_opts,
5549 .bdrv_co_check = qcow2_co_check,
5550 .bdrv_amend_options = qcow2_amend_options,
5551
5552 .bdrv_detach_aio_context = qcow2_detach_aio_context,
5553 .bdrv_attach_aio_context = qcow2_attach_aio_context,
5554
5555 .bdrv_co_can_store_new_dirty_bitmap = qcow2_co_can_store_new_dirty_bitmap,
5556 .bdrv_co_remove_persistent_dirty_bitmap =
5557 qcow2_co_remove_persistent_dirty_bitmap,
5558};
5559
5560static void bdrv_qcow2_init(void)
5561{
5562 bdrv_register(&bdrv_qcow2);
5563}
5564
5565block_init(bdrv_qcow2_init);
5566