1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include "qemu/osdep.h"
26
27#include "block/qdict.h"
28#include "sysemu/block-backend.h"
29#include "qemu/main-loop.h"
30#include "qemu/module.h"
31#include "qcow2.h"
32#include "qemu/error-report.h"
33#include "qapi/error.h"
34#include "qapi/qapi-events-block-core.h"
35#include "qapi/qmp/qdict.h"
36#include "qapi/qmp/qstring.h"
37#include "trace.h"
38#include "qemu/option_int.h"
39#include "qemu/cutils.h"
40#include "qemu/bswap.h"
41#include "qapi/qobject-input-visitor.h"
42#include "qapi/qapi-visit-block-core.h"
43#include "crypto.h"
44#include "block/aio_task.h"
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
/*
 * Fixed-size prefix of a header extension as it is read from the image
 * file.  qcow2_read_extensions() converts both fields with be32_to_cpu()
 * right after reading them.
 */
typedef struct {
    uint32_t magic;     /* extension type, compared against QCOW2_EXT_MAGIC_* */
    uint32_t len;       /* payload bytes following this prefix (unpadded) */
} QEMU_PACKED QCowExtension;
67
/*
 * Header extension type identifiers, matched against QCowExtension.magic
 * in qcow2_read_extensions().
 */
#define QCOW2_EXT_MAGIC_END 0                       /* stops the extension walk */
#define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA   /* backing format name */
#define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857    /* table of Qcow2Feature */
#define QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77    /* LUKS header location */
#define QCOW2_EXT_MAGIC_BITMAPS 0x23852875          /* bitmap directory info */
#define QCOW2_EXT_MAGIC_DATA_FILE 0x44415441        /* external data file name */
74
/*
 * Forward declaration of a file-local coroutine function; its definition
 * is not part of this section of the file.
 */
static int coroutine_fn
qcow2_co_preadv_compressed(BlockDriverState *bs,
                           uint64_t file_cluster_offset,
                           uint64_t offset,
                           uint64_t bytes,
                           QEMUIOVector *qiov,
                           size_t qiov_offset);
82
83static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
84{
85 const QCowHeader *cow_header = (const void *)buf;
86
87 if (buf_size >= sizeof(QCowHeader) &&
88 be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
89 be32_to_cpu(cow_header->version) >= 2)
90 return 100;
91 else
92 return 0;
93}
94
95
96static ssize_t qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset,
97 uint8_t *buf, size_t buflen,
98 void *opaque, Error **errp)
99{
100 BlockDriverState *bs = opaque;
101 BDRVQcow2State *s = bs->opaque;
102 ssize_t ret;
103
104 if ((offset + buflen) > s->crypto_header.length) {
105 error_setg(errp, "Request for data outside of extension header");
106 return -1;
107 }
108
109 ret = bdrv_pread(bs->file,
110 s->crypto_header.offset + offset, buf, buflen);
111 if (ret < 0) {
112 error_setg_errno(errp, -ret, "Could not read encryption header");
113 return -1;
114 }
115 return ret;
116}
117
118
119static ssize_t qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen,
120 void *opaque, Error **errp)
121{
122 BlockDriverState *bs = opaque;
123 BDRVQcow2State *s = bs->opaque;
124 int64_t ret;
125 int64_t clusterlen;
126
127 ret = qcow2_alloc_clusters(bs, headerlen);
128 if (ret < 0) {
129 error_setg_errno(errp, -ret,
130 "Cannot allocate cluster for LUKS header size %zu",
131 headerlen);
132 return -1;
133 }
134
135 s->crypto_header.length = headerlen;
136 s->crypto_header.offset = ret;
137
138
139
140 clusterlen = size_to_clusters(s, headerlen) * s->cluster_size;
141 assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen, false) == 0);
142 ret = bdrv_pwrite_zeroes(bs->file,
143 ret + headerlen,
144 clusterlen - headerlen, 0);
145 if (ret < 0) {
146 error_setg_errno(errp, -ret, "Could not zero fill encryption header");
147 return -1;
148 }
149
150 return ret;
151}
152
153
154static ssize_t qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset,
155 const uint8_t *buf, size_t buflen,
156 void *opaque, Error **errp)
157{
158 BlockDriverState *bs = opaque;
159 BDRVQcow2State *s = bs->opaque;
160 ssize_t ret;
161
162 if ((offset + buflen) > s->crypto_header.length) {
163 error_setg(errp, "Request for data outside of extension header");
164 return -1;
165 }
166
167 ret = bdrv_pwrite(bs->file,
168 s->crypto_header.offset + offset, buf, buflen);
169 if (ret < 0) {
170 error_setg_errno(errp, -ret, "Could not read encryption header");
171 return -1;
172 }
173 return ret;
174}
175
176
177
178
179
180
181
182
183
184static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
185 uint64_t end_offset, void **p_feature_table,
186 int flags, bool *need_update_header,
187 Error **errp)
188{
189 BDRVQcow2State *s = bs->opaque;
190 QCowExtension ext;
191 uint64_t offset;
192 int ret;
193 Qcow2BitmapHeaderExt bitmaps_ext;
194
195 if (need_update_header != NULL) {
196 *need_update_header = false;
197 }
198
199#ifdef DEBUG_EXT
200 printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
201#endif
202 offset = start_offset;
203 while (offset < end_offset) {
204
205#ifdef DEBUG_EXT
206
207 if (offset > s->cluster_size)
208 printf("qcow2_read_extension: suspicious offset %lu\n", offset);
209
210 printf("attempting to read extended header in offset %lu\n", offset);
211#endif
212
213 ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext));
214 if (ret < 0) {
215 error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: "
216 "pread fail from offset %" PRIu64, offset);
217 return 1;
218 }
219 ext.magic = be32_to_cpu(ext.magic);
220 ext.len = be32_to_cpu(ext.len);
221 offset += sizeof(ext);
222#ifdef DEBUG_EXT
223 printf("ext.magic = 0x%x\n", ext.magic);
224#endif
225 if (offset > end_offset || ext.len > end_offset - offset) {
226 error_setg(errp, "Header extension too large");
227 return -EINVAL;
228 }
229
230 switch (ext.magic) {
231 case QCOW2_EXT_MAGIC_END:
232 return 0;
233
234 case QCOW2_EXT_MAGIC_BACKING_FORMAT:
235 if (ext.len >= sizeof(bs->backing_format)) {
236 error_setg(errp, "ERROR: ext_backing_format: len=%" PRIu32
237 " too large (>=%zu)", ext.len,
238 sizeof(bs->backing_format));
239 return 2;
240 }
241 ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len);
242 if (ret < 0) {
243 error_setg_errno(errp, -ret, "ERROR: ext_backing_format: "
244 "Could not read format name");
245 return 3;
246 }
247 bs->backing_format[ext.len] = '\0';
248 s->image_backing_format = g_strdup(bs->backing_format);
249#ifdef DEBUG_EXT
250 printf("Qcow2: Got format extension %s\n", bs->backing_format);
251#endif
252 break;
253
254 case QCOW2_EXT_MAGIC_FEATURE_TABLE:
255 if (p_feature_table != NULL) {
256 void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
257 ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
258 if (ret < 0) {
259 error_setg_errno(errp, -ret, "ERROR: ext_feature_table: "
260 "Could not read table");
261 return ret;
262 }
263
264 *p_feature_table = feature_table;
265 }
266 break;
267
268 case QCOW2_EXT_MAGIC_CRYPTO_HEADER: {
269 unsigned int cflags = 0;
270 if (s->crypt_method_header != QCOW_CRYPT_LUKS) {
271 error_setg(errp, "CRYPTO header extension only "
272 "expected with LUKS encryption method");
273 return -EINVAL;
274 }
275 if (ext.len != sizeof(Qcow2CryptoHeaderExtension)) {
276 error_setg(errp, "CRYPTO header extension size %u, "
277 "but expected size %zu", ext.len,
278 sizeof(Qcow2CryptoHeaderExtension));
279 return -EINVAL;
280 }
281
282 ret = bdrv_pread(bs->file, offset, &s->crypto_header, ext.len);
283 if (ret < 0) {
284 error_setg_errno(errp, -ret,
285 "Unable to read CRYPTO header extension");
286 return ret;
287 }
288 s->crypto_header.offset = be64_to_cpu(s->crypto_header.offset);
289 s->crypto_header.length = be64_to_cpu(s->crypto_header.length);
290
291 if ((s->crypto_header.offset % s->cluster_size) != 0) {
292 error_setg(errp, "Encryption header offset '%" PRIu64 "' is "
293 "not a multiple of cluster size '%u'",
294 s->crypto_header.offset, s->cluster_size);
295 return -EINVAL;
296 }
297
298 if (flags & BDRV_O_NO_IO) {
299 cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
300 }
301 s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
302 qcow2_crypto_hdr_read_func,
303 bs, cflags, QCOW2_MAX_THREADS, errp);
304 if (!s->crypto) {
305 return -EINVAL;
306 }
307 } break;
308
309 case QCOW2_EXT_MAGIC_BITMAPS:
310 if (ext.len != sizeof(bitmaps_ext)) {
311 error_setg_errno(errp, -ret, "bitmaps_ext: "
312 "Invalid extension length");
313 return -EINVAL;
314 }
315
316 if (!(s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS)) {
317 if (s->qcow_version < 3) {
318
319 warn_report("This qcow2 v2 image contains bitmaps, but "
320 "they may have been modified by a program "
321 "without persistent bitmap support; so now "
322 "they must all be considered inconsistent");
323 } else {
324 warn_report("a program lacking bitmap support "
325 "modified this file, so all bitmaps are now "
326 "considered inconsistent");
327 }
328 error_printf("Some clusters may be leaked, "
329 "run 'qemu-img check -r' on the image "
330 "file to fix.");
331 if (need_update_header != NULL) {
332
333 *need_update_header = true;
334 }
335 break;
336 }
337
338 ret = bdrv_pread(bs->file, offset, &bitmaps_ext, ext.len);
339 if (ret < 0) {
340 error_setg_errno(errp, -ret, "bitmaps_ext: "
341 "Could not read ext header");
342 return ret;
343 }
344
345 if (bitmaps_ext.reserved32 != 0) {
346 error_setg_errno(errp, -ret, "bitmaps_ext: "
347 "Reserved field is not zero");
348 return -EINVAL;
349 }
350
351 bitmaps_ext.nb_bitmaps = be32_to_cpu(bitmaps_ext.nb_bitmaps);
352 bitmaps_ext.bitmap_directory_size =
353 be64_to_cpu(bitmaps_ext.bitmap_directory_size);
354 bitmaps_ext.bitmap_directory_offset =
355 be64_to_cpu(bitmaps_ext.bitmap_directory_offset);
356
357 if (bitmaps_ext.nb_bitmaps > QCOW2_MAX_BITMAPS) {
358 error_setg(errp,
359 "bitmaps_ext: Image has %" PRIu32 " bitmaps, "
360 "exceeding the QEMU supported maximum of %d",
361 bitmaps_ext.nb_bitmaps, QCOW2_MAX_BITMAPS);
362 return -EINVAL;
363 }
364
365 if (bitmaps_ext.nb_bitmaps == 0) {
366 error_setg(errp, "found bitmaps extension with zero bitmaps");
367 return -EINVAL;
368 }
369
370 if (bitmaps_ext.bitmap_directory_offset & (s->cluster_size - 1)) {
371 error_setg(errp, "bitmaps_ext: "
372 "invalid bitmap directory offset");
373 return -EINVAL;
374 }
375
376 if (bitmaps_ext.bitmap_directory_size >
377 QCOW2_MAX_BITMAP_DIRECTORY_SIZE) {
378 error_setg(errp, "bitmaps_ext: "
379 "bitmap directory size (%" PRIu64 ") exceeds "
380 "the maximum supported size (%d)",
381 bitmaps_ext.bitmap_directory_size,
382 QCOW2_MAX_BITMAP_DIRECTORY_SIZE);
383 return -EINVAL;
384 }
385
386 s->nb_bitmaps = bitmaps_ext.nb_bitmaps;
387 s->bitmap_directory_offset =
388 bitmaps_ext.bitmap_directory_offset;
389 s->bitmap_directory_size =
390 bitmaps_ext.bitmap_directory_size;
391
392#ifdef DEBUG_EXT
393 printf("Qcow2: Got bitmaps extension: "
394 "offset=%" PRIu64 " nb_bitmaps=%" PRIu32 "\n",
395 s->bitmap_directory_offset, s->nb_bitmaps);
396#endif
397 break;
398
399 case QCOW2_EXT_MAGIC_DATA_FILE:
400 {
401 s->image_data_file = g_malloc0(ext.len + 1);
402 ret = bdrv_pread(bs->file, offset, s->image_data_file, ext.len);
403 if (ret < 0) {
404 error_setg_errno(errp, -ret,
405 "ERROR: Could not read data file name");
406 return ret;
407 }
408#ifdef DEBUG_EXT
409 printf("Qcow2: Got external data file %s\n", s->image_data_file);
410#endif
411 break;
412 }
413
414 default:
415
416
417
418 {
419 Qcow2UnknownHeaderExtension *uext;
420
421 uext = g_malloc0(sizeof(*uext) + ext.len);
422 uext->magic = ext.magic;
423 uext->len = ext.len;
424 QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
425
426 ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
427 if (ret < 0) {
428 error_setg_errno(errp, -ret, "ERROR: unknown extension: "
429 "Could not read data");
430 return ret;
431 }
432 }
433 break;
434 }
435
436 offset += ((ext.len + 7) & ~7);
437 }
438
439 return 0;
440}
441
442static void cleanup_unknown_header_ext(BlockDriverState *bs)
443{
444 BDRVQcow2State *s = bs->opaque;
445 Qcow2UnknownHeaderExtension *uext, *next;
446
447 QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) {
448 QLIST_REMOVE(uext, next);
449 g_free(uext);
450 }
451}
452
453static void report_unsupported_feature(Error **errp, Qcow2Feature *table,
454 uint64_t mask)
455{
456 char *features = g_strdup("");
457 char *old;
458
459 while (table && table->name[0] != '\0') {
460 if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) {
461 if (mask & (1ULL << table->bit)) {
462 old = features;
463 features = g_strdup_printf("%s%s%.46s", old, *old ? ", " : "",
464 table->name);
465 g_free(old);
466 mask &= ~(1ULL << table->bit);
467 }
468 }
469 table++;
470 }
471
472 if (mask) {
473 old = features;
474 features = g_strdup_printf("%s%sUnknown incompatible feature: %" PRIx64,
475 old, *old ? ", " : "", mask);
476 g_free(old);
477 }
478
479 error_setg(errp, "Unsupported qcow2 feature(s): %s", features);
480 g_free(features);
481}
482
483
484
485
486
487
488
489
490int qcow2_mark_dirty(BlockDriverState *bs)
491{
492 BDRVQcow2State *s = bs->opaque;
493 uint64_t val;
494 int ret;
495
496 assert(s->qcow_version >= 3);
497
498 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
499 return 0;
500 }
501
502 val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
503 ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
504 &val, sizeof(val));
505 if (ret < 0) {
506 return ret;
507 }
508 ret = bdrv_flush(bs->file->bs);
509 if (ret < 0) {
510 return ret;
511 }
512
513
514 s->incompatible_features |= QCOW2_INCOMPAT_DIRTY;
515 return 0;
516}
517
518
519
520
521
522
523static int qcow2_mark_clean(BlockDriverState *bs)
524{
525 BDRVQcow2State *s = bs->opaque;
526
527 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
528 int ret;
529
530 s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;
531
532 ret = qcow2_flush_caches(bs);
533 if (ret < 0) {
534 return ret;
535 }
536
537 return qcow2_update_header(bs);
538 }
539 return 0;
540}
541
542
543
544
545int qcow2_mark_corrupt(BlockDriverState *bs)
546{
547 BDRVQcow2State *s = bs->opaque;
548
549 s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT;
550 return qcow2_update_header(bs);
551}
552
553
554
555
556
557int qcow2_mark_consistent(BlockDriverState *bs)
558{
559 BDRVQcow2State *s = bs->opaque;
560
561 if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
562 int ret = qcow2_flush_caches(bs);
563 if (ret < 0) {
564 return ret;
565 }
566
567 s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT;
568 return qcow2_update_header(bs);
569 }
570 return 0;
571}
572
573static void qcow2_add_check_result(BdrvCheckResult *out,
574 const BdrvCheckResult *src,
575 bool set_allocation_info)
576{
577 out->corruptions += src->corruptions;
578 out->leaks += src->leaks;
579 out->check_errors += src->check_errors;
580 out->corruptions_fixed += src->corruptions_fixed;
581 out->leaks_fixed += src->leaks_fixed;
582
583 if (set_allocation_info) {
584 out->image_end_offset = src->image_end_offset;
585 out->bfi = src->bfi;
586 }
587}
588
589static int coroutine_fn qcow2_co_check_locked(BlockDriverState *bs,
590 BdrvCheckResult *result,
591 BdrvCheckMode fix)
592{
593 BdrvCheckResult snapshot_res = {};
594 BdrvCheckResult refcount_res = {};
595 int ret;
596
597 memset(result, 0, sizeof(*result));
598
599 ret = qcow2_check_read_snapshot_table(bs, &snapshot_res, fix);
600 if (ret < 0) {
601 qcow2_add_check_result(result, &snapshot_res, false);
602 return ret;
603 }
604
605 ret = qcow2_check_refcounts(bs, &refcount_res, fix);
606 qcow2_add_check_result(result, &refcount_res, true);
607 if (ret < 0) {
608 qcow2_add_check_result(result, &snapshot_res, false);
609 return ret;
610 }
611
612 ret = qcow2_check_fix_snapshot_table(bs, &snapshot_res, fix);
613 qcow2_add_check_result(result, &snapshot_res, false);
614 if (ret < 0) {
615 return ret;
616 }
617
618 if (fix && result->check_errors == 0 && result->corruptions == 0) {
619 ret = qcow2_mark_clean(bs);
620 if (ret < 0) {
621 return ret;
622 }
623 return qcow2_mark_consistent(bs);
624 }
625 return ret;
626}
627
628static int coroutine_fn qcow2_co_check(BlockDriverState *bs,
629 BdrvCheckResult *result,
630 BdrvCheckMode fix)
631{
632 BDRVQcow2State *s = bs->opaque;
633 int ret;
634
635 qemu_co_mutex_lock(&s->lock);
636 ret = qcow2_co_check_locked(bs, result, fix);
637 qemu_co_mutex_unlock(&s->lock);
638 return ret;
639}
640
641int qcow2_validate_table(BlockDriverState *bs, uint64_t offset,
642 uint64_t entries, size_t entry_len,
643 int64_t max_size_bytes, const char *table_name,
644 Error **errp)
645{
646 BDRVQcow2State *s = bs->opaque;
647
648 if (entries > max_size_bytes / entry_len) {
649 error_setg(errp, "%s too large", table_name);
650 return -EFBIG;
651 }
652
653
654
655 if ((INT64_MAX - entries * entry_len < offset) ||
656 (offset_into_cluster(s, offset) != 0)) {
657 error_setg(errp, "%s offset invalid", table_name);
658 return -EINVAL;
659 }
660
661 return 0;
662}
663
/*
 * NULL-terminated list of runtime option names.
 * NOTE(review): presumably the options whose values may change when the
 * image is reopened; confirm against the place where this array is used,
 * which is outside this section of the file.
 */
static const char *const mutable_opts[] = {
    QCOW2_OPT_LAZY_REFCOUNTS,
    QCOW2_OPT_DISCARD_REQUEST,
    QCOW2_OPT_DISCARD_SNAPSHOT,
    QCOW2_OPT_DISCARD_OTHER,
    QCOW2_OPT_OVERLAP,
    QCOW2_OPT_OVERLAP_TEMPLATE,
    QCOW2_OPT_OVERLAP_MAIN_HEADER,
    QCOW2_OPT_OVERLAP_ACTIVE_L1,
    QCOW2_OPT_OVERLAP_ACTIVE_L2,
    QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
    QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
    QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
    QCOW2_OPT_OVERLAP_INACTIVE_L1,
    QCOW2_OPT_OVERLAP_INACTIVE_L2,
    QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
    QCOW2_OPT_CACHE_SIZE,
    QCOW2_OPT_L2_CACHE_SIZE,
    QCOW2_OPT_L2_CACHE_ENTRY_SIZE,
    QCOW2_OPT_REFCOUNT_CACHE_SIZE,
    QCOW2_OPT_CACHE_CLEAN_INTERVAL,
    NULL
};
687
/*
 * Description of the runtime options understood by the qcow2 driver.
 * qcow2_update_options_prepare() creates a QemuOpts from this list and
 * absorbs the user-supplied options dictionary into it.
 */
static QemuOptsList qcow2_runtime_opts = {
    .name = "qcow2",
    .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
    .desc = {
        {
            .name = QCOW2_OPT_LAZY_REFCOUNTS,
            .type = QEMU_OPT_BOOL,
            .help = "Postpone refcount updates",
        },
        /* Discard behaviour, one switch per discard type */
        {
            .name = QCOW2_OPT_DISCARD_REQUEST,
            .type = QEMU_OPT_BOOL,
            .help = "Pass guest discard requests to the layer below",
        },
        {
            .name = QCOW2_OPT_DISCARD_SNAPSHOT,
            .type = QEMU_OPT_BOOL,
            .help = "Generate discard requests when snapshot related space "
                    "is freed",
        },
        {
            .name = QCOW2_OPT_DISCARD_OTHER,
            .type = QEMU_OPT_BOOL,
            .help = "Generate discard requests when other clusters are freed",
        },
        /*
         * Overlap check template.  The two entries below select the same
         * thing; qcow2_update_options_prepare() rejects conflicting values.
         */
        {
            .name = QCOW2_OPT_OVERLAP,
            .type = QEMU_OPT_STRING,
            .help = "Selects which overlap checks to perform from a range of "
                    "templates (none, constant, cached, all)",
        },
        {
            .name = QCOW2_OPT_OVERLAP_TEMPLATE,
            .type = QEMU_OPT_STRING,
            .help = "Selects which overlap checks to perform from a range of "
                    "templates (none, constant, cached, all)",
        },
        /* Individual overlap checks; each defaults to the template's bit */
        {
            .name = QCOW2_OPT_OVERLAP_MAIN_HEADER,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the main qcow2 header",
        },
        {
            .name = QCOW2_OPT_OVERLAP_ACTIVE_L1,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the active L1 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_ACTIVE_L2,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into an active L2 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the refcount table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into a refcount block",
        },
        {
            .name = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the snapshot table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_INACTIVE_L1,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into an inactive L1 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_INACTIVE_L2,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into an inactive L2 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the bitmap directory",
        },
        /* Metadata cache sizing, evaluated by read_cache_sizes() */
        {
            .name = QCOW2_OPT_CACHE_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Maximum combined metadata (L2 tables and refcount blocks) "
                    "cache size",
        },
        {
            .name = QCOW2_OPT_L2_CACHE_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Maximum L2 table cache size",
        },
        {
            .name = QCOW2_OPT_L2_CACHE_ENTRY_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Size of each entry in the L2 cache",
        },
        {
            .name = QCOW2_OPT_REFCOUNT_CACHE_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Maximum refcount block cache size",
        },
        {
            .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL,
            .type = QEMU_OPT_NUMBER,
            .help = "Clean unused cache entries after this time (in seconds)",
        },
        BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.",
            "ID of secret providing qcow2 AES key or LUKS passphrase"),
        { /* end of list */ }
    },
};
801
/*
 * Maps an overlap-check bit number to the name of the boolean runtime
 * option that enables or disables that check.  Indexed by bit number in
 * qcow2_update_options_prepare() when building the overlap_check bitmask.
 */
static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = {
    [QCOW2_OL_MAIN_HEADER_BITNR] = QCOW2_OPT_OVERLAP_MAIN_HEADER,
    [QCOW2_OL_ACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L1,
    [QCOW2_OL_ACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L2,
    [QCOW2_OL_REFCOUNT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
    [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
    [QCOW2_OL_SNAPSHOT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
    [QCOW2_OL_INACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L1,
    [QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2,
    [QCOW2_OL_BITMAP_DIRECTORY_BITNR] = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
};
813
814static void cache_clean_timer_cb(void *opaque)
815{
816 BlockDriverState *bs = opaque;
817 BDRVQcow2State *s = bs->opaque;
818 qcow2_cache_clean_unused(s->l2_table_cache);
819 qcow2_cache_clean_unused(s->refcount_block_cache);
820 timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
821 (int64_t) s->cache_clean_interval * 1000);
822}
823
824static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context)
825{
826 BDRVQcow2State *s = bs->opaque;
827 if (s->cache_clean_interval > 0) {
828 s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL,
829 SCALE_MS, cache_clean_timer_cb,
830 bs);
831 timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
832 (int64_t) s->cache_clean_interval * 1000);
833 }
834}
835
836static void cache_clean_timer_del(BlockDriverState *bs)
837{
838 BDRVQcow2State *s = bs->opaque;
839 if (s->cache_clean_timer) {
840 timer_del(s->cache_clean_timer);
841 timer_free(s->cache_clean_timer);
842 s->cache_clean_timer = NULL;
843 }
844}
845
/*
 * Tear down the cache-clean timer of @bs.
 * NOTE(review): presumably invoked when @bs leaves its AioContext; the
 * registration of this function is not visible in this part of the file.
 */
static void qcow2_detach_aio_context(BlockDriverState *bs)
{
    cache_clean_timer_del(bs);
}
850
/*
 * (Re)create the cache-clean timer of @bs in @new_context.  No timer is
 * created when the cache clean interval is zero.
 */
static void qcow2_attach_aio_context(BlockDriverState *bs,
                                     AioContext *new_context)
{
    cache_clean_timer_init(bs, new_context);
}
856
857static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
858 uint64_t *l2_cache_size,
859 uint64_t *l2_cache_entry_size,
860 uint64_t *refcount_cache_size, Error **errp)
861{
862 BDRVQcow2State *s = bs->opaque;
863 uint64_t combined_cache_size, l2_cache_max_setting;
864 bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set;
865 bool l2_cache_entry_size_set;
866 int min_refcount_cache = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size;
867 uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
868 uint64_t max_l2_entries = DIV_ROUND_UP(virtual_disk_size, s->cluster_size);
869
870
871 uint64_t max_l2_cache = ROUND_UP(max_l2_entries * sizeof(uint64_t),
872 s->cluster_size);
873
874 combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE);
875 l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE);
876 refcount_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
877 l2_cache_entry_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE);
878
879 combined_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_CACHE_SIZE, 0);
880 l2_cache_max_setting = qemu_opt_get_size(opts, QCOW2_OPT_L2_CACHE_SIZE,
881 DEFAULT_L2_CACHE_MAX_SIZE);
882 *refcount_cache_size = qemu_opt_get_size(opts,
883 QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0);
884
885 *l2_cache_entry_size = qemu_opt_get_size(
886 opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE, s->cluster_size);
887
888 *l2_cache_size = MIN(max_l2_cache, l2_cache_max_setting);
889
890 if (combined_cache_size_set) {
891 if (l2_cache_size_set && refcount_cache_size_set) {
892 error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE
893 " and " QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not be set "
894 "at the same time");
895 return;
896 } else if (l2_cache_size_set &&
897 (l2_cache_max_setting > combined_cache_size)) {
898 error_setg(errp, QCOW2_OPT_L2_CACHE_SIZE " may not exceed "
899 QCOW2_OPT_CACHE_SIZE);
900 return;
901 } else if (*refcount_cache_size > combined_cache_size) {
902 error_setg(errp, QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not exceed "
903 QCOW2_OPT_CACHE_SIZE);
904 return;
905 }
906
907 if (l2_cache_size_set) {
908 *refcount_cache_size = combined_cache_size - *l2_cache_size;
909 } else if (refcount_cache_size_set) {
910 *l2_cache_size = combined_cache_size - *refcount_cache_size;
911 } else {
912
913
914 if (combined_cache_size >= max_l2_cache + min_refcount_cache) {
915 *l2_cache_size = max_l2_cache;
916 *refcount_cache_size = combined_cache_size - *l2_cache_size;
917 } else {
918 *refcount_cache_size =
919 MIN(combined_cache_size, min_refcount_cache);
920 *l2_cache_size = combined_cache_size - *refcount_cache_size;
921 }
922 }
923 }
924
925
926
927
928
929
930 if (*l2_cache_size < max_l2_cache && !l2_cache_entry_size_set) {
931 *l2_cache_entry_size = MIN(s->cluster_size, 4096);
932 }
933
934
935
936
937 if (*l2_cache_entry_size < (1 << MIN_CLUSTER_BITS) ||
938 *l2_cache_entry_size > s->cluster_size ||
939 !is_power_of_2(*l2_cache_entry_size)) {
940 error_setg(errp, "L2 cache entry size must be a power of two "
941 "between %d and the cluster size (%d)",
942 1 << MIN_CLUSTER_BITS, s->cluster_size);
943 return;
944 }
945}
946
/*
 * Holds the option-derived state computed by
 * qcow2_update_options_prepare() until it is either installed into
 * BDRVQcow2State by qcow2_update_options_commit() or discarded by
 * qcow2_update_options_abort().
 */
typedef struct Qcow2ReopenState {
    Qcow2Cache *l2_table_cache;         /* newly created L2 table cache */
    Qcow2Cache *refcount_block_cache;   /* newly created refcount block cache */
    int l2_slice_size;                  /* L2 cache entry size / sizeof(uint64_t) */
    bool use_lazy_refcounts;
    int overlap_check;                  /* bitmask, one bit per overlap check */
    bool discard_passthrough[QCOW2_DISCARD_MAX];
    uint64_t cache_clean_interval;      /* seconds; checked against UINT_MAX */
    QCryptoBlockOpenOptions *crypto_opts; /* NULL for unencrypted images */
} Qcow2ReopenState;
957
/*
 * Parse the runtime @options for @bs and store the resulting settings in
 * @r without touching the active configuration in BDRVQcow2State.
 *
 * Side effects on the image: the existing metadata caches are flushed, and
 * when lazy refcounts are being switched off, the image is marked clean.
 *
 * Entries handled here are removed from @options (the "encrypt." sub-dict
 * is extracted, the rest is absorbed into a QemuOpts).
 *
 * Returns 0 on success.  On failure returns a negative errno with @errp
 * set; anything already stored in @r (for example newly created caches) is
 * left there for qcow2_update_options_abort() to release.
 *
 * NOTE(review): the final crypto_opts check assumes @r->crypto_opts is NULL
 * on entry - confirm that callers zero-initialise @r.
 */
static int qcow2_update_options_prepare(BlockDriverState *bs,
                                        Qcow2ReopenState *r,
                                        QDict *options, int flags,
                                        Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    QemuOpts *opts = NULL;
    const char *opt_overlap_check, *opt_overlap_check_template;
    int overlap_check_template = 0;
    uint64_t l2_cache_size, l2_cache_entry_size, refcount_cache_size;
    int i;
    const char *encryptfmt;
    QDict *encryptopts = NULL;
    Error *local_err = NULL;
    int ret;

    /* Split off the encryption options; they are handled at the end. */
    qdict_extract_subqdict(options, &encryptopts, "encrypt.");
    encryptfmt = qdict_get_try_str(encryptopts, "format");

    opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

    /* get L2 table/refcount block cache size from command line options */
    read_cache_sizes(bs, opts, &l2_cache_size, &l2_cache_entry_size,
                     &refcount_cache_size, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

    /* Convert the byte sizes into a number of cache entries. */
    l2_cache_size /= l2_cache_entry_size;
    if (l2_cache_size < MIN_L2_CACHE_SIZE) {
        l2_cache_size = MIN_L2_CACHE_SIZE;
    }
    if (l2_cache_size > INT_MAX) {
        error_setg(errp, "L2 cache size too big");
        ret = -EINVAL;
        goto fail;
    }

    refcount_cache_size /= s->cluster_size;
    if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) {
        refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE;
    }
    if (refcount_cache_size > INT_MAX) {
        error_setg(errp, "Refcount cache size too big");
        ret = -EINVAL;
        goto fail;
    }

    /* alloc new L2 table/refcount block cache, flush old one */
    if (s->l2_table_cache) {
        ret = qcow2_cache_flush(bs, s->l2_table_cache);
        if (ret) {
            error_setg_errno(errp, -ret, "Failed to flush the L2 table cache");
            goto fail;
        }
    }

    if (s->refcount_block_cache) {
        ret = qcow2_cache_flush(bs, s->refcount_block_cache);
        if (ret) {
            error_setg_errno(errp, -ret,
                             "Failed to flush the refcount block cache");
            goto fail;
        }
    }

    r->l2_slice_size = l2_cache_entry_size / sizeof(uint64_t);
    r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size,
                                           l2_cache_entry_size);
    r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size,
                                                 s->cluster_size);
    if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) {
        error_setg(errp, "Could not allocate metadata caches");
        ret = -ENOMEM;
        goto fail;
    }

    /* New interval for cache cleanup timer */
    r->cache_clean_interval =
        qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL,
                            DEFAULT_CACHE_CLEAN_INTERVAL);
#ifndef CONFIG_LINUX
    if (r->cache_clean_interval != 0) {
        error_setg(errp, QCOW2_OPT_CACHE_CLEAN_INTERVAL
                   " not supported on this host");
        ret = -EINVAL;
        goto fail;
    }
#endif
    if (r->cache_clean_interval > UINT_MAX) {
        error_setg(errp, "Cache clean interval too big");
        ret = -EINVAL;
        goto fail;
    }

    /* lazy-refcounts; flush if going from enabled to disabled */
    r->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
        (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
    if (r->use_lazy_refcounts && s->qcow_version < 3) {
        error_setg(errp, "Lazy refcounts require a qcow2 image with at least "
                   "qemu 1.1 compatibility level");
        ret = -EINVAL;
        goto fail;
    }

    if (s->use_lazy_refcounts && !r->use_lazy_refcounts) {
        ret = qcow2_mark_clean(bs);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to disable lazy refcounts");
            goto fail;
        }
    }

    /* Overlap check options: the two option names must agree if both set */
    opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP);
    opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE);
    if (opt_overlap_check_template && opt_overlap_check &&
        strcmp(opt_overlap_check_template, opt_overlap_check))
    {
        error_setg(errp, "Conflicting values for qcow2 options '"
                   QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE
                   "' ('%s')", opt_overlap_check, opt_overlap_check_template);
        ret = -EINVAL;
        goto fail;
    }
    if (!opt_overlap_check) {
        /* Fall back to the template option, then to "cached". */
        opt_overlap_check = opt_overlap_check_template ?: "cached";
    }

    if (!strcmp(opt_overlap_check, "none")) {
        overlap_check_template = 0;
    } else if (!strcmp(opt_overlap_check, "constant")) {
        overlap_check_template = QCOW2_OL_CONSTANT;
    } else if (!strcmp(opt_overlap_check, "cached")) {
        overlap_check_template = QCOW2_OL_CACHED;
    } else if (!strcmp(opt_overlap_check, "all")) {
        overlap_check_template = QCOW2_OL_ALL;
    } else {
        error_setg(errp, "Unsupported value '%s' for qcow2 option "
                   "'overlap-check'. Allowed are any of the following: "
                   "none, constant, cached, all", opt_overlap_check);
        ret = -EINVAL;
        goto fail;
    }

    r->overlap_check = 0;
    for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) {
        /*
         * Each per-check boolean option defaults to the corresponding bit
         * of the selected template.
         */
        r->overlap_check |=
            qemu_opt_get_bool(opts, overlap_bool_option_names[i],
                              overlap_check_template & (1 << i)) << i;
    }

    r->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
    r->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
    r->discard_passthrough[QCOW2_DISCARD_REQUEST] =
        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
                          flags & BDRV_O_UNMAP);
    r->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
    r->discard_passthrough[QCOW2_DISCARD_OTHER] =
        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);

    /* The requested encryption format must match the image header. */
    switch (s->crypt_method_header) {
    case QCOW_CRYPT_NONE:
        if (encryptfmt) {
            error_setg(errp, "No encryption in image header, but options "
                       "specified format '%s'", encryptfmt);
            ret = -EINVAL;
            goto fail;
        }
        break;

    case QCOW_CRYPT_AES:
        if (encryptfmt && !g_str_equal(encryptfmt, "aes")) {
            error_setg(errp,
                       "Header reported 'aes' encryption format but "
                       "options specify '%s'", encryptfmt);
            ret = -EINVAL;
            goto fail;
        }
        /* The crypto options are opened with format "qcow" for AES. */
        qdict_put_str(encryptopts, "format", "qcow");
        r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp);
        break;

    case QCOW_CRYPT_LUKS:
        if (encryptfmt && !g_str_equal(encryptfmt, "luks")) {
            error_setg(errp,
                       "Header reported 'luks' encryption format but "
                       "options specify '%s'", encryptfmt);
            ret = -EINVAL;
            goto fail;
        }
        qdict_put_str(encryptopts, "format", "luks");
        r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp);
        break;

    default:
        /* The error is turned into a failure by the check just below. */
        error_setg(errp, "Unsupported encryption method %d",
                   s->crypt_method_header);
        break;
    }
    if (s->crypt_method_header != QCOW_CRYPT_NONE && !r->crypto_opts) {
        ret = -EINVAL;
        goto fail;
    }

    ret = 0;
fail:
    /* Shared exit path for success and failure. */
    qobject_unref(encryptopts);
    qemu_opts_del(opts);
    opts = NULL;
    return ret;
}
1181
1182static void qcow2_update_options_commit(BlockDriverState *bs,
1183 Qcow2ReopenState *r)
1184{
1185 BDRVQcow2State *s = bs->opaque;
1186 int i;
1187
1188 if (s->l2_table_cache) {
1189 qcow2_cache_destroy(s->l2_table_cache);
1190 }
1191 if (s->refcount_block_cache) {
1192 qcow2_cache_destroy(s->refcount_block_cache);
1193 }
1194 s->l2_table_cache = r->l2_table_cache;
1195 s->refcount_block_cache = r->refcount_block_cache;
1196 s->l2_slice_size = r->l2_slice_size;
1197
1198 s->overlap_check = r->overlap_check;
1199 s->use_lazy_refcounts = r->use_lazy_refcounts;
1200
1201 for (i = 0; i < QCOW2_DISCARD_MAX; i++) {
1202 s->discard_passthrough[i] = r->discard_passthrough[i];
1203 }
1204
1205 if (s->cache_clean_interval != r->cache_clean_interval) {
1206 cache_clean_timer_del(bs);
1207 s->cache_clean_interval = r->cache_clean_interval;
1208 cache_clean_timer_init(bs, bdrv_get_aio_context(bs));
1209 }
1210
1211 qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
1212 s->crypto_opts = r->crypto_opts;
1213}
1214
1215static void qcow2_update_options_abort(BlockDriverState *bs,
1216 Qcow2ReopenState *r)
1217{
1218 if (r->l2_table_cache) {
1219 qcow2_cache_destroy(r->l2_table_cache);
1220 }
1221 if (r->refcount_block_cache) {
1222 qcow2_cache_destroy(r->refcount_block_cache);
1223 }
1224 qapi_free_QCryptoBlockOpenOptions(r->crypto_opts);
1225}
1226
1227static int qcow2_update_options(BlockDriverState *bs, QDict *options,
1228 int flags, Error **errp)
1229{
1230 Qcow2ReopenState r = {};
1231 int ret;
1232
1233 ret = qcow2_update_options_prepare(bs, &r, options, flags, errp);
1234 if (ret >= 0) {
1235 qcow2_update_options_commit(bs, &r);
1236 } else {
1237 qcow2_update_options_abort(bs, &r);
1238 }
1239
1240 return ret;
1241}
1242
1243
/*
 * Open the qcow2 image stored in bs->file and fill in the driver state.
 *
 * In order, this reads and validates the image header, validates the
 * refcount/snapshot/L1 tables, loads the L1 table, applies the runtime
 * options, initializes refcount handling, reads the header extensions,
 * attaches the data file, sets up encryption, reads the backing file name
 * and the snapshots, loads dirty bitmaps, and finally rewrites the header
 * and/or repairs a dirty image when that is required and allowed.
 *
 * @bs:      node being opened; bs->opaque is the BDRVQcow2State
 * @options: runtime options, consumed by qcow2_update_options() and
 *           bdrv_open_child()
 * @flags:   BDRV_O_* open flags
 * @errp:    receives the error description on failure
 *
 * Returns a non-negative value on success and a negative errno value on
 * failure.  On failure everything set up so far is released at the "fail"
 * label.  qcow2_open_entry() calls this with s->lock held.
 */
static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
                                      int flags, Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    unsigned int len, i;
    int ret = 0;
    QCowHeader header;
    Error *local_err = NULL;
    uint64_t ext_end;
    uint64_t l1_vm_state_index;
    bool update_header = false;

    /* Read the fixed part of the header and convert it from big endian */
    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read qcow2 header");
        goto fail;
    }
    header.magic = be32_to_cpu(header.magic);
    header.version = be32_to_cpu(header.version);
    header.backing_file_offset = be64_to_cpu(header.backing_file_offset);
    header.backing_file_size = be32_to_cpu(header.backing_file_size);
    header.size = be64_to_cpu(header.size);
    header.cluster_bits = be32_to_cpu(header.cluster_bits);
    header.crypt_method = be32_to_cpu(header.crypt_method);
    header.l1_table_offset = be64_to_cpu(header.l1_table_offset);
    header.l1_size = be32_to_cpu(header.l1_size);
    header.refcount_table_offset = be64_to_cpu(header.refcount_table_offset);
    header.refcount_table_clusters =
        be32_to_cpu(header.refcount_table_clusters);
    header.snapshots_offset = be64_to_cpu(header.snapshots_offset);
    header.nb_snapshots = be32_to_cpu(header.nb_snapshots);

    /* Basic sanity checks: magic and supported versions (2 and 3) */
    if (header.magic != QCOW_MAGIC) {
        error_setg(errp, "Image is not in qcow2 format");
        ret = -EINVAL;
        goto fail;
    }
    if (header.version < 2 || header.version > 3) {
        error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version);
        ret = -ENOTSUP;
        goto fail;
    }

    s->qcow_version = header.version;

    /* Initialise cluster size */
    if (header.cluster_bits < MIN_CLUSTER_BITS ||
        header.cluster_bits > MAX_CLUSTER_BITS) {
        error_setg(errp, "Unsupported cluster size: 2^%" PRIu32,
                   header.cluster_bits);
        ret = -EINVAL;
        goto fail;
    }

    s->cluster_bits = header.cluster_bits;
    s->cluster_size = 1 << s->cluster_bits;

    /*
     * Version-specific header fields: version 2 images get fixed defaults
     * for the fields that only exist in version 3 headers; for version 3
     * the fields are byte-swapped and the header length is checked.
     */
    if (header.version == 2) {
        header.incompatible_features    = 0;
        header.compatible_features      = 0;
        header.autoclear_features       = 0;
        header.refcount_order           = 4;
        header.header_length            = 72;
    } else {
        header.incompatible_features =
            be64_to_cpu(header.incompatible_features);
        header.compatible_features = be64_to_cpu(header.compatible_features);
        header.autoclear_features = be64_to_cpu(header.autoclear_features);
        header.refcount_order = be32_to_cpu(header.refcount_order);
        header.header_length = be32_to_cpu(header.header_length);

        if (header.header_length < 104) {
            error_setg(errp, "qcow2 header too short");
            ret = -EINVAL;
            goto fail;
        }
    }

    if (header.header_length > s->cluster_size) {
        error_setg(errp, "qcow2 header exceeds cluster size");
        ret = -EINVAL;
        goto fail;
    }

    /*
     * Header bytes beyond the structure known to this code are kept in
     * s->unknown_header_fields.
     */
    if (header.header_length > sizeof(header)) {
        s->unknown_header_fields_size = header.header_length - sizeof(header);
        s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
        ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
                         s->unknown_header_fields_size);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not read unknown qcow2 header "
                             "fields");
            goto fail;
        }
    }

    if (header.backing_file_offset > s->cluster_size) {
        error_setg(errp, "Invalid backing file offset");
        ret = -EINVAL;
        goto fail;
    }

    /*
     * The header extension area ends at the backing file name if there is
     * one, otherwise at the end of the first cluster.
     */
    if (header.backing_file_offset) {
        ext_end = header.backing_file_offset;
    } else {
        ext_end = 1 << header.cluster_bits;
    }

    /* Handle feature bits */
    s->incompatible_features    = header.incompatible_features;
    s->compatible_features      = header.compatible_features;
    s->autoclear_features       = header.autoclear_features;

    /*
     * Unknown incompatible features make the image unusable.  The header
     * extensions are read here only to obtain the feature table used for the
     * error report.
     */
    if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) {
        void *feature_table = NULL;
        qcow2_read_extensions(bs, header.header_length, ext_end,
                              &feature_table, flags, NULL, NULL);
        report_unsupported_feature(errp, feature_table,
                                   s->incompatible_features &
                                   ~QCOW2_INCOMPAT_MASK);
        ret = -ENOTSUP;
        g_free(feature_table);
        goto fail;
    }

    if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
        /*
         * An image marked corrupt may not be opened read/write, unless it is
         * being opened with BDRV_O_CHECK.
         */
        if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
            error_setg(errp, "qcow2: Image is corrupt; cannot be opened "
                       "read/write");
            ret = -EACCES;
            goto fail;
        }
    }

    /* Check support for various header values */
    if (header.refcount_order > 6) {
        error_setg(errp, "Reference count entry width too large; may not "
                   "exceed 64 bits");
        ret = -EINVAL;
        goto fail;
    }
    s->refcount_order = header.refcount_order;
    s->refcount_bits = 1 << s->refcount_order;
    /*
     * refcount_max = 2^refcount_bits - 1, computed in two steps so that the
     * shift count stays below 64 when refcount_bits is 64.
     */
    s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
    s->refcount_max += s->refcount_max - 1;

    s->crypt_method_header = header.crypt_method;
    if (s->crypt_method_header) {
        /* AES images are rejected when the driver whitelist is in use */
        if (bdrv_uses_whitelist() &&
            s->crypt_method_header == QCOW_CRYPT_AES) {
            error_setg(errp,
                       "Use of AES-CBC encrypted qcow2 images is no longer "
                       "supported in system emulators");
            error_append_hint(errp,
                              "You can use 'qemu-img convert' to convert your "
                              "image to an alternative supported format, such "
                              "as unencrypted qcow2, or raw with the LUKS "
                              "format instead.\n");
            ret = -ENOSYS;
            goto fail;
        }

        if (s->crypt_method_header == QCOW_CRYPT_AES) {
            s->crypt_physical_offset = false;
        } else {
            /*
             * Every method other than AES sets crypt_physical_offset.
             * NOTE(review): presumably this selects the host (physical)
             * offset rather than the guest offset for the per-sector
             * encryption tweak -- confirm against the crypto helpers.
             */
            s->crypt_physical_offset = true;
        }

        bs->encrypted = true;
    }

    /* Derived geometry: an L2 entry is 8 bytes, hence cluster_bits - 3 */
    s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
    s->l2_size = 1 << s->l2_bits;
    /* 2^(s->refcount_order - 3) is the refcount width in bytes */
    s->refcount_block_bits = s->cluster_bits - (s->refcount_order - 3);
    s->refcount_block_size = 1 << s->refcount_block_bits;
    bs->total_sectors = header.size / BDRV_SECTOR_SIZE;
    s->csize_shift = (62 - (s->cluster_bits - 8));
    s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
    s->cluster_offset_mask = (1LL << s->csize_shift) - 1;

    s->refcount_table_offset = header.refcount_table_offset;
    s->refcount_table_size =
        header.refcount_table_clusters << (s->cluster_bits - 3);

    /* A missing refcount table is only tolerated with BDRV_O_CHECK */
    if (header.refcount_table_clusters == 0 && !(flags & BDRV_O_CHECK)) {
        error_setg(errp, "Image does not contain a reference count table");
        ret = -EINVAL;
        goto fail;
    }

    ret = qcow2_validate_table(bs, s->refcount_table_offset,
                               header.refcount_table_clusters,
                               s->cluster_size, QCOW_MAX_REFTABLE_SIZE,
                               "Reference count table", errp);
    if (ret < 0) {
        goto fail;
    }

    if (!(flags & BDRV_O_CHECK)) {
        /*
         * The snapshot table is neither validated nor read (further down)
         * when the image is opened with BDRV_O_CHECK.
         * NOTE(review): presumably so that a check/repair run can still open
         * an image whose snapshot table is damaged -- confirm.
         */
        ret = qcow2_validate_table(bs, header.snapshots_offset,
                                   header.nb_snapshots,
                                   sizeof(QCowSnapshotHeader),
                                   sizeof(QCowSnapshotHeader) *
                                       QCOW_MAX_SNAPSHOTS,
                                   "Snapshot table", errp);
        if (ret < 0) {
            goto fail;
        }
    }

    /* read the level 1 table */
    ret = qcow2_validate_table(bs, header.l1_table_offset,
                               header.l1_size, sizeof(uint64_t),
                               QCOW_MAX_L1_SIZE, "Active L1 table", errp);
    if (ret < 0) {
        goto fail;
    }
    s->l1_size = header.l1_size;
    s->l1_table_offset = header.l1_table_offset;

    l1_vm_state_index = size_to_l1(s, header.size);
    if (l1_vm_state_index > INT_MAX) {
        error_setg(errp, "Image is too big");
        ret = -EFBIG;
        goto fail;
    }
    s->l1_vm_state_index = l1_vm_state_index;

    /*
     * The active L1 table must have at least l1_vm_state_index entries,
     * i.e. enough to cover the virtual disk size from the header.
     */
    if (s->l1_size < s->l1_vm_state_index) {
        error_setg(errp, "L1 table is too small");
        ret = -EINVAL;
        goto fail;
    }

    if (s->l1_size > 0) {
        /* Allocation size is rounded up to a multiple of 512 bytes */
        s->l1_table = qemu_try_blockalign(bs->file->bs,
            ROUND_UP(s->l1_size * sizeof(uint64_t), 512));
        if (s->l1_table == NULL) {
            error_setg(errp, "Could not allocate L1 table");
            ret = -ENOMEM;
            goto fail;
        }
        ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
                         s->l1_size * sizeof(uint64_t));
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not read L1 table");
            goto fail;
        }
        /* L1 entries are stored big endian on disk */
        for(i = 0;i < s->l1_size; i++) {
            s->l1_table[i] = be64_to_cpu(s->l1_table[i]);
        }
    }

    /* Parse driver-specific options */
    ret = qcow2_update_options(bs, options, flags, errp);
    if (ret < 0) {
        goto fail;
    }

    s->flags = flags;

    ret = qcow2_refcount_init(bs);
    if (ret != 0) {
        error_setg_errno(errp, -ret, "Could not initialize refcount handling");
        goto fail;
    }

    QLIST_INIT(&s->cluster_allocs);
    QTAILQ_INIT(&s->discards);

    /* read qcow2 extensions */
    if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL,
                              flags, &update_header, &local_err)) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

    /*
     * Open the external data file from the "data-file" option, if given.
     * A NULL result without an error is accepted here; the checks below
     * decide whether a data file is actually required or forbidden.
     */
    s->data_file = bdrv_open_child(NULL, options, "data-file", bs, &child_file,
                                   true, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

    if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) {
        /* Fall back to the file name stored in s->image_data_file */
        if (!s->data_file && s->image_data_file) {
            s->data_file = bdrv_open_child(s->image_data_file, options,
                                           "data-file", bs, &child_file,
                                           false, errp);
            if (!s->data_file) {
                ret = -EINVAL;
                goto fail;
            }
        }
        if (!s->data_file) {
            error_setg(errp, "'data-file' is required for this image");
            ret = -EINVAL;
            goto fail;
        }
    } else {
        if (s->data_file) {
            error_setg(errp, "'data-file' can only be set for images with an "
                             "external data file");
            ret = -EINVAL;
            goto fail;
        }

        /* Without an external data file, guest data lives in bs->file */
        s->data_file = bs->file;

        if (data_file_is_raw(bs)) {
            error_setg(errp, "data-file-raw requires a data file");
            ret = -EINVAL;
            goto fail;
        }
    }

    /*
     * The image is encrypted but no crypto context exists yet.
     * NOTE(review): s->crypto is presumably created while the crypto header
     * extension is read above -- confirm in qcow2_read_extensions().
     * For AES the context is opened here; for any other method the missing
     * context is an error unless the image is opened with BDRV_O_NO_IO.
     */
    if (s->crypt_method_header && !s->crypto) {
        if (s->crypt_method_header == QCOW_CRYPT_AES) {
            unsigned int cflags = 0;
            if (flags & BDRV_O_NO_IO) {
                cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
            }
            s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
                                           NULL, NULL, cflags,
                                           QCOW2_MAX_THREADS, errp);
            if (!s->crypto) {
                ret = -EINVAL;
                goto fail;
            }
        } else if (!(flags & BDRV_O_NO_IO)) {
            error_setg(errp, "Missing CRYPTO header for crypt method %d",
                       s->crypt_method_header);
            ret = -EINVAL;
            goto fail;
        }
    }

    /* read the backing file name */
    if (header.backing_file_offset != 0) {
        len = header.backing_file_size;
        /* The name must fit in the first cluster and in bs->backing_file */
        if (len > MIN(1023, s->cluster_size - header.backing_file_offset) ||
            len >= sizeof(bs->backing_file)) {
            error_setg(errp, "Backing file name too long");
            ret = -EINVAL;
            goto fail;
        }
        ret = bdrv_pread(bs->file, header.backing_file_offset,
                         bs->auto_backing_file, len);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not read backing file name");
            goto fail;
        }
        bs->auto_backing_file[len] = '\0';
        pstrcpy(bs->backing_file, sizeof(bs->backing_file),
                bs->auto_backing_file);
        s->image_backing_file = g_strdup(bs->auto_backing_file);
    }

    /*
     * Internal snapshots are only loaded when the image is not opened with
     * BDRV_O_CHECK, matching the skipped snapshot table validation above;
     * in that case s->snapshots_offset and s->nb_snapshots are not set here.
     */
    if (!(flags & BDRV_O_CHECK)) {
        s->snapshots_offset = header.snapshots_offset;
        s->nb_snapshots = header.nb_snapshots;

        ret = qcow2_read_snapshots(bs, errp);
        if (ret < 0) {
            goto fail;
        }
    }

    /*
     * Unknown autoclear feature bits request a header update, but the header
     * is only rewritten for a writable, active node; only then are the
     * unknown bits dropped from the in-memory copy.
     */
    update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK;
    update_header =
        update_header && !bs->read_only && !(flags & BDRV_O_INACTIVE);
    if (update_header) {
        s->autoclear_features &= QCOW2_AUTOCLEAR_MASK;
    }

    /*
     * Dirty bitmaps are loaded only when the node is not inactive.  If
     * loading them already rewrote the header (as the name of the local
     * "header_updated" suggests), the separate header update below is
     * skipped.
     */
    if (!(bdrv_get_flags(bs) & BDRV_O_INACTIVE)) {
        /* It's case 1, 2 or 3.2. Or 3.1 which is BUG in management layer. */
        bool header_updated = qcow2_load_dirty_bitmaps(bs, &local_err);

        update_header = update_header && !header_updated;
    }
    if (local_err != NULL) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

    if (update_header) {
        ret = qcow2_update_header(bs);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not update qcow2 header");
            goto fail;
        }
    }

    /* BDRV_REQ_MAY_UNMAP is only advertised for version 3 (and later) */
    bs->supported_zero_flags = header.version >= 3 ? BDRV_REQ_MAY_UNMAP : 0;

    /* Repair image if dirty */
    if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only &&
        (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
        BdrvCheckResult result = {0};

        ret = qcow2_co_check_locked(bs, &result,
                                    BDRV_FIX_ERRORS | BDRV_FIX_LEAKS);
        if (ret < 0 || result.check_errors) {
            /* The check ran but reported internal errors: map to -EIO */
            if (ret >= 0) {
                ret = -EIO;
            }
            error_setg_errno(errp, -ret, "Could not repair dirty image");
            goto fail;
        }
    }

#ifdef DEBUG_ALLOC
    {
        BdrvCheckResult result = {0};
        qcow2_check_refcounts(bs, &result, 0);
    }
#endif

    qemu_co_queue_init(&s->thread_task_queue);

    return ret;

    /* Error path: release everything that may have been set up above */
 fail:
    g_free(s->image_data_file);
    /* Only drop the data file child if it is distinct from bs->file */
    if (has_data_file(bs)) {
        bdrv_unref_child(bs, s->data_file);
    }
    g_free(s->unknown_header_fields);
    cleanup_unknown_header_ext(bs);
    qcow2_free_snapshots(bs);
    qcow2_refcount_close(bs);
    qemu_vfree(s->l1_table);
    /* else pre-write overlap checks in cache_destroy may crash */
    s->l1_table = NULL;
    cache_clean_timer_del(bs);
    if (s->l2_table_cache) {
        qcow2_cache_destroy(s->l2_table_cache);
    }
    if (s->refcount_block_cache) {
        qcow2_cache_destroy(s->refcount_block_cache);
    }
    qcrypto_block_free(s->crypto);
    qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
    return ret;
}
1777
/*
 * Argument/result bundle handed to qcow2_open_entry() so that
 * qcow2_do_open() can be run from a coroutine.
 */
typedef struct QCow2OpenCo {
    BlockDriverState *bs;   /* node being opened */
    QDict *options;         /* options forwarded to qcow2_do_open() */
    int flags;              /* open flags forwarded to qcow2_do_open() */
    Error **errp;           /* error destination forwarded to qcow2_do_open() */
    int ret;                /* result of qcow2_do_open(); qcow2_open() presets
                             * it to -EINPROGRESS and polls until it changes */
} QCow2OpenCo;
1785
1786static void coroutine_fn qcow2_open_entry(void *opaque)
1787{
1788 QCow2OpenCo *qoc = opaque;
1789 BDRVQcow2State *s = qoc->bs->opaque;
1790
1791 qemu_co_mutex_lock(&s->lock);
1792 qoc->ret = qcow2_do_open(qoc->bs, qoc->options, qoc->flags, qoc->errp);
1793 qemu_co_mutex_unlock(&s->lock);
1794}
1795
1796static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
1797 Error **errp)
1798{
1799 BDRVQcow2State *s = bs->opaque;
1800 QCow2OpenCo qoc = {
1801 .bs = bs,
1802 .options = options,
1803 .flags = flags,
1804 .errp = errp,
1805 .ret = -EINPROGRESS
1806 };
1807
1808 bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
1809 false, errp);
1810 if (!bs->file) {
1811 return -EINVAL;
1812 }
1813
1814
1815 qemu_co_mutex_init(&s->lock);
1816
1817 if (qemu_in_coroutine()) {
1818
1819 qcow2_open_entry(&qoc);
1820 } else {
1821 assert(qemu_get_current_aio_context() == qemu_get_aio_context());
1822 qemu_coroutine_enter(qemu_coroutine_create(qcow2_open_entry, &qoc));
1823 BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS);
1824 }
1825 return qoc.ret;
1826}
1827
1828static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
1829{
1830 BDRVQcow2State *s = bs->opaque;
1831
1832 if (bs->encrypted) {
1833
1834 bs->bl.request_alignment = qcrypto_block_get_sector_size(s->crypto);
1835 }
1836 bs->bl.pwrite_zeroes_alignment = s->cluster_size;
1837 bs->bl.pdiscard_alignment = s->cluster_size;
1838}
1839
1840static int qcow2_reopen_prepare(BDRVReopenState *state,
1841 BlockReopenQueue *queue, Error **errp)
1842{
1843 Qcow2ReopenState *r;
1844 int ret;
1845
1846 r = g_new0(Qcow2ReopenState, 1);
1847 state->opaque = r;
1848
1849 ret = qcow2_update_options_prepare(state->bs, r, state->options,
1850 state->flags, errp);
1851 if (ret < 0) {
1852 goto fail;
1853 }
1854
1855
1856 if ((state->flags & BDRV_O_RDWR) == 0) {
1857 ret = qcow2_reopen_bitmaps_ro(state->bs, errp);
1858 if (ret < 0) {
1859 goto fail;
1860 }
1861
1862 ret = bdrv_flush(state->bs);
1863 if (ret < 0) {
1864 goto fail;
1865 }
1866
1867 ret = qcow2_mark_clean(state->bs);
1868 if (ret < 0) {
1869 goto fail;
1870 }
1871 }
1872
1873 return 0;
1874
1875fail:
1876 qcow2_update_options_abort(state->bs, r);
1877 g_free(r);
1878 return ret;
1879}
1880
1881static void qcow2_reopen_commit(BDRVReopenState *state)
1882{
1883 qcow2_update_options_commit(state->bs, state->opaque);
1884 if (state->flags & BDRV_O_RDWR) {
1885 Error *local_err = NULL;
1886
1887 if (qcow2_reopen_bitmaps_rw(state->bs, &local_err) < 0) {
1888
1889
1890
1891
1892
1893 error_reportf_err(local_err,
1894 "%s: Failed to make dirty bitmaps writable: ",
1895 bdrv_get_node_name(state->bs));
1896 }
1897 }
1898 g_free(state->opaque);
1899}
1900
1901static void qcow2_reopen_abort(BDRVReopenState *state)
1902{
1903 qcow2_update_options_abort(state->bs, state->opaque);
1904 g_free(state->opaque);
1905}
1906
1907static void qcow2_join_options(QDict *options, QDict *old_options)
1908{
1909 bool has_new_overlap_template =
1910 qdict_haskey(options, QCOW2_OPT_OVERLAP) ||
1911 qdict_haskey(options, QCOW2_OPT_OVERLAP_TEMPLATE);
1912 bool has_new_total_cache_size =
1913 qdict_haskey(options, QCOW2_OPT_CACHE_SIZE);
1914 bool has_all_cache_options;
1915
1916
1917 if (has_new_overlap_template) {
1918 qdict_del(old_options, QCOW2_OPT_OVERLAP);
1919 qdict_del(old_options, QCOW2_OPT_OVERLAP_TEMPLATE);
1920 qdict_del(old_options, QCOW2_OPT_OVERLAP_MAIN_HEADER);
1921 qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L1);
1922 qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L2);
1923 qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_TABLE);
1924 qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK);
1925 qdict_del(old_options, QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE);
1926 qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L1);
1927 qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L2);
1928 }
1929
1930
1931 if (qdict_haskey(options, QCOW2_OPT_CACHE_SIZE)) {
1932 qdict_del(old_options, QCOW2_OPT_L2_CACHE_SIZE);
1933 qdict_del(old_options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
1934 }
1935
1936 qdict_join(options, old_options, false);
1937
1938
1939
1940
1941
1942
1943 has_all_cache_options =
1944 qdict_haskey(options, QCOW2_OPT_CACHE_SIZE) ||
1945 qdict_haskey(options, QCOW2_OPT_L2_CACHE_SIZE) ||
1946 qdict_haskey(options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
1947
1948 if (has_all_cache_options && !has_new_total_cache_size) {
1949 qdict_del(options, QCOW2_OPT_CACHE_SIZE);
1950 }
1951}
1952
1953static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs,
1954 bool want_zero,
1955 int64_t offset, int64_t count,
1956 int64_t *pnum, int64_t *map,
1957 BlockDriverState **file)
1958{
1959 BDRVQcow2State *s = bs->opaque;
1960 uint64_t cluster_offset;
1961 int index_in_cluster, ret;
1962 unsigned int bytes;
1963 int status = 0;
1964
1965 qemu_co_mutex_lock(&s->lock);
1966
1967 if (!s->metadata_preallocation_checked) {
1968 ret = qcow2_detect_metadata_preallocation(bs);
1969 s->metadata_preallocation = (ret == 1);
1970 s->metadata_preallocation_checked = true;
1971 }
1972
1973 bytes = MIN(INT_MAX, count);
1974 ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset);
1975 qemu_co_mutex_unlock(&s->lock);
1976 if (ret < 0) {
1977 return ret;
1978 }
1979
1980 *pnum = bytes;
1981
1982 if ((ret == QCOW2_CLUSTER_NORMAL || ret == QCOW2_CLUSTER_ZERO_ALLOC) &&
1983 !s->crypto) {
1984 index_in_cluster = offset & (s->cluster_size - 1);
1985 *map = cluster_offset | index_in_cluster;
1986 *file = s->data_file->bs;
1987 status |= BDRV_BLOCK_OFFSET_VALID;
1988 }
1989 if (ret == QCOW2_CLUSTER_ZERO_PLAIN || ret == QCOW2_CLUSTER_ZERO_ALLOC) {
1990 status |= BDRV_BLOCK_ZERO;
1991 } else if (ret != QCOW2_CLUSTER_UNALLOCATED) {
1992 status |= BDRV_BLOCK_DATA;
1993 }
1994 if (s->metadata_preallocation && (status & BDRV_BLOCK_DATA) &&
1995 (status & BDRV_BLOCK_OFFSET_VALID))
1996 {
1997 status |= BDRV_BLOCK_RECURSE;
1998 }
1999 return status;
2000}
2001
2002static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs,
2003 QCowL2Meta **pl2meta,
2004 bool link_l2)
2005{
2006 int ret = 0;
2007 QCowL2Meta *l2meta = *pl2meta;
2008
2009 while (l2meta != NULL) {
2010 QCowL2Meta *next;
2011
2012 if (link_l2) {
2013 ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
2014 if (ret) {
2015 goto out;
2016 }
2017 } else {
2018 qcow2_alloc_cluster_abort(bs, l2meta);
2019 }
2020
2021
2022 if (l2meta->nb_clusters != 0) {
2023 QLIST_REMOVE(l2meta, next_in_flight);
2024 }
2025
2026 qemu_co_queue_restart_all(&l2meta->dependent_requests);
2027
2028 next = l2meta->next;
2029 g_free(l2meta);
2030 l2meta = next;
2031 }
2032out:
2033 *pl2meta = l2meta;
2034 return ret;
2035}
2036
2037static coroutine_fn int
2038qcow2_co_preadv_encrypted(BlockDriverState *bs,
2039 uint64_t file_cluster_offset,
2040 uint64_t offset,
2041 uint64_t bytes,
2042 QEMUIOVector *qiov,
2043 uint64_t qiov_offset)
2044{
2045 int ret;
2046 BDRVQcow2State *s = bs->opaque;
2047 uint8_t *buf;
2048
2049 assert(bs->encrypted && s->crypto);
2050 assert(bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060 buf = qemu_try_blockalign(s->data_file->bs, bytes);
2061 if (buf == NULL) {
2062 return -ENOMEM;
2063 }
2064
2065 BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
2066 ret = bdrv_co_pread(s->data_file,
2067 file_cluster_offset + offset_into_cluster(s, offset),
2068 bytes, buf, 0);
2069 if (ret < 0) {
2070 goto fail;
2071 }
2072
2073 assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
2074 assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
2075 if (qcow2_co_decrypt(bs,
2076 file_cluster_offset + offset_into_cluster(s, offset),
2077 offset, buf, bytes) < 0)
2078 {
2079 ret = -EIO;
2080 goto fail;
2081 }
2082 qemu_iovec_from_buf(qiov, qiov_offset, buf, bytes);
2083
2084fail:
2085 qemu_vfree(buf);
2086
2087 return ret;
2088}
2089
/*
 * One unit of read or write work, either queued on an AioTaskPool or run
 * directly by qcow2_add_task().  The task entry functions recover this
 * structure from the embedded AioTask via container_of().
 */
typedef struct Qcow2AioTask {
    AioTask task;                   /* embedded generic task (holds func) */

    BlockDriverState *bs;           /* node the request is issued on */
    QCow2ClusterType cluster_type;  /* only used for read; the write entry
                                     * function asserts that it is zero */
    uint64_t file_cluster_offset;   /* host offset of the cluster */
    uint64_t offset;                /* guest offset of the request */
    uint64_t bytes;                 /* length of the request */
    QEMUIOVector *qiov;             /* data vector of the request */
    uint64_t qiov_offset;           /* start offset within qiov */
    QCowL2Meta *l2meta;             /* only used for write; the read entry
                                     * function asserts that it is NULL */
} Qcow2AioTask;
2102
2103static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task);
2104static coroutine_fn int qcow2_add_task(BlockDriverState *bs,
2105 AioTaskPool *pool,
2106 AioTaskFunc func,
2107 QCow2ClusterType cluster_type,
2108 uint64_t file_cluster_offset,
2109 uint64_t offset,
2110 uint64_t bytes,
2111 QEMUIOVector *qiov,
2112 size_t qiov_offset,
2113 QCowL2Meta *l2meta)
2114{
2115 Qcow2AioTask local_task;
2116 Qcow2AioTask *task = pool ? g_new(Qcow2AioTask, 1) : &local_task;
2117
2118 *task = (Qcow2AioTask) {
2119 .task.func = func,
2120 .bs = bs,
2121 .cluster_type = cluster_type,
2122 .qiov = qiov,
2123 .file_cluster_offset = file_cluster_offset,
2124 .offset = offset,
2125 .bytes = bytes,
2126 .qiov_offset = qiov_offset,
2127 .l2meta = l2meta,
2128 };
2129
2130 trace_qcow2_add_task(qemu_coroutine_self(), bs, pool,
2131 func == qcow2_co_preadv_task_entry ? "read" : "write",
2132 cluster_type, file_cluster_offset, offset, bytes,
2133 qiov, qiov_offset);
2134
2135 if (!pool) {
2136 return func(&task->task);
2137 }
2138
2139 aio_task_pool_start_task(pool, &task->task);
2140
2141 return 0;
2142}
2143
2144static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs,
2145 QCow2ClusterType cluster_type,
2146 uint64_t file_cluster_offset,
2147 uint64_t offset, uint64_t bytes,
2148 QEMUIOVector *qiov,
2149 size_t qiov_offset)
2150{
2151 BDRVQcow2State *s = bs->opaque;
2152 int offset_in_cluster = offset_into_cluster(s, offset);
2153
2154 switch (cluster_type) {
2155 case QCOW2_CLUSTER_ZERO_PLAIN:
2156 case QCOW2_CLUSTER_ZERO_ALLOC:
2157
2158 g_assert_not_reached();
2159
2160 case QCOW2_CLUSTER_UNALLOCATED:
2161 assert(bs->backing);
2162
2163 BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
2164 return bdrv_co_preadv_part(bs->backing, offset, bytes,
2165 qiov, qiov_offset, 0);
2166
2167 case QCOW2_CLUSTER_COMPRESSED:
2168 return qcow2_co_preadv_compressed(bs, file_cluster_offset,
2169 offset, bytes, qiov, qiov_offset);
2170
2171 case QCOW2_CLUSTER_NORMAL:
2172 if ((file_cluster_offset & 511) != 0) {
2173 return -EIO;
2174 }
2175
2176 if (bs->encrypted) {
2177 return qcow2_co_preadv_encrypted(bs, file_cluster_offset,
2178 offset, bytes, qiov, qiov_offset);
2179 }
2180
2181 BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
2182 return bdrv_co_preadv_part(s->data_file,
2183 file_cluster_offset + offset_in_cluster,
2184 bytes, qiov, qiov_offset, 0);
2185
2186 default:
2187 g_assert_not_reached();
2188 }
2189
2190 g_assert_not_reached();
2191}
2192
2193static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task)
2194{
2195 Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
2196
2197 assert(!t->l2meta);
2198
2199 return qcow2_co_preadv_task(t->bs, t->cluster_type, t->file_cluster_offset,
2200 t->offset, t->bytes, t->qiov, t->qiov_offset);
2201}
2202
2203static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs,
2204 uint64_t offset, uint64_t bytes,
2205 QEMUIOVector *qiov,
2206 size_t qiov_offset, int flags)
2207{
2208 BDRVQcow2State *s = bs->opaque;
2209 int ret = 0;
2210 unsigned int cur_bytes;
2211 uint64_t cluster_offset = 0;
2212 AioTaskPool *aio = NULL;
2213
2214 while (bytes != 0 && aio_task_pool_status(aio) == 0) {
2215
2216 cur_bytes = MIN(bytes, INT_MAX);
2217 if (s->crypto) {
2218 cur_bytes = MIN(cur_bytes,
2219 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
2220 }
2221
2222 qemu_co_mutex_lock(&s->lock);
2223 ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset);
2224 qemu_co_mutex_unlock(&s->lock);
2225 if (ret < 0) {
2226 goto out;
2227 }
2228
2229 if (ret == QCOW2_CLUSTER_ZERO_PLAIN ||
2230 ret == QCOW2_CLUSTER_ZERO_ALLOC ||
2231 (ret == QCOW2_CLUSTER_UNALLOCATED && !bs->backing))
2232 {
2233 qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes);
2234 } else {
2235 if (!aio && cur_bytes != bytes) {
2236 aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
2237 }
2238 ret = qcow2_add_task(bs, aio, qcow2_co_preadv_task_entry, ret,
2239 cluster_offset, offset, cur_bytes,
2240 qiov, qiov_offset, NULL);
2241 if (ret < 0) {
2242 goto out;
2243 }
2244 }
2245
2246 bytes -= cur_bytes;
2247 offset += cur_bytes;
2248 qiov_offset += cur_bytes;
2249 }
2250
2251out:
2252 if (aio) {
2253 aio_task_pool_wait_all(aio);
2254 if (ret == 0) {
2255 ret = aio_task_pool_status(aio);
2256 }
2257 g_free(aio);
2258 }
2259
2260 return ret;
2261}
2262
2263
2264
2265static bool merge_cow(uint64_t offset, unsigned bytes,
2266 QEMUIOVector *qiov, size_t qiov_offset,
2267 QCowL2Meta *l2meta)
2268{
2269 QCowL2Meta *m;
2270
2271 for (m = l2meta; m != NULL; m = m->next) {
2272
2273 if (m->cow_start.nb_bytes == 0 && m->cow_end.nb_bytes == 0) {
2274 continue;
2275 }
2276
2277
2278 if (m->skip_cow) {
2279 continue;
2280 }
2281
2282
2283
2284 if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) {
2285 continue;
2286 }
2287
2288
2289
2290 if (m->offset + m->cow_end.offset != offset + bytes) {
2291 continue;
2292 }
2293
2294
2295
2296 if (qemu_iovec_subvec_niov(qiov, qiov_offset, bytes) > IOV_MAX - 2) {
2297 continue;
2298 }
2299
2300 m->data_qiov = qiov;
2301 m->data_qiov_offset = qiov_offset;
2302 return true;
2303 }
2304
2305 return false;
2306}
2307
2308static bool is_unallocated(BlockDriverState *bs, int64_t offset, int64_t bytes)
2309{
2310 int64_t nr;
2311 return !bytes ||
2312 (!bdrv_is_allocated_above(bs, NULL, false, offset, bytes, &nr) &&
2313 nr == bytes);
2314}
2315
2316static bool is_zero_cow(BlockDriverState *bs, QCowL2Meta *m)
2317{
2318
2319
2320
2321
2322
2323
2324 return is_unallocated(bs, m->offset + m->cow_start.offset,
2325 m->cow_start.nb_bytes) &&
2326 is_unallocated(bs, m->offset + m->cow_end.offset,
2327 m->cow_end.nb_bytes);
2328}
2329
2330static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta)
2331{
2332 BDRVQcow2State *s = bs->opaque;
2333 QCowL2Meta *m;
2334
2335 if (!(s->data_file->bs->supported_zero_flags & BDRV_REQ_NO_FALLBACK)) {
2336 return 0;
2337 }
2338
2339 if (bs->encrypted) {
2340 return 0;
2341 }
2342
2343 for (m = l2meta; m != NULL; m = m->next) {
2344 int ret;
2345
2346 if (!m->cow_start.nb_bytes && !m->cow_end.nb_bytes) {
2347 continue;
2348 }
2349
2350 if (!is_zero_cow(bs, m)) {
2351 continue;
2352 }
2353
2354
2355
2356
2357
2358
2359 ret = qcow2_pre_write_overlap_check(bs, 0, m->alloc_offset,
2360 m->nb_clusters * s->cluster_size,
2361 true);
2362 if (ret < 0) {
2363 return ret;
2364 }
2365
2366 BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_SPACE);
2367 ret = bdrv_co_pwrite_zeroes(s->data_file, m->alloc_offset,
2368 m->nb_clusters * s->cluster_size,
2369 BDRV_REQ_NO_FALLBACK);
2370 if (ret < 0) {
2371 if (ret != -ENOTSUP && ret != -EAGAIN) {
2372 return ret;
2373 }
2374 continue;
2375 }
2376
2377 trace_qcow2_skip_cow(qemu_coroutine_self(), m->offset, m->nb_clusters);
2378 m->skip_cow = true;
2379 }
2380 return 0;
2381}
2382
2383
2384
2385
2386
2387
2388
2389static coroutine_fn int qcow2_co_pwritev_task(BlockDriverState *bs,
2390 uint64_t file_cluster_offset,
2391 uint64_t offset, uint64_t bytes,
2392 QEMUIOVector *qiov,
2393 uint64_t qiov_offset,
2394 QCowL2Meta *l2meta)
2395{
2396 int ret;
2397 BDRVQcow2State *s = bs->opaque;
2398 void *crypt_buf = NULL;
2399 int offset_in_cluster = offset_into_cluster(s, offset);
2400 QEMUIOVector encrypted_qiov;
2401
2402 if (bs->encrypted) {
2403 assert(s->crypto);
2404 assert(bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
2405 crypt_buf = qemu_try_blockalign(bs->file->bs, bytes);
2406 if (crypt_buf == NULL) {
2407 ret = -ENOMEM;
2408 goto out_unlocked;
2409 }
2410 qemu_iovec_to_buf(qiov, qiov_offset, crypt_buf, bytes);
2411
2412 if (qcow2_co_encrypt(bs, file_cluster_offset + offset_in_cluster,
2413 offset, crypt_buf, bytes) < 0)
2414 {
2415 ret = -EIO;
2416 goto out_unlocked;
2417 }
2418
2419 qemu_iovec_init_buf(&encrypted_qiov, crypt_buf, bytes);
2420 qiov = &encrypted_qiov;
2421 qiov_offset = 0;
2422 }
2423
2424
2425 ret = handle_alloc_space(bs, l2meta);
2426 if (ret < 0) {
2427 goto out_unlocked;
2428 }
2429
2430
2431
2432
2433
2434
2435
2436 if (!merge_cow(offset, bytes, qiov, qiov_offset, l2meta)) {
2437 BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
2438 trace_qcow2_writev_data(qemu_coroutine_self(),
2439 file_cluster_offset + offset_in_cluster);
2440 ret = bdrv_co_pwritev_part(s->data_file,
2441 file_cluster_offset + offset_in_cluster,
2442 bytes, qiov, qiov_offset, 0);
2443 if (ret < 0) {
2444 goto out_unlocked;
2445 }
2446 }
2447
2448 qemu_co_mutex_lock(&s->lock);
2449
2450 ret = qcow2_handle_l2meta(bs, &l2meta, true);
2451 goto out_locked;
2452
2453out_unlocked:
2454 qemu_co_mutex_lock(&s->lock);
2455
2456out_locked:
2457 qcow2_handle_l2meta(bs, &l2meta, false);
2458 qemu_co_mutex_unlock(&s->lock);
2459
2460 qemu_vfree(crypt_buf);
2461
2462 return ret;
2463}
2464
2465static coroutine_fn int qcow2_co_pwritev_task_entry(AioTask *task)
2466{
2467 Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
2468
2469 assert(!t->cluster_type);
2470
2471 return qcow2_co_pwritev_task(t->bs, t->file_cluster_offset,
2472 t->offset, t->bytes, t->qiov, t->qiov_offset,
2473 t->l2meta);
2474}
2475
2476static coroutine_fn int qcow2_co_pwritev_part(
2477 BlockDriverState *bs, uint64_t offset, uint64_t bytes,
2478 QEMUIOVector *qiov, size_t qiov_offset, int flags)
2479{
2480 BDRVQcow2State *s = bs->opaque;
2481 int offset_in_cluster;
2482 int ret;
2483 unsigned int cur_bytes;
2484 uint64_t cluster_offset;
2485 QCowL2Meta *l2meta = NULL;
2486 AioTaskPool *aio = NULL;
2487
2488 trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);
2489
2490 while (bytes != 0 && aio_task_pool_status(aio) == 0) {
2491
2492 l2meta = NULL;
2493
2494 trace_qcow2_writev_start_part(qemu_coroutine_self());
2495 offset_in_cluster = offset_into_cluster(s, offset);
2496 cur_bytes = MIN(bytes, INT_MAX);
2497 if (bs->encrypted) {
2498 cur_bytes = MIN(cur_bytes,
2499 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
2500 - offset_in_cluster);
2501 }
2502
2503 qemu_co_mutex_lock(&s->lock);
2504
2505 ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
2506 &cluster_offset, &l2meta);
2507 if (ret < 0) {
2508 goto out_locked;
2509 }
2510
2511 assert((cluster_offset & 511) == 0);
2512
2513 ret = qcow2_pre_write_overlap_check(bs, 0,
2514 cluster_offset + offset_in_cluster,
2515 cur_bytes, true);
2516 if (ret < 0) {
2517 goto out_locked;
2518 }
2519
2520 qemu_co_mutex_unlock(&s->lock);
2521
2522 if (!aio && cur_bytes != bytes) {
2523 aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
2524 }
2525 ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_task_entry, 0,
2526 cluster_offset, offset, cur_bytes,
2527 qiov, qiov_offset, l2meta);
2528 l2meta = NULL;
2529 if (ret < 0) {
2530 goto fail_nometa;
2531 }
2532
2533 bytes -= cur_bytes;
2534 offset += cur_bytes;
2535 qiov_offset += cur_bytes;
2536 trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes);
2537 }
2538 ret = 0;
2539
2540 qemu_co_mutex_lock(&s->lock);
2541
2542out_locked:
2543 qcow2_handle_l2meta(bs, &l2meta, false);
2544
2545 qemu_co_mutex_unlock(&s->lock);
2546
2547fail_nometa:
2548 if (aio) {
2549 aio_task_pool_wait_all(aio);
2550 if (ret == 0) {
2551 ret = aio_task_pool_status(aio);
2552 }
2553 g_free(aio);
2554 }
2555
2556 trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
2557
2558 return ret;
2559}
2560
2561static int qcow2_inactivate(BlockDriverState *bs)
2562{
2563 BDRVQcow2State *s = bs->opaque;
2564 int ret, result = 0;
2565 Error *local_err = NULL;
2566
2567 qcow2_store_persistent_dirty_bitmaps(bs, true, &local_err);
2568 if (local_err != NULL) {
2569 result = -EINVAL;
2570 error_reportf_err(local_err, "Lost persistent bitmaps during "
2571 "inactivation of node '%s': ",
2572 bdrv_get_device_or_node_name(bs));
2573 }
2574
2575 ret = qcow2_cache_flush(bs, s->l2_table_cache);
2576 if (ret) {
2577 result = ret;
2578 error_report("Failed to flush the L2 table cache: %s",
2579 strerror(-ret));
2580 }
2581
2582 ret = qcow2_cache_flush(bs, s->refcount_block_cache);
2583 if (ret) {
2584 result = ret;
2585 error_report("Failed to flush the refcount block cache: %s",
2586 strerror(-ret));
2587 }
2588
2589 if (result == 0) {
2590 qcow2_mark_clean(bs);
2591 }
2592
2593 return result;
2594}
2595
2596static void qcow2_close(BlockDriverState *bs)
2597{
2598 BDRVQcow2State *s = bs->opaque;
2599 qemu_vfree(s->l1_table);
2600
2601 s->l1_table = NULL;
2602
2603 if (!(s->flags & BDRV_O_INACTIVE)) {
2604 qcow2_inactivate(bs);
2605 }
2606
2607 cache_clean_timer_del(bs);
2608 qcow2_cache_destroy(s->l2_table_cache);
2609 qcow2_cache_destroy(s->refcount_block_cache);
2610
2611 qcrypto_block_free(s->crypto);
2612 s->crypto = NULL;
2613
2614 g_free(s->unknown_header_fields);
2615 cleanup_unknown_header_ext(bs);
2616
2617 g_free(s->image_data_file);
2618 g_free(s->image_backing_file);
2619 g_free(s->image_backing_format);
2620
2621 if (has_data_file(bs)) {
2622 bdrv_unref_child(bs, s->data_file);
2623 }
2624
2625 qcow2_refcount_close(bs);
2626 qcow2_free_snapshots(bs);
2627}
2628
2629static void coroutine_fn qcow2_co_invalidate_cache(BlockDriverState *bs,
2630 Error **errp)
2631{
2632 BDRVQcow2State *s = bs->opaque;
2633 int flags = s->flags;
2634 QCryptoBlock *crypto = NULL;
2635 QDict *options;
2636 Error *local_err = NULL;
2637 int ret;
2638
2639
2640
2641
2642
2643
2644 crypto = s->crypto;
2645 s->crypto = NULL;
2646
2647 qcow2_close(bs);
2648
2649 memset(s, 0, sizeof(BDRVQcow2State));
2650 options = qdict_clone_shallow(bs->options);
2651
2652 flags &= ~BDRV_O_INACTIVE;
2653 qemu_co_mutex_lock(&s->lock);
2654 ret = qcow2_do_open(bs, options, flags, &local_err);
2655 qemu_co_mutex_unlock(&s->lock);
2656 qobject_unref(options);
2657 if (local_err) {
2658 error_propagate_prepend(errp, local_err,
2659 "Could not reopen qcow2 layer: ");
2660 bs->drv = NULL;
2661 return;
2662 } else if (ret < 0) {
2663 error_setg_errno(errp, -ret, "Could not reopen qcow2 layer");
2664 bs->drv = NULL;
2665 return;
2666 }
2667
2668 s->crypto = crypto;
2669}
2670
2671static size_t header_ext_add(char *buf, uint32_t magic, const void *s,
2672 size_t len, size_t buflen)
2673{
2674 QCowExtension *ext_backing_fmt = (QCowExtension*) buf;
2675 size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7);
2676
2677 if (buflen < ext_len) {
2678 return -ENOSPC;
2679 }
2680
2681 *ext_backing_fmt = (QCowExtension) {
2682 .magic = cpu_to_be32(magic),
2683 .len = cpu_to_be32(len),
2684 };
2685
2686 if (len) {
2687 memcpy(buf + sizeof(QCowExtension), s, len);
2688 }
2689
2690 return ext_len;
2691}
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701int qcow2_update_header(BlockDriverState *bs)
2702{
2703 BDRVQcow2State *s = bs->opaque;
2704 QCowHeader *header;
2705 char *buf;
2706 size_t buflen = s->cluster_size;
2707 int ret;
2708 uint64_t total_size;
2709 uint32_t refcount_table_clusters;
2710 size_t header_length;
2711 Qcow2UnknownHeaderExtension *uext;
2712
2713 buf = qemu_blockalign(bs, buflen);
2714
2715
2716 header = (QCowHeader*) buf;
2717
2718 if (buflen < sizeof(*header)) {
2719 ret = -ENOSPC;
2720 goto fail;
2721 }
2722
2723 header_length = sizeof(*header) + s->unknown_header_fields_size;
2724 total_size = bs->total_sectors * BDRV_SECTOR_SIZE;
2725 refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
2726
2727 *header = (QCowHeader) {
2728
2729 .magic = cpu_to_be32(QCOW_MAGIC),
2730 .version = cpu_to_be32(s->qcow_version),
2731 .backing_file_offset = 0,
2732 .backing_file_size = 0,
2733 .cluster_bits = cpu_to_be32(s->cluster_bits),
2734 .size = cpu_to_be64(total_size),
2735 .crypt_method = cpu_to_be32(s->crypt_method_header),
2736 .l1_size = cpu_to_be32(s->l1_size),
2737 .l1_table_offset = cpu_to_be64(s->l1_table_offset),
2738 .refcount_table_offset = cpu_to_be64(s->refcount_table_offset),
2739 .refcount_table_clusters = cpu_to_be32(refcount_table_clusters),
2740 .nb_snapshots = cpu_to_be32(s->nb_snapshots),
2741 .snapshots_offset = cpu_to_be64(s->snapshots_offset),
2742
2743
2744 .incompatible_features = cpu_to_be64(s->incompatible_features),
2745 .compatible_features = cpu_to_be64(s->compatible_features),
2746 .autoclear_features = cpu_to_be64(s->autoclear_features),
2747 .refcount_order = cpu_to_be32(s->refcount_order),
2748 .header_length = cpu_to_be32(header_length),
2749 };
2750
2751
2752 switch (s->qcow_version) {
2753 case 2:
2754 ret = offsetof(QCowHeader, incompatible_features);
2755 break;
2756 case 3:
2757 ret = sizeof(*header);
2758 break;
2759 default:
2760 ret = -EINVAL;
2761 goto fail;
2762 }
2763
2764 buf += ret;
2765 buflen -= ret;
2766 memset(buf, 0, buflen);
2767
2768
2769 if (s->unknown_header_fields_size) {
2770 if (buflen < s->unknown_header_fields_size) {
2771 ret = -ENOSPC;
2772 goto fail;
2773 }
2774
2775 memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size);
2776 buf += s->unknown_header_fields_size;
2777 buflen -= s->unknown_header_fields_size;
2778 }
2779
2780
2781 if (s->image_backing_format) {
2782 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
2783 s->image_backing_format,
2784 strlen(s->image_backing_format),
2785 buflen);
2786 if (ret < 0) {
2787 goto fail;
2788 }
2789
2790 buf += ret;
2791 buflen -= ret;
2792 }
2793
2794
2795 if (has_data_file(bs) && s->image_data_file) {
2796 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_DATA_FILE,
2797 s->image_data_file, strlen(s->image_data_file),
2798 buflen);
2799 if (ret < 0) {
2800 goto fail;
2801 }
2802
2803 buf += ret;
2804 buflen -= ret;
2805 }
2806
2807
2808 if (s->crypto_header.offset != 0) {
2809 s->crypto_header.offset = cpu_to_be64(s->crypto_header.offset);
2810 s->crypto_header.length = cpu_to_be64(s->crypto_header.length);
2811 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_CRYPTO_HEADER,
2812 &s->crypto_header, sizeof(s->crypto_header),
2813 buflen);
2814 s->crypto_header.offset = be64_to_cpu(s->crypto_header.offset);
2815 s->crypto_header.length = be64_to_cpu(s->crypto_header.length);
2816 if (ret < 0) {
2817 goto fail;
2818 }
2819 buf += ret;
2820 buflen -= ret;
2821 }
2822
2823
2824 if (s->qcow_version >= 3) {
2825 Qcow2Feature features[] = {
2826 {
2827 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
2828 .bit = QCOW2_INCOMPAT_DIRTY_BITNR,
2829 .name = "dirty bit",
2830 },
2831 {
2832 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
2833 .bit = QCOW2_INCOMPAT_CORRUPT_BITNR,
2834 .name = "corrupt bit",
2835 },
2836 {
2837 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
2838 .bit = QCOW2_INCOMPAT_DATA_FILE_BITNR,
2839 .name = "external data file",
2840 },
2841 {
2842 .type = QCOW2_FEAT_TYPE_COMPATIBLE,
2843 .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
2844 .name = "lazy refcounts",
2845 },
2846 };
2847
2848 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
2849 features, sizeof(features), buflen);
2850 if (ret < 0) {
2851 goto fail;
2852 }
2853 buf += ret;
2854 buflen -= ret;
2855 }
2856
2857
2858 if (s->nb_bitmaps > 0) {
2859 Qcow2BitmapHeaderExt bitmaps_header = {
2860 .nb_bitmaps = cpu_to_be32(s->nb_bitmaps),
2861 .bitmap_directory_size =
2862 cpu_to_be64(s->bitmap_directory_size),
2863 .bitmap_directory_offset =
2864 cpu_to_be64(s->bitmap_directory_offset)
2865 };
2866 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BITMAPS,
2867 &bitmaps_header, sizeof(bitmaps_header),
2868 buflen);
2869 if (ret < 0) {
2870 goto fail;
2871 }
2872 buf += ret;
2873 buflen -= ret;
2874 }
2875
2876
2877 QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
2878 ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
2879 if (ret < 0) {
2880 goto fail;
2881 }
2882
2883 buf += ret;
2884 buflen -= ret;
2885 }
2886
2887
2888 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen);
2889 if (ret < 0) {
2890 goto fail;
2891 }
2892
2893 buf += ret;
2894 buflen -= ret;
2895
2896
2897 if (s->image_backing_file) {
2898 size_t backing_file_len = strlen(s->image_backing_file);
2899
2900 if (buflen < backing_file_len) {
2901 ret = -ENOSPC;
2902 goto fail;
2903 }
2904
2905
2906 strncpy(buf, s->image_backing_file, buflen);
2907
2908 header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
2909 header->backing_file_size = cpu_to_be32(backing_file_len);
2910 }
2911
2912
2913 ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
2914 if (ret < 0) {
2915 goto fail;
2916 }
2917
2918 ret = 0;
2919fail:
2920 qemu_vfree(header);
2921 return ret;
2922}
2923
2924static int qcow2_change_backing_file(BlockDriverState *bs,
2925 const char *backing_file, const char *backing_fmt)
2926{
2927 BDRVQcow2State *s = bs->opaque;
2928
2929
2930
2931 if (backing_file && data_file_is_raw(bs)) {
2932 return -EINVAL;
2933 }
2934
2935 if (backing_file && strlen(backing_file) > 1023) {
2936 return -EINVAL;
2937 }
2938
2939 pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
2940 backing_file ?: "");
2941 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2942 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2943
2944 g_free(s->image_backing_file);
2945 g_free(s->image_backing_format);
2946
2947 s->image_backing_file = backing_file ? g_strdup(bs->backing_file) : NULL;
2948 s->image_backing_format = backing_fmt ? g_strdup(bs->backing_format) : NULL;
2949
2950 return qcow2_update_header(bs);
2951}
2952
2953static int qcow2_crypt_method_from_format(const char *encryptfmt)
2954{
2955 if (g_str_equal(encryptfmt, "luks")) {
2956 return QCOW_CRYPT_LUKS;
2957 } else if (g_str_equal(encryptfmt, "aes")) {
2958 return QCOW_CRYPT_AES;
2959 } else {
2960 return -EINVAL;
2961 }
2962}
2963
2964static int qcow2_set_up_encryption(BlockDriverState *bs,
2965 QCryptoBlockCreateOptions *cryptoopts,
2966 Error **errp)
2967{
2968 BDRVQcow2State *s = bs->opaque;
2969 QCryptoBlock *crypto = NULL;
2970 int fmt, ret;
2971
2972 switch (cryptoopts->format) {
2973 case Q_CRYPTO_BLOCK_FORMAT_LUKS:
2974 fmt = QCOW_CRYPT_LUKS;
2975 break;
2976 case Q_CRYPTO_BLOCK_FORMAT_QCOW:
2977 fmt = QCOW_CRYPT_AES;
2978 break;
2979 default:
2980 error_setg(errp, "Crypto format not supported in qcow2");
2981 return -EINVAL;
2982 }
2983
2984 s->crypt_method_header = fmt;
2985
2986 crypto = qcrypto_block_create(cryptoopts, "encrypt.",
2987 qcow2_crypto_hdr_init_func,
2988 qcow2_crypto_hdr_write_func,
2989 bs, errp);
2990 if (!crypto) {
2991 return -EINVAL;
2992 }
2993
2994 ret = qcow2_update_header(bs);
2995 if (ret < 0) {
2996 error_setg_errno(errp, -ret, "Could not write encryption header");
2997 goto out;
2998 }
2999
3000 ret = 0;
3001 out:
3002 qcrypto_block_free(crypto);
3003 return ret;
3004}
3005
3006
3007
3008
3009
3010
3011
3012
3013static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset,
3014 uint64_t new_length, PreallocMode mode,
3015 Error **errp)
3016{
3017 BDRVQcow2State *s = bs->opaque;
3018 uint64_t bytes;
3019 uint64_t host_offset = 0;
3020 int64_t file_length;
3021 unsigned int cur_bytes;
3022 int ret;
3023 QCowL2Meta *meta;
3024
3025 assert(offset <= new_length);
3026 bytes = new_length - offset;
3027
3028 while (bytes) {
3029 cur_bytes = MIN(bytes, QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size));
3030 ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
3031 &host_offset, &meta);
3032 if (ret < 0) {
3033 error_setg_errno(errp, -ret, "Allocating clusters failed");
3034 return ret;
3035 }
3036
3037 while (meta) {
3038 QCowL2Meta *next = meta->next;
3039
3040 ret = qcow2_alloc_cluster_link_l2(bs, meta);
3041 if (ret < 0) {
3042 error_setg_errno(errp, -ret, "Mapping clusters failed");
3043 qcow2_free_any_clusters(bs, meta->alloc_offset,
3044 meta->nb_clusters, QCOW2_DISCARD_NEVER);
3045 return ret;
3046 }
3047
3048
3049
3050 QLIST_REMOVE(meta, next_in_flight);
3051
3052 g_free(meta);
3053 meta = next;
3054 }
3055
3056
3057
3058 bytes -= cur_bytes;
3059 offset += cur_bytes;
3060 }
3061
3062
3063
3064
3065
3066
3067 file_length = bdrv_getlength(s->data_file->bs);
3068 if (file_length < 0) {
3069 error_setg_errno(errp, -file_length, "Could not get file size");
3070 return file_length;
3071 }
3072
3073 if (host_offset + cur_bytes > file_length) {
3074 if (mode == PREALLOC_MODE_METADATA) {
3075 mode = PREALLOC_MODE_OFF;
3076 }
3077 ret = bdrv_co_truncate(s->data_file, host_offset + cur_bytes, false,
3078 mode, errp);
3079 if (ret < 0) {
3080 return ret;
3081 }
3082 }
3083
3084 return 0;
3085}
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
/*
 * Compute how many bytes of refcount metadata (refcount blocks plus refcount
 * table clusters) are needed for an image with @clusters clusters of
 * @cluster_size bytes and refcount entries of (1 << @refcount_order) bits.
 *
 * The metadata clusters need refcounts themselves, so the calculation is
 * repeated until the total number of clusters no longer changes.  If
 * @generous_increase is set, then once a stable result is found @clusters is
 * increased by half the table cluster count (rounded up) and the calculation
 * is run to stability once more.
 *
 * If @refblock_count is not NULL, the number of refcount blocks is stored
 * there.
 *
 * Returns the metadata size in bytes.
 */
int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size,
                                     int refcount_order, bool generous_increase,
                                     uint64_t *refblock_count)
{
    const int64_t entries_per_table_cluster = cluster_size / sizeof(uint64_t);
    const int64_t refcounts_per_block =
        cluster_size * 8 / (1 << refcount_order);
    int64_t table = 0;   /* refcount table clusters */
    int64_t blocks = 0;  /* refcount blocks */
    int64_t total = 0;   /* clusters + blocks + table of the current pass */
    int64_t previous;
    bool settled = false;

    while (!settled) {
        previous = total;

        /* Both divisions round up */
        blocks = (clusters + table + blocks + refcounts_per_block - 1)
                 / refcounts_per_block;
        table = (blocks + entries_per_table_cluster - 1)
                / entries_per_table_cluster;
        total = clusters + blocks + table;

        if (total == previous && generous_increase) {
            /* Enlarge the input once and start converging again */
            clusters += (table + 2 - 1) / 2;
            total = 0;
            generous_increase = false;
        }

        settled = (total == previous);
    }

    if (refblock_count != NULL) {
        *refblock_count = blocks;
    }

    return (blocks + table) * cluster_size;
}
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
/*
 * Compute the size of a fully allocated image with @total_size bytes of
 * guest data: the guest size rounded up to a multiple of @cluster_size, plus
 * one cluster for the header, the L2 tables, the L1 table and the refcount
 * metadata as computed by qcow2_refcount_metadata_size().
 */
static int64_t qcow2_calc_prealloc_size(int64_t total_size,
                                        size_t cluster_size,
                                        int refcount_order)
{
    const int64_t data_size = ROUND_UP(total_size, cluster_size);
    int64_t metadata = 0;
    uint64_t l2_entries;
    uint64_t l1_entries;

    /* One cluster for the header */
    metadata += cluster_size;

    /* L2 tables: one entry per data cluster, in whole clusters */
    l2_entries = data_size / cluster_size;
    l2_entries = ROUND_UP(l2_entries, cluster_size / sizeof(uint64_t));
    metadata += l2_entries * sizeof(uint64_t);

    /* L1 table: one entry per L2 table cluster, in whole clusters */
    l1_entries = l2_entries * sizeof(uint64_t) / cluster_size;
    l1_entries = ROUND_UP(l1_entries, cluster_size / sizeof(uint64_t));
    metadata += l1_entries * sizeof(uint64_t);

    /* Refcount table and blocks covering everything counted so far */
    metadata += qcow2_refcount_metadata_size(
            (metadata + data_size) / cluster_size,
            cluster_size, refcount_order, false, NULL);

    return metadata + data_size;
}
3173
3174static bool validate_cluster_size(size_t cluster_size, Error **errp)
3175{
3176 int cluster_bits = ctz32(cluster_size);
3177 if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS ||
3178 (1 << cluster_bits) != cluster_size)
3179 {
3180 error_setg(errp, "Cluster size must be a power of two between %d and "
3181 "%dk", 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10));
3182 return false;
3183 }
3184 return true;
3185}
3186
3187static size_t qcow2_opt_get_cluster_size_del(QemuOpts *opts, Error **errp)
3188{
3189 size_t cluster_size;
3190
3191 cluster_size = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE,
3192 DEFAULT_CLUSTER_SIZE);
3193 if (!validate_cluster_size(cluster_size, errp)) {
3194 return 0;
3195 }
3196 return cluster_size;
3197}
3198
3199static int qcow2_opt_get_version_del(QemuOpts *opts, Error **errp)
3200{
3201 char *buf;
3202 int ret;
3203
3204 buf = qemu_opt_get_del(opts, BLOCK_OPT_COMPAT_LEVEL);
3205 if (!buf) {
3206 ret = 3;
3207 } else if (!strcmp(buf, "0.10")) {
3208 ret = 2;
3209 } else if (!strcmp(buf, "1.1")) {
3210 ret = 3;
3211 } else {
3212 error_setg(errp, "Invalid compatibility level: '%s'", buf);
3213 ret = -EINVAL;
3214 }
3215 g_free(buf);
3216 return ret;
3217}
3218
3219static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version,
3220 Error **errp)
3221{
3222 uint64_t refcount_bits;
3223
3224 refcount_bits = qemu_opt_get_number_del(opts, BLOCK_OPT_REFCOUNT_BITS, 16);
3225 if (refcount_bits > 64 || !is_power_of_2(refcount_bits)) {
3226 error_setg(errp, "Refcount width must be a power of two and may not "
3227 "exceed 64 bits");
3228 return 0;
3229 }
3230
3231 if (version < 3 && refcount_bits != 16) {
3232 error_setg(errp, "Different refcount widths than 16 bits require "
3233 "compatibility level 1.1 or above (use compat=1.1 or "
3234 "greater)");
3235 return 0;
3236 }
3237
3238 return refcount_bits;
3239}
3240
3241static int coroutine_fn
3242qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
3243{
3244 BlockdevCreateOptionsQcow2 *qcow2_opts;
3245 QDict *options;
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259 BlockBackend *blk = NULL;
3260 BlockDriverState *bs = NULL;
3261 BlockDriverState *data_bs = NULL;
3262 QCowHeader *header;
3263 size_t cluster_size;
3264 int version;
3265 int refcount_order;
3266 uint64_t* refcount_table;
3267 Error *local_err = NULL;
3268 int ret;
3269
3270 assert(create_options->driver == BLOCKDEV_DRIVER_QCOW2);
3271 qcow2_opts = &create_options->u.qcow2;
3272
3273 bs = bdrv_open_blockdev_ref(qcow2_opts->file, errp);
3274 if (bs == NULL) {
3275 return -EIO;
3276 }
3277
3278
3279 if (!QEMU_IS_ALIGNED(qcow2_opts->size, BDRV_SECTOR_SIZE)) {
3280 error_setg(errp, "Image size must be a multiple of 512 bytes");
3281 ret = -EINVAL;
3282 goto out;
3283 }
3284
3285 if (qcow2_opts->has_version) {
3286 switch (qcow2_opts->version) {
3287 case BLOCKDEV_QCOW2_VERSION_V2:
3288 version = 2;
3289 break;
3290 case BLOCKDEV_QCOW2_VERSION_V3:
3291 version = 3;
3292 break;
3293 default:
3294 g_assert_not_reached();
3295 }
3296 } else {
3297 version = 3;
3298 }
3299
3300 if (qcow2_opts->has_cluster_size) {
3301 cluster_size = qcow2_opts->cluster_size;
3302 } else {
3303 cluster_size = DEFAULT_CLUSTER_SIZE;
3304 }
3305
3306 if (!validate_cluster_size(cluster_size, errp)) {
3307 ret = -EINVAL;
3308 goto out;
3309 }
3310
3311 if (!qcow2_opts->has_preallocation) {
3312 qcow2_opts->preallocation = PREALLOC_MODE_OFF;
3313 }
3314 if (qcow2_opts->has_backing_file &&
3315 qcow2_opts->preallocation != PREALLOC_MODE_OFF)
3316 {
3317 error_setg(errp, "Backing file and preallocation cannot be used at "
3318 "the same time");
3319 ret = -EINVAL;
3320 goto out;
3321 }
3322 if (qcow2_opts->has_backing_fmt && !qcow2_opts->has_backing_file) {
3323 error_setg(errp, "Backing format cannot be used without backing file");
3324 ret = -EINVAL;
3325 goto out;
3326 }
3327
3328 if (!qcow2_opts->has_lazy_refcounts) {
3329 qcow2_opts->lazy_refcounts = false;
3330 }
3331 if (version < 3 && qcow2_opts->lazy_refcounts) {
3332 error_setg(errp, "Lazy refcounts only supported with compatibility "
3333 "level 1.1 and above (use version=v3 or greater)");
3334 ret = -EINVAL;
3335 goto out;
3336 }
3337
3338 if (!qcow2_opts->has_refcount_bits) {
3339 qcow2_opts->refcount_bits = 16;
3340 }
3341 if (qcow2_opts->refcount_bits > 64 ||
3342 !is_power_of_2(qcow2_opts->refcount_bits))
3343 {
3344 error_setg(errp, "Refcount width must be a power of two and may not "
3345 "exceed 64 bits");
3346 ret = -EINVAL;
3347 goto out;
3348 }
3349 if (version < 3 && qcow2_opts->refcount_bits != 16) {
3350 error_setg(errp, "Different refcount widths than 16 bits require "
3351 "compatibility level 1.1 or above (use version=v3 or "
3352 "greater)");
3353 ret = -EINVAL;
3354 goto out;
3355 }
3356 refcount_order = ctz32(qcow2_opts->refcount_bits);
3357
3358 if (qcow2_opts->data_file_raw && !qcow2_opts->data_file) {
3359 error_setg(errp, "data-file-raw requires data-file");
3360 ret = -EINVAL;
3361 goto out;
3362 }
3363 if (qcow2_opts->data_file_raw && qcow2_opts->has_backing_file) {
3364 error_setg(errp, "Backing file and data-file-raw cannot be used at "
3365 "the same time");
3366 ret = -EINVAL;
3367 goto out;
3368 }
3369
3370 if (qcow2_opts->data_file) {
3371 if (version < 3) {
3372 error_setg(errp, "External data files are only supported with "
3373 "compatibility level 1.1 and above (use version=v3 or "
3374 "greater)");
3375 ret = -EINVAL;
3376 goto out;
3377 }
3378 data_bs = bdrv_open_blockdev_ref(qcow2_opts->data_file, errp);
3379 if (data_bs == NULL) {
3380 ret = -EIO;
3381 goto out;
3382 }
3383 }
3384
3385
3386 blk = blk_new(bdrv_get_aio_context(bs),
3387 BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
3388 ret = blk_insert_bs(blk, bs, errp);
3389 if (ret < 0) {
3390 goto out;
3391 }
3392 blk_set_allow_write_beyond_eof(blk, true);
3393
3394
3395 QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header));
3396 header = g_malloc0(cluster_size);
3397 *header = (QCowHeader) {
3398 .magic = cpu_to_be32(QCOW_MAGIC),
3399 .version = cpu_to_be32(version),
3400 .cluster_bits = cpu_to_be32(ctz32(cluster_size)),
3401 .size = cpu_to_be64(0),
3402 .l1_table_offset = cpu_to_be64(0),
3403 .l1_size = cpu_to_be32(0),
3404 .refcount_table_offset = cpu_to_be64(cluster_size),
3405 .refcount_table_clusters = cpu_to_be32(1),
3406 .refcount_order = cpu_to_be32(refcount_order),
3407 .header_length = cpu_to_be32(sizeof(*header)),
3408 };
3409
3410
3411 header->crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
3412
3413 if (qcow2_opts->lazy_refcounts) {
3414 header->compatible_features |=
3415 cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
3416 }
3417 if (data_bs) {
3418 header->incompatible_features |=
3419 cpu_to_be64(QCOW2_INCOMPAT_DATA_FILE);
3420 }
3421 if (qcow2_opts->data_file_raw) {
3422 header->autoclear_features |=
3423 cpu_to_be64(QCOW2_AUTOCLEAR_DATA_FILE_RAW);
3424 }
3425
3426 ret = blk_pwrite(blk, 0, header, cluster_size, 0);
3427 g_free(header);
3428 if (ret < 0) {
3429 error_setg_errno(errp, -ret, "Could not write qcow2 header");
3430 goto out;
3431 }
3432
3433
3434 refcount_table = g_malloc0(2 * cluster_size);
3435 refcount_table[0] = cpu_to_be64(2 * cluster_size);
3436 ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size, 0);
3437 g_free(refcount_table);
3438
3439 if (ret < 0) {
3440 error_setg_errno(errp, -ret, "Could not write refcount table");
3441 goto out;
3442 }
3443
3444 blk_unref(blk);
3445 blk = NULL;
3446
3447
3448
3449
3450
3451
3452 options = qdict_new();
3453 qdict_put_str(options, "driver", "qcow2");
3454 qdict_put_str(options, "file", bs->node_name);
3455 if (data_bs) {
3456 qdict_put_str(options, "data-file", data_bs->node_name);
3457 }
3458 blk = blk_new_open(NULL, NULL, options,
3459 BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
3460 &local_err);
3461 if (blk == NULL) {
3462 error_propagate(errp, local_err);
3463 ret = -EIO;
3464 goto out;
3465 }
3466
3467 ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size);
3468 if (ret < 0) {
3469 error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
3470 "header and refcount table");
3471 goto out;
3472
3473 } else if (ret != 0) {
3474 error_report("Huh, first cluster in empty image is already in use?");
3475 abort();
3476 }
3477
3478
3479 if (data_bs) {
3480 BDRVQcow2State *s = blk_bs(blk)->opaque;
3481 s->image_data_file = g_strdup(data_bs->filename);
3482 }
3483
3484
3485 ret = qcow2_update_header(blk_bs(blk));
3486 if (ret < 0) {
3487 error_setg_errno(errp, -ret, "Could not update qcow2 header");
3488 goto out;
3489 }
3490
3491
3492 ret = blk_truncate(blk, qcow2_opts->size, false, qcow2_opts->preallocation,
3493 errp);
3494 if (ret < 0) {
3495 error_prepend(errp, "Could not resize image: ");
3496 goto out;
3497 }
3498
3499
3500 if (qcow2_opts->has_backing_file) {
3501 const char *backing_format = NULL;
3502
3503 if (qcow2_opts->has_backing_fmt) {
3504 backing_format = BlockdevDriver_str(qcow2_opts->backing_fmt);
3505 }
3506
3507 ret = bdrv_change_backing_file(blk_bs(blk), qcow2_opts->backing_file,
3508 backing_format);
3509 if (ret < 0) {
3510 error_setg_errno(errp, -ret, "Could not assign backing file '%s' "
3511 "with format '%s'", qcow2_opts->backing_file,
3512 backing_format);
3513 goto out;
3514 }
3515 }
3516
3517
3518 if (qcow2_opts->has_encrypt) {
3519 ret = qcow2_set_up_encryption(blk_bs(blk), qcow2_opts->encrypt, errp);
3520 if (ret < 0) {
3521 goto out;
3522 }
3523 }
3524
3525 blk_unref(blk);
3526 blk = NULL;
3527
3528
3529
3530
3531
3532
3533
3534 options = qdict_new();
3535 qdict_put_str(options, "driver", "qcow2");
3536 qdict_put_str(options, "file", bs->node_name);
3537 if (data_bs) {
3538 qdict_put_str(options, "data-file", data_bs->node_name);
3539 }
3540 blk = blk_new_open(NULL, NULL, options,
3541 BDRV_O_RDWR | BDRV_O_NO_BACKING | BDRV_O_NO_IO,
3542 &local_err);
3543 if (blk == NULL) {
3544 error_propagate(errp, local_err);
3545 ret = -EIO;
3546 goto out;
3547 }
3548
3549 ret = 0;
3550out:
3551 blk_unref(blk);
3552 bdrv_unref(bs);
3553 bdrv_unref(data_bs);
3554 return ret;
3555}
3556
3557static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opts,
3558 Error **errp)
3559{
3560 BlockdevCreateOptions *create_options = NULL;
3561 QDict *qdict;
3562 Visitor *v;
3563 BlockDriverState *bs = NULL;
3564 BlockDriverState *data_bs = NULL;
3565 Error *local_err = NULL;
3566 const char *val;
3567 int ret;
3568
3569
3570
3571
3572
3573 qdict = qemu_opts_to_qdict_filtered(opts, NULL, bdrv_qcow2.create_opts,
3574 true);
3575
3576
3577 val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT);
3578 if (val && !strcmp(val, "on")) {
3579 qdict_put_str(qdict, BLOCK_OPT_ENCRYPT, "qcow");
3580 } else if (val && !strcmp(val, "off")) {
3581 qdict_del(qdict, BLOCK_OPT_ENCRYPT);
3582 }
3583
3584 val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT);
3585 if (val && !strcmp(val, "aes")) {
3586 qdict_put_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT, "qcow");
3587 }
3588
3589
3590
3591 val = qdict_get_try_str(qdict, BLOCK_OPT_COMPAT_LEVEL);
3592 if (val && !strcmp(val, "0.10")) {
3593 qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v2");
3594 } else if (val && !strcmp(val, "1.1")) {
3595 qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v3");
3596 }
3597
3598
3599 static const QDictRenames opt_renames[] = {
3600 { BLOCK_OPT_BACKING_FILE, "backing-file" },
3601 { BLOCK_OPT_BACKING_FMT, "backing-fmt" },
3602 { BLOCK_OPT_CLUSTER_SIZE, "cluster-size" },
3603 { BLOCK_OPT_LAZY_REFCOUNTS, "lazy-refcounts" },
3604 { BLOCK_OPT_REFCOUNT_BITS, "refcount-bits" },
3605 { BLOCK_OPT_ENCRYPT, BLOCK_OPT_ENCRYPT_FORMAT },
3606 { BLOCK_OPT_COMPAT_LEVEL, "version" },
3607 { BLOCK_OPT_DATA_FILE_RAW, "data-file-raw" },
3608 { NULL, NULL },
3609 };
3610
3611 if (!qdict_rename_keys(qdict, opt_renames, errp)) {
3612 ret = -EINVAL;
3613 goto finish;
3614 }
3615
3616
3617 ret = bdrv_create_file(filename, opts, errp);
3618 if (ret < 0) {
3619 goto finish;
3620 }
3621
3622 bs = bdrv_open(filename, NULL, NULL,
3623 BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
3624 if (bs == NULL) {
3625 ret = -EIO;
3626 goto finish;
3627 }
3628
3629
3630 val = qdict_get_try_str(qdict, BLOCK_OPT_DATA_FILE);
3631 if (val) {
3632 ret = bdrv_create_file(val, opts, errp);
3633 if (ret < 0) {
3634 goto finish;
3635 }
3636
3637 data_bs = bdrv_open(val, NULL, NULL,
3638 BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
3639 errp);
3640 if (data_bs == NULL) {
3641 ret = -EIO;
3642 goto finish;
3643 }
3644
3645 qdict_del(qdict, BLOCK_OPT_DATA_FILE);
3646 qdict_put_str(qdict, "data-file", data_bs->node_name);
3647 }
3648
3649
3650 qdict_put_str(qdict, "driver", "qcow2");
3651 qdict_put_str(qdict, "file", bs->node_name);
3652
3653
3654 v = qobject_input_visitor_new_flat_confused(qdict, errp);
3655 if (!v) {
3656 ret = -EINVAL;
3657 goto finish;
3658 }
3659
3660 visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err);
3661 visit_free(v);
3662
3663 if (local_err) {
3664 error_propagate(errp, local_err);
3665 ret = -EINVAL;
3666 goto finish;
3667 }
3668
3669
3670 create_options->u.qcow2.size = ROUND_UP(create_options->u.qcow2.size,
3671 BDRV_SECTOR_SIZE);
3672
3673
3674 ret = qcow2_co_create(create_options, errp);
3675 if (ret < 0) {
3676 goto finish;
3677 }
3678
3679 ret = 0;
3680finish:
3681 qobject_unref(qdict);
3682 bdrv_unref(bs);
3683 bdrv_unref(data_bs);
3684 qapi_free_BlockdevCreateOptions(create_options);
3685 return ret;
3686}
3687
3688
3689static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes)
3690{
3691 int64_t nr;
3692 int res;
3693
3694
3695 if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) {
3696 bytes = bs->total_sectors * BDRV_SECTOR_SIZE - offset;
3697 }
3698
3699 if (!bytes) {
3700 return true;
3701 }
3702 res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL);
3703 return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == bytes;
3704}
3705
3706static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
3707 int64_t offset, int bytes, BdrvRequestFlags flags)
3708{
3709 int ret;
3710 BDRVQcow2State *s = bs->opaque;
3711
3712 uint32_t head = offset % s->cluster_size;
3713 uint32_t tail = (offset + bytes) % s->cluster_size;
3714
3715 trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, bytes);
3716 if (offset + bytes == bs->total_sectors * BDRV_SECTOR_SIZE) {
3717 tail = 0;
3718 }
3719
3720 if (head || tail) {
3721 uint64_t off;
3722 unsigned int nr;
3723
3724 assert(head + bytes <= s->cluster_size);
3725
3726
3727 if (!(is_zero(bs, offset - head, head) &&
3728 is_zero(bs, offset + bytes,
3729 tail ? s->cluster_size - tail : 0))) {
3730 return -ENOTSUP;
3731 }
3732
3733 qemu_co_mutex_lock(&s->lock);
3734
3735 offset = QEMU_ALIGN_DOWN(offset, s->cluster_size);
3736 bytes = s->cluster_size;
3737 nr = s->cluster_size;
3738 ret = qcow2_get_cluster_offset(bs, offset, &nr, &off);
3739 if (ret != QCOW2_CLUSTER_UNALLOCATED &&
3740 ret != QCOW2_CLUSTER_ZERO_PLAIN &&
3741 ret != QCOW2_CLUSTER_ZERO_ALLOC) {
3742 qemu_co_mutex_unlock(&s->lock);
3743 return -ENOTSUP;
3744 }
3745 } else {
3746 qemu_co_mutex_lock(&s->lock);
3747 }
3748
3749 trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, bytes);
3750
3751
3752 ret = qcow2_cluster_zeroize(bs, offset, bytes, flags);
3753 qemu_co_mutex_unlock(&s->lock);
3754
3755 return ret;
3756}
3757
3758static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
3759 int64_t offset, int bytes)
3760{
3761 int ret;
3762 BDRVQcow2State *s = bs->opaque;
3763
3764 if (!QEMU_IS_ALIGNED(offset | bytes, s->cluster_size)) {
3765 assert(bytes < s->cluster_size);
3766
3767
3768 if (!QEMU_IS_ALIGNED(offset, s->cluster_size) ||
3769 offset + bytes != bs->total_sectors * BDRV_SECTOR_SIZE) {
3770 return -ENOTSUP;
3771 }
3772 }
3773
3774 qemu_co_mutex_lock(&s->lock);
3775 ret = qcow2_cluster_discard(bs, offset, bytes, QCOW2_DISCARD_REQUEST,
3776 false);
3777 qemu_co_mutex_unlock(&s->lock);
3778 return ret;
3779}
3780
3781static int coroutine_fn
3782qcow2_co_copy_range_from(BlockDriverState *bs,
3783 BdrvChild *src, uint64_t src_offset,
3784 BdrvChild *dst, uint64_t dst_offset,
3785 uint64_t bytes, BdrvRequestFlags read_flags,
3786 BdrvRequestFlags write_flags)
3787{
3788 BDRVQcow2State *s = bs->opaque;
3789 int ret;
3790 unsigned int cur_bytes;
3791 BdrvChild *child = NULL;
3792 BdrvRequestFlags cur_write_flags;
3793
3794 assert(!bs->encrypted);
3795 qemu_co_mutex_lock(&s->lock);
3796
3797 while (bytes != 0) {
3798 uint64_t copy_offset = 0;
3799
3800 cur_bytes = MIN(bytes, INT_MAX);
3801 cur_write_flags = write_flags;
3802
3803 ret = qcow2_get_cluster_offset(bs, src_offset, &cur_bytes, ©_offset);
3804 if (ret < 0) {
3805 goto out;
3806 }
3807
3808 switch (ret) {
3809 case QCOW2_CLUSTER_UNALLOCATED:
3810 if (bs->backing && bs->backing->bs) {
3811 int64_t backing_length = bdrv_getlength(bs->backing->bs);
3812 if (src_offset >= backing_length) {
3813 cur_write_flags |= BDRV_REQ_ZERO_WRITE;
3814 } else {
3815 child = bs->backing;
3816 cur_bytes = MIN(cur_bytes, backing_length - src_offset);
3817 copy_offset = src_offset;
3818 }
3819 } else {
3820 cur_write_flags |= BDRV_REQ_ZERO_WRITE;
3821 }
3822 break;
3823
3824 case QCOW2_CLUSTER_ZERO_PLAIN:
3825 case QCOW2_CLUSTER_ZERO_ALLOC:
3826 cur_write_flags |= BDRV_REQ_ZERO_WRITE;
3827 break;
3828
3829 case QCOW2_CLUSTER_COMPRESSED:
3830 ret = -ENOTSUP;
3831 goto out;
3832
3833 case QCOW2_CLUSTER_NORMAL:
3834 child = s->data_file;
3835 copy_offset += offset_into_cluster(s, src_offset);
3836 if ((copy_offset & 511) != 0) {
3837 ret = -EIO;
3838 goto out;
3839 }
3840 break;
3841
3842 default:
3843 abort();
3844 }
3845 qemu_co_mutex_unlock(&s->lock);
3846 ret = bdrv_co_copy_range_from(child,
3847 copy_offset,
3848 dst, dst_offset,
3849 cur_bytes, read_flags, cur_write_flags);
3850 qemu_co_mutex_lock(&s->lock);
3851 if (ret < 0) {
3852 goto out;
3853 }
3854
3855 bytes -= cur_bytes;
3856 src_offset += cur_bytes;
3857 dst_offset += cur_bytes;
3858 }
3859 ret = 0;
3860
3861out:
3862 qemu_co_mutex_unlock(&s->lock);
3863 return ret;
3864}
3865
3866static int coroutine_fn
3867qcow2_co_copy_range_to(BlockDriverState *bs,
3868 BdrvChild *src, uint64_t src_offset,
3869 BdrvChild *dst, uint64_t dst_offset,
3870 uint64_t bytes, BdrvRequestFlags read_flags,
3871 BdrvRequestFlags write_flags)
3872{
3873 BDRVQcow2State *s = bs->opaque;
3874 int offset_in_cluster;
3875 int ret;
3876 unsigned int cur_bytes;
3877 uint64_t cluster_offset;
3878 QCowL2Meta *l2meta = NULL;
3879
3880 assert(!bs->encrypted);
3881
3882 qemu_co_mutex_lock(&s->lock);
3883
3884 while (bytes != 0) {
3885
3886 l2meta = NULL;
3887
3888 offset_in_cluster = offset_into_cluster(s, dst_offset);
3889 cur_bytes = MIN(bytes, INT_MAX);
3890
3891
3892
3893
3894
3895 ret = qcow2_alloc_cluster_offset(bs, dst_offset, &cur_bytes,
3896 &cluster_offset, &l2meta);
3897 if (ret < 0) {
3898 goto fail;
3899 }
3900
3901 assert((cluster_offset & 511) == 0);
3902
3903 ret = qcow2_pre_write_overlap_check(bs, 0,
3904 cluster_offset + offset_in_cluster, cur_bytes, true);
3905 if (ret < 0) {
3906 goto fail;
3907 }
3908
3909 qemu_co_mutex_unlock(&s->lock);
3910 ret = bdrv_co_copy_range_to(src, src_offset,
3911 s->data_file,
3912 cluster_offset + offset_in_cluster,
3913 cur_bytes, read_flags, write_flags);
3914 qemu_co_mutex_lock(&s->lock);
3915 if (ret < 0) {
3916 goto fail;
3917 }
3918
3919 ret = qcow2_handle_l2meta(bs, &l2meta, true);
3920 if (ret) {
3921 goto fail;
3922 }
3923
3924 bytes -= cur_bytes;
3925 src_offset += cur_bytes;
3926 dst_offset += cur_bytes;
3927 }
3928 ret = 0;
3929
3930fail:
3931 qcow2_handle_l2meta(bs, &l2meta, false);
3932
3933 qemu_co_mutex_unlock(&s->lock);
3934
3935 trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
3936
3937 return ret;
3938}
3939
/*
 * Resize the image to @offset bytes.
 *
 * @offset must be a multiple of 512.  @prealloc selects how newly added
 * space is preallocated; only OFF, METADATA, FALLOC and FULL are accepted,
 * and preallocation cannot be combined with shrinking.  @exact is only
 * forwarded to the external data file (if any).
 *
 * Images with snapshots, or for which qcow2_truncate_bitmaps_check()
 * reports a problem, cannot be resized (-ENOTSUP).
 *
 * Returns 0 on success or a negative errno with @errp set.
 */
static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
                                          bool exact, PreallocMode prealloc,
                                          Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t old_length;
    int64_t new_l1_size;
    int ret;
    QDict *options;

    if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_METADATA &&
        prealloc != PREALLOC_MODE_FALLOC && prealloc != PREALLOC_MODE_FULL)
    {
        error_setg(errp, "Unsupported preallocation mode '%s'",
                   PreallocMode_str(prealloc));
        return -ENOTSUP;
    }

    if (offset & 511) {
        error_setg(errp, "The new size must be a multiple of 512");
        return -EINVAL;
    }

    qemu_co_mutex_lock(&s->lock);

    /* Cannot proceed if the image has snapshots */
    if (s->nb_snapshots) {
        error_setg(errp, "Can't resize an image which has snapshots");
        ret = -ENOTSUP;
        goto fail;
    }

    /* Cannot proceed if the bitmap check rejects the resize */
    if (qcow2_truncate_bitmaps_check(bs, errp)) {
        ret = -ENOTSUP;
        goto fail;
    }

    old_length = bs->total_sectors * BDRV_SECTOR_SIZE;
    new_l1_size = size_to_l1(s, offset);

    if (offset < old_length) {
        /* Shrinking: drop the cropped clusters and shrink the metadata */
        int64_t last_cluster, old_file_size;
        if (prealloc != PREALLOC_MODE_OFF) {
            error_setg(errp,
                       "Preallocation can't be used for shrinking an image");
            ret = -EINVAL;
            goto fail;
        }

        ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size),
                                    old_length - ROUND_UP(offset,
                                                          s->cluster_size),
                                    QCOW2_DISCARD_ALWAYS, true);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to discard cropped clusters");
            goto fail;
        }

        ret = qcow2_shrink_l1_table(bs, new_l1_size);
        if (ret < 0) {
            error_setg_errno(errp, -ret,
                             "Failed to reduce the number of L2 tables");
            goto fail;
        }

        ret = qcow2_shrink_reftable(bs);
        if (ret < 0) {
            error_setg_errno(errp, -ret,
                             "Failed to discard unused refblocks");
            goto fail;
        }

        old_file_size = bdrv_getlength(bs->file->bs);
        if (old_file_size < 0) {
            error_setg_errno(errp, -old_file_size,
                             "Failed to inquire current file length");
            ret = old_file_size;
            goto fail;
        }
        last_cluster = qcow2_get_last_cluster(bs, old_file_size);
        if (last_cluster < 0) {
            error_setg_errno(errp, -last_cluster,
                             "Failed to find the last cluster");
            ret = last_cluster;
            goto fail;
        }
        if ((last_cluster + 1) * s->cluster_size < old_file_size) {
            Error *local_err = NULL;

            /*
             * Cut off the unused tail of the image file.  This is best
             * effort: the return value is ignored and a failure is only
             * reported as a warning.  @exact is deliberately not passed
             * on here (false is used instead).
             */
            bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size,
                             false, PREALLOC_MODE_OFF, &local_err);
            if (local_err) {
                warn_reportf_err(local_err,
                                 "Failed to truncate the tail of the image: ");
            }
        }
    } else {
        /* Growing (or same size): make sure the L1 table is large enough */
        ret = qcow2_grow_l1_table(bs, new_l1_size, true);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to grow the L1 table");
            goto fail;
        }
    }

    switch (prealloc) {
    case PREALLOC_MODE_OFF:
        if (has_data_file(bs)) {
            /*
             * With an external data file, resize that file to the new
             * guest size as well, forwarding the caller's @exact
             * requirement.
             */
            ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, errp);
            if (ret < 0) {
                goto fail;
            }
        }
        break;

    case PREALLOC_MODE_METADATA:
        ret = preallocate_co(bs, old_length, offset, prealloc, errp);
        if (ret < 0) {
            goto fail;
        }
        break;

    case PREALLOC_MODE_FALLOC:
    case PREALLOC_MODE_FULL:
    {
        int64_t allocation_start, host_offset, guest_offset;
        int64_t clusters_allocated;
        int64_t old_file_size, new_file_size;
        uint64_t nb_new_data_clusters, nb_new_l2_tables;

        /*
         * With a data file, preallocation is handled entirely by
         * preallocate_co(), which is given the preallocation mode.
         */
        if (has_data_file(bs)) {
            ret = preallocate_co(bs, old_length, offset, prealloc, errp);
            if (ret < 0) {
                goto fail;
            }
            break;
        }

        old_file_size = bdrv_getlength(bs->file->bs);
        if (old_file_size < 0) {
            error_setg_errno(errp, -old_file_size,
                             "Failed to inquire current file length");
            ret = old_file_size;
            goto fail;
        }
        old_file_size = ROUND_UP(old_file_size, s->cluster_size);

        nb_new_data_clusters = DIV_ROUND_UP(offset - old_length,
                                            s->cluster_size);

        /*
         * Number of L2 tables needed to map the new data clusters: each
         * L2 table holds cluster_size / sizeof(uint64_t) entries.  This
         * count is only used to size the refcount area requested below.
         */
        nb_new_l2_tables = DIV_ROUND_UP(nb_new_data_clusters,
                                        s->cluster_size / sizeof(uint64_t));

        /*
         * One extra table; presumably because the new range need not be
         * aligned to an L2 table boundary -- TODO confirm.
         */
        nb_new_l2_tables++;

        allocation_start = qcow2_refcount_area(bs, old_file_size,
                                               nb_new_data_clusters +
                                               nb_new_l2_tables,
                                               true, 0, 0);
        if (allocation_start < 0) {
            error_setg_errno(errp, -allocation_start,
                             "Failed to resize refcount structures");
            ret = allocation_start;
            goto fail;
        }

        clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start,
                                                     nb_new_data_clusters);
        if (clusters_allocated < 0) {
            error_setg_errno(errp, -clusters_allocated,
                             "Failed to allocate data clusters");
            ret = clusters_allocated;
            goto fail;
        }

        assert(clusters_allocated == nb_new_data_clusters);

        /* Allocate the data area in the underlying file */
        new_file_size = allocation_start +
                        nb_new_data_clusters * s->cluster_size;

        ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, errp);
        if (ret < 0) {
            error_prepend(errp, "Failed to resize underlying file: ");
            /* Give back the clusters allocated above */
            qcow2_free_clusters(bs, allocation_start,
                                nb_new_data_clusters * s->cluster_size,
                                QCOW2_DISCARD_OTHER);
            goto fail;
        }

        /* Create the L2 entries, at most one L2 slice's worth at a time */
        host_offset = allocation_start;
        guest_offset = old_length;
        while (nb_new_data_clusters) {
            int64_t nb_clusters = MIN(
                nb_new_data_clusters,
                s->l2_slice_size - offset_to_l2_slice_index(s, guest_offset));
            QCowL2Meta allocation = {
                .offset       = guest_offset,
                .alloc_offset = host_offset,
                .nb_clusters  = nb_clusters,
            };
            qemu_co_queue_init(&allocation.dependent_requests);

            ret = qcow2_alloc_cluster_link_l2(bs, &allocation);
            if (ret < 0) {
                error_setg_errno(errp, -ret, "Failed to update L2 tables");
                /* Free the clusters that have not been linked yet */
                qcow2_free_clusters(bs, host_offset,
                                    nb_new_data_clusters * s->cluster_size,
                                    QCOW2_DISCARD_OTHER);
                goto fail;
            }

            guest_offset += nb_clusters * s->cluster_size;
            host_offset += nb_clusters * s->cluster_size;
            nb_new_data_clusters -= nb_clusters;
        }
        break;
    }

    default:
        g_assert_not_reached();
    }

    if (prealloc != PREALLOC_MODE_OFF) {
        /* Write out the metadata caches before changing the image size */
        ret = qcow2_write_caches(bs);
        if (ret < 0) {
            error_setg_errno(errp, -ret,
                             "Failed to flush the preallocated area to disk");
            goto fail;
        }
    }

    bs->total_sectors = offset / BDRV_SECTOR_SIZE;

    /* Write the new size (big endian) into the image header */
    offset = cpu_to_be64(offset);
    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
                           &offset, sizeof(uint64_t));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Failed to update the image size");
        goto fail;
    }

    s->l1_vm_state_index = new_l1_size;

    /* Re-apply the options; presumably to adapt cache sizes -- TODO confirm */
    options = qdict_clone_shallow(bs->options);
    ret = qcow2_update_options(bs, options, s->flags, errp);
    qobject_unref(options);
    if (ret < 0) {
        goto fail;
    }
    ret = 0;
fail:
    qemu_co_mutex_unlock(&s->lock);
    return ret;
}
4224
4225
4226
4227static coroutine_fn int
4228qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
4229 uint64_t offset, uint64_t bytes,
4230 QEMUIOVector *qiov, size_t qiov_offset)
4231{
4232 BDRVQcow2State *s = bs->opaque;
4233 int ret;
4234 ssize_t out_len;
4235 uint8_t *buf, *out_buf;
4236 uint64_t cluster_offset;
4237
4238 if (has_data_file(bs)) {
4239 return -ENOTSUP;
4240 }
4241
4242 if (bytes == 0) {
4243
4244
4245 int64_t len = bdrv_getlength(bs->file->bs);
4246 if (len < 0) {
4247 return len;
4248 }
4249 return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, NULL);
4250 }
4251
4252 if (offset_into_cluster(s, offset)) {
4253 return -EINVAL;
4254 }
4255
4256 buf = qemu_blockalign(bs, s->cluster_size);
4257 if (bytes != s->cluster_size) {
4258 if (bytes > s->cluster_size ||
4259 offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS)
4260 {
4261 qemu_vfree(buf);
4262 return -EINVAL;
4263 }
4264
4265 memset(buf + bytes, 0, s->cluster_size - bytes);
4266 }
4267 qemu_iovec_to_buf(qiov, qiov_offset, buf, bytes);
4268
4269 out_buf = g_malloc(s->cluster_size);
4270
4271 out_len = qcow2_co_compress(bs, out_buf, s->cluster_size - 1,
4272 buf, s->cluster_size);
4273 if (out_len == -ENOMEM) {
4274
4275 ret = qcow2_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset, 0);
4276 if (ret < 0) {
4277 goto fail;
4278 }
4279 goto success;
4280 } else if (out_len < 0) {
4281 ret = -EINVAL;
4282 goto fail;
4283 }
4284
4285 qemu_co_mutex_lock(&s->lock);
4286 ret = qcow2_alloc_compressed_cluster_offset(bs, offset, out_len,
4287 &cluster_offset);
4288 if (ret < 0) {
4289 qemu_co_mutex_unlock(&s->lock);
4290 goto fail;
4291 }
4292
4293 ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len, true);
4294 qemu_co_mutex_unlock(&s->lock);
4295 if (ret < 0) {
4296 goto fail;
4297 }
4298
4299 BLKDBG_EVENT(s->data_file, BLKDBG_WRITE_COMPRESSED);
4300 ret = bdrv_co_pwrite(s->data_file, cluster_offset, out_len, out_buf, 0);
4301 if (ret < 0) {
4302 goto fail;
4303 }
4304success:
4305 ret = 0;
4306fail:
4307 qemu_vfree(buf);
4308 g_free(out_buf);
4309 return ret;
4310}
4311
4312static int coroutine_fn
4313qcow2_co_preadv_compressed(BlockDriverState *bs,
4314 uint64_t file_cluster_offset,
4315 uint64_t offset,
4316 uint64_t bytes,
4317 QEMUIOVector *qiov,
4318 size_t qiov_offset)
4319{
4320 BDRVQcow2State *s = bs->opaque;
4321 int ret = 0, csize, nb_csectors;
4322 uint64_t coffset;
4323 uint8_t *buf, *out_buf;
4324 int offset_in_cluster = offset_into_cluster(s, offset);
4325
4326 coffset = file_cluster_offset & s->cluster_offset_mask;
4327 nb_csectors = ((file_cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
4328 csize = nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE -
4329 (coffset & ~QCOW2_COMPRESSED_SECTOR_MASK);
4330
4331 buf = g_try_malloc(csize);
4332 if (!buf) {
4333 return -ENOMEM;
4334 }
4335
4336 out_buf = qemu_blockalign(bs, s->cluster_size);
4337
4338 BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
4339 ret = bdrv_co_pread(bs->file, coffset, csize, buf, 0);
4340 if (ret < 0) {
4341 goto fail;
4342 }
4343
4344 if (qcow2_co_decompress(bs, out_buf, s->cluster_size, buf, csize) < 0) {
4345 ret = -EIO;
4346 goto fail;
4347 }
4348
4349 qemu_iovec_from_buf(qiov, qiov_offset, out_buf + offset_in_cluster, bytes);
4350
4351fail:
4352 qemu_vfree(out_buf);
4353 g_free(buf);
4354
4355 return ret;
4356}
4357
/*
 * Empty the image by rebuilding its metadata from scratch.
 *
 * The resulting layout, in units of clusters from the start of the file,
 * is: cluster 1 holds the refcount table, cluster 2 the first refcount
 * block, and the L1 table starts at cluster 3.  Finally the file is
 * truncated to (3 + l1_clusters) clusters.
 *
 * If a step fails after the on-disk refcount structures may have been
 * overwritten, bs->drv is set to NULL.  Returns 0 on success or a negative
 * errno.
 */
static int make_completely_empty(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;
    Error *local_err = NULL;
    int ret, l1_clusters;
    int64_t offset;
    uint64_t *new_reftable = NULL;
    uint64_t rt_entry, l1_size2;
    /*
     * Packed so that a single write starting at the header's
     * l1_table_offset field can update all three values at once; this
     * relies on these fields being adjacent in QCowHeader, which is
     * defined elsewhere.
     */
    struct {
        uint64_t l1_offset;
        uint64_t reftable_offset;
        uint32_t reftable_clusters;
    } QEMU_PACKED l1_ofs_rt_ofs_cls;

    ret = qcow2_cache_empty(bs, s->l2_table_cache);
    if (ret < 0) {
        goto fail;
    }

    ret = qcow2_cache_empty(bs, s->refcount_block_cache);
    if (ret < 0) {
        goto fail;
    }

    /* The refcounts are about to become inconsistent: mark the image dirty */
    ret = qcow2_mark_dirty(bs);
    if (ret < 0) {
        goto fail;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);

    l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
    l1_size2 = (uint64_t)s->l1_size * sizeof(uint64_t);

    /*
     * Zero the current L1 table on disk and in memory.  From here on a
     * failure leaves the refcount information inconsistent, hence the
     * fail_broken_refcounts error path.
     */
    ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset,
                             l1_clusters * s->cluster_size, 0);
    if (ret < 0) {
        goto fail_broken_refcounts;
    }
    memset(s->l1_table, 0, l1_size2);

    BLKDBG_EVENT(bs->file, BLKDBG_EMPTY_IMAGE_PREPARE);

    /*
     * Zero the clusters right after the first one: enough room for the
     * new refcount table (1 cluster), the first refcount block (1 cluster)
     * and the new L1 table (l1_clusters).
     */
    ret = bdrv_pwrite_zeroes(bs->file, s->cluster_size,
                             (2 + l1_clusters) * s->cluster_size, 0);
    /*
     * Even a failed call may already have overwritten part of the on-disk
     * structures, so this also takes the broken-refcounts path.
     */
    if (ret < 0) {
        goto fail_broken_refcounts;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
    BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE);

    /*
     * Point the header at the new, empty structures: a one-cluster
     * refcount table in cluster 1 and the L1 table at cluster 3.
     */
    l1_ofs_rt_ofs_cls.l1_offset = cpu_to_be64(3 * s->cluster_size);
    l1_ofs_rt_ofs_cls.reftable_offset = cpu_to_be64(s->cluster_size);
    l1_ofs_rt_ofs_cls.reftable_clusters = cpu_to_be32(1);
    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset),
                           &l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls));
    if (ret < 0) {
        goto fail_broken_refcounts;
    }

    s->l1_table_offset = 3 * s->cluster_size;

    new_reftable = g_try_new0(uint64_t, s->cluster_size / sizeof(uint64_t));
    if (!new_reftable) {
        ret = -ENOMEM;
        goto fail_broken_refcounts;
    }

    s->refcount_table_offset = s->cluster_size;
    s->refcount_table_size   = s->cluster_size / sizeof(uint64_t);
    s->max_refcount_table_index = 0;

    /* Replace the in-memory refcount table; ownership moves to s */
    g_free(s->refcount_table);
    s->refcount_table = new_reftable;
    new_reftable = NULL;

    /*
     * At this point the in-memory refcount table is empty, matching the
     * zeroed on-disk table; no cluster has a refcount yet.
     */
    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);

    /* Enter the first refblock (cluster 2) into the reftable */
    rt_entry = cpu_to_be64(2 * s->cluster_size);
    ret = bdrv_pwrite_sync(bs->file, s->cluster_size,
                           &rt_entry, sizeof(rt_entry));
    if (ret < 0) {
        goto fail_broken_refcounts;
    }
    s->refcount_table[0] = 2 * s->cluster_size;

    /*
     * Allocate the header, reftable, refblock and L1 table in one go; with
     * free_cluster_index reset, the allocation must start at offset 0.
     */
    s->free_cluster_index = 0;
    assert(3 + l1_clusters <= s->refcount_block_size);
    offset = qcow2_alloc_clusters(bs, 3 * s->cluster_size + l1_size2);
    if (offset < 0) {
        ret = offset;
        goto fail_broken_refcounts;
    } else if (offset > 0) {
        error_report("First cluster in emptied image is in use");
        abort();
    }

    /*
     * The refcounts have been rebuilt: clear the dirty flag again.  Errors
     * from here on take the plain fail path.
     */
    ret = qcow2_mark_clean(bs);
    if (ret < 0) {
        goto fail;
    }

    ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size, false,
                        PREALLOC_MODE_OFF, &local_err);
    if (ret < 0) {
        error_report_err(local_err);
        goto fail;
    }

    return 0;

fail_broken_refcounts:
    /*
     * The refcount structures may be inconsistent at this point.  Clearing
     * bs->drv detaches the driver from the node; presumably this makes the
     * node unusable so that the broken metadata is not used any further
     * -- TODO confirm.
     */
    bs->drv = NULL;

fail:
    g_free(new_reftable);
    return ret;
}
4508
4509static int qcow2_make_empty(BlockDriverState *bs)
4510{
4511 BDRVQcow2State *s = bs->opaque;
4512 uint64_t offset, end_offset;
4513 int step = QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size);
4514 int l1_clusters, ret = 0;
4515
4516 l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
4517
4518 if (s->qcow_version >= 3 && !s->snapshots && !s->nb_bitmaps &&
4519 3 + l1_clusters <= s->refcount_block_size &&
4520 s->crypt_method_header != QCOW_CRYPT_LUKS &&
4521 !has_data_file(bs)) {
4522
4523
4524
4525
4526
4527
4528
4529
4530
4531 return make_completely_empty(bs);
4532 }
4533
4534
4535
4536 end_offset = bs->total_sectors * BDRV_SECTOR_SIZE;
4537 for (offset = 0; offset < end_offset; offset += step) {
4538
4539
4540
4541
4542
4543 ret = qcow2_cluster_discard(bs, offset, MIN(step, end_offset - offset),
4544 QCOW2_DISCARD_SNAPSHOT, true);
4545 if (ret < 0) {
4546 break;
4547 }
4548 }
4549
4550 return ret;
4551}
4552
4553static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
4554{
4555 BDRVQcow2State *s = bs->opaque;
4556 int ret;
4557
4558 qemu_co_mutex_lock(&s->lock);
4559 ret = qcow2_write_caches(bs);
4560 qemu_co_mutex_unlock(&s->lock);
4561
4562 return ret;
4563}
4564
4565static ssize_t qcow2_measure_crypto_hdr_init_func(QCryptoBlock *block,
4566 size_t headerlen, void *opaque, Error **errp)
4567{
4568 size_t *headerlenp = opaque;
4569
4570
4571 *headerlenp = headerlen;
4572 return 0;
4573}
4574
4575static ssize_t qcow2_measure_crypto_hdr_write_func(QCryptoBlock *block,
4576 size_t offset, const uint8_t *buf, size_t buflen,
4577 void *opaque, Error **errp)
4578{
4579
4580 return buflen;
4581}
4582
4583
4584static bool qcow2_measure_luks_headerlen(QemuOpts *opts, size_t *len,
4585 Error **errp)
4586{
4587 QDict *opts_qdict;
4588 QDict *cryptoopts_qdict;
4589 QCryptoBlockCreateOptions *cryptoopts;
4590 QCryptoBlock *crypto;
4591
4592
4593 opts_qdict = qemu_opts_to_qdict(opts, NULL);
4594 qdict_extract_subqdict(opts_qdict, &cryptoopts_qdict, "encrypt.");
4595 qobject_unref(opts_qdict);
4596
4597
4598 qdict_put_str(cryptoopts_qdict, "format", "luks");
4599 cryptoopts = block_crypto_create_opts_init(cryptoopts_qdict, errp);
4600 qobject_unref(cryptoopts_qdict);
4601 if (!cryptoopts) {
4602 return false;
4603 }
4604
4605
4606 crypto = qcrypto_block_create(cryptoopts, "encrypt.",
4607 qcow2_measure_crypto_hdr_init_func,
4608 qcow2_measure_crypto_hdr_write_func,
4609 len, errp);
4610 qapi_free_QCryptoBlockCreateOptions(cryptoopts);
4611 if (!crypto) {
4612 return false;
4613 }
4614
4615 qcrypto_block_free(crypto);
4616 return true;
4617}
4618
4619static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
4620 Error **errp)
4621{
4622 Error *local_err = NULL;
4623 BlockMeasureInfo *info;
4624 uint64_t required = 0;
4625 uint64_t virtual_size;
4626 uint64_t refcount_bits;
4627 uint64_t l2_tables;
4628 uint64_t luks_payload_size = 0;
4629 size_t cluster_size;
4630 int version;
4631 char *optstr;
4632 PreallocMode prealloc;
4633 bool has_backing_file;
4634 bool has_luks;
4635
4636
4637 cluster_size = qcow2_opt_get_cluster_size_del(opts, &local_err);
4638 if (local_err) {
4639 goto err;
4640 }
4641
4642 version = qcow2_opt_get_version_del(opts, &local_err);
4643 if (local_err) {
4644 goto err;
4645 }
4646
4647 refcount_bits = qcow2_opt_get_refcount_bits_del(opts, version, &local_err);
4648 if (local_err) {
4649 goto err;
4650 }
4651
4652 optstr = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
4653 prealloc = qapi_enum_parse(&PreallocMode_lookup, optstr,
4654 PREALLOC_MODE_OFF, &local_err);
4655 g_free(optstr);
4656 if (local_err) {
4657 goto err;
4658 }
4659
4660 optstr = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
4661 has_backing_file = !!optstr;
4662 g_free(optstr);
4663
4664 optstr = qemu_opt_get_del(opts, BLOCK_OPT_ENCRYPT_FORMAT);
4665 has_luks = optstr && strcmp(optstr, "luks") == 0;
4666 g_free(optstr);
4667
4668 if (has_luks) {
4669 size_t headerlen;
4670
4671 if (!qcow2_measure_luks_headerlen(opts, &headerlen, &local_err)) {
4672 goto err;
4673 }
4674
4675 luks_payload_size = ROUND_UP(headerlen, cluster_size);
4676 }
4677
4678 virtual_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
4679 virtual_size = ROUND_UP(virtual_size, cluster_size);
4680
4681
4682 l2_tables = DIV_ROUND_UP(virtual_size / cluster_size,
4683 cluster_size / sizeof(uint64_t));
4684 if (l2_tables * sizeof(uint64_t) > QCOW_MAX_L1_SIZE) {
4685 error_setg(&local_err, "The image size is too large "
4686 "(try using a larger cluster size)");
4687 goto err;
4688 }
4689
4690
4691 if (in_bs) {
4692 int64_t ssize = bdrv_getlength(in_bs);
4693 if (ssize < 0) {
4694 error_setg_errno(&local_err, -ssize,
4695 "Unable to get image virtual_size");
4696 goto err;
4697 }
4698
4699 virtual_size = ROUND_UP(ssize, cluster_size);
4700
4701 if (has_backing_file) {
4702
4703
4704
4705
4706
4707 required = virtual_size;
4708 } else {
4709 int64_t offset;
4710 int64_t pnum = 0;
4711
4712 for (offset = 0; offset < ssize; offset += pnum) {
4713 int ret;
4714
4715 ret = bdrv_block_status_above(in_bs, NULL, offset,
4716 ssize - offset, &pnum, NULL,
4717 NULL);
4718 if (ret < 0) {
4719 error_setg_errno(&local_err, -ret,
4720 "Unable to get block status");
4721 goto err;
4722 }
4723
4724 if (ret & BDRV_BLOCK_ZERO) {
4725
4726 } else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) ==
4727 (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) {
4728
4729 pnum = ROUND_UP(offset + pnum, cluster_size) - offset;
4730
4731
4732 required += offset % cluster_size + pnum;
4733 }
4734 }
4735 }
4736 }
4737
4738
4739
4740
4741 if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) {
4742 required = virtual_size;
4743 }
4744
4745 info = g_new(BlockMeasureInfo, 1);
4746 info->fully_allocated =
4747 qcow2_calc_prealloc_size(virtual_size, cluster_size,
4748 ctz32(refcount_bits)) + luks_payload_size;
4749
4750
4751
4752
4753
4754 info->required = info->fully_allocated - virtual_size + required;
4755 return info;
4756
4757err:
4758 error_propagate(errp, local_err);
4759 return NULL;
4760}
4761
4762static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4763{
4764 BDRVQcow2State *s = bs->opaque;
4765 bdi->unallocated_blocks_are_zero = true;
4766 bdi->cluster_size = s->cluster_size;
4767 bdi->vm_state_offset = qcow2_vm_state_offset(s);
4768 return 0;
4769}
4770
4771static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs,
4772 Error **errp)
4773{
4774 BDRVQcow2State *s = bs->opaque;
4775 ImageInfoSpecific *spec_info;
4776 QCryptoBlockInfo *encrypt_info = NULL;
4777 Error *local_err = NULL;
4778
4779 if (s->crypto != NULL) {
4780 encrypt_info = qcrypto_block_get_info(s->crypto, &local_err);
4781 if (local_err) {
4782 error_propagate(errp, local_err);
4783 return NULL;
4784 }
4785 }
4786
4787 spec_info = g_new(ImageInfoSpecific, 1);
4788 *spec_info = (ImageInfoSpecific){
4789 .type = IMAGE_INFO_SPECIFIC_KIND_QCOW2,
4790 .u.qcow2.data = g_new0(ImageInfoSpecificQCow2, 1),
4791 };
4792 if (s->qcow_version == 2) {
4793 *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
4794 .compat = g_strdup("0.10"),
4795 .refcount_bits = s->refcount_bits,
4796 };
4797 } else if (s->qcow_version == 3) {
4798 Qcow2BitmapInfoList *bitmaps;
4799 bitmaps = qcow2_get_bitmap_info_list(bs, &local_err);
4800 if (local_err) {
4801 error_propagate(errp, local_err);
4802 qapi_free_ImageInfoSpecific(spec_info);
4803 return NULL;
4804 }
4805 *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
4806 .compat = g_strdup("1.1"),
4807 .lazy_refcounts = s->compatible_features &
4808 QCOW2_COMPAT_LAZY_REFCOUNTS,
4809 .has_lazy_refcounts = true,
4810 .corrupt = s->incompatible_features &
4811 QCOW2_INCOMPAT_CORRUPT,
4812 .has_corrupt = true,
4813 .refcount_bits = s->refcount_bits,
4814 .has_bitmaps = !!bitmaps,
4815 .bitmaps = bitmaps,
4816 .has_data_file = !!s->image_data_file,
4817 .data_file = g_strdup(s->image_data_file),
4818 .has_data_file_raw = has_data_file(bs),
4819 .data_file_raw = data_file_is_raw(bs),
4820 };
4821 } else {
4822
4823
4824 assert(false);
4825 }
4826
4827 if (encrypt_info) {
4828 ImageInfoSpecificQCow2Encryption *qencrypt =
4829 g_new(ImageInfoSpecificQCow2Encryption, 1);
4830 switch (encrypt_info->format) {
4831 case Q_CRYPTO_BLOCK_FORMAT_QCOW:
4832 qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_AES;
4833 break;
4834 case Q_CRYPTO_BLOCK_FORMAT_LUKS:
4835 qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_LUKS;
4836 qencrypt->u.luks = encrypt_info->u.luks;
4837 break;
4838 default:
4839 abort();
4840 }
4841
4842
4843 memset(&encrypt_info->u, 0, sizeof(encrypt_info->u));
4844 qapi_free_QCryptoBlockInfo(encrypt_info);
4845
4846 spec_info->u.qcow2.data->has_encrypt = true;
4847 spec_info->u.qcow2.data->encrypt = qencrypt;
4848 }
4849
4850 return spec_info;
4851}
4852
4853static int qcow2_has_zero_init(BlockDriverState *bs)
4854{
4855 BDRVQcow2State *s = bs->opaque;
4856 bool preallocated;
4857
4858 if (qemu_in_coroutine()) {
4859 qemu_co_mutex_lock(&s->lock);
4860 }
4861
4862
4863
4864
4865
4866 preallocated = s->l1_size > 0 && s->l1_table[0] != 0;
4867 if (qemu_in_coroutine()) {
4868 qemu_co_mutex_unlock(&s->lock);
4869 }
4870
4871 if (!preallocated) {
4872 return 1;
4873 } else if (bs->encrypted) {
4874 return 0;
4875 } else {
4876 return bdrv_has_zero_init(s->data_file->bs);
4877 }
4878}
4879
4880static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
4881 int64_t pos)
4882{
4883 BDRVQcow2State *s = bs->opaque;
4884
4885 BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
4886 return bs->drv->bdrv_co_pwritev_part(bs, qcow2_vm_state_offset(s) + pos,
4887 qiov->size, qiov, 0, 0);
4888}
4889
4890static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
4891 int64_t pos)
4892{
4893 BDRVQcow2State *s = bs->opaque;
4894
4895 BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
4896 return bs->drv->bdrv_co_preadv_part(bs, qcow2_vm_state_offset(s) + pos,
4897 qiov->size, qiov, 0, 0);
4898}
4899
4900
4901
4902
4903
4904static int qcow2_downgrade(BlockDriverState *bs, int target_version,
4905 BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
4906 Error **errp)
4907{
4908 BDRVQcow2State *s = bs->opaque;
4909 int current_version = s->qcow_version;
4910 int ret;
4911
4912
4913 assert(target_version < current_version);
4914
4915
4916 assert(target_version == 2);
4917
4918 if (s->refcount_order != 4) {
4919 error_setg(errp, "compat=0.10 requires refcount_bits=16");
4920 return -ENOTSUP;
4921 }
4922
4923 if (has_data_file(bs)) {
4924 error_setg(errp, "Cannot downgrade an image with a data file");
4925 return -ENOTSUP;
4926 }
4927
4928
4929 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
4930 ret = qcow2_mark_clean(bs);
4931 if (ret < 0) {
4932 error_setg_errno(errp, -ret, "Failed to make the image clean");
4933 return ret;
4934 }
4935 }
4936
4937
4938
4939
4940
4941 if (s->incompatible_features) {
4942 error_setg(errp, "Cannot downgrade an image with incompatible features "
4943 "%#" PRIx64 " set", s->incompatible_features);
4944 return -ENOTSUP;
4945 }
4946
4947
4948 s->compatible_features = 0;
4949
4950
4951
4952
4953 s->autoclear_features = 0;
4954
4955 ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque);
4956 if (ret < 0) {
4957 error_setg_errno(errp, -ret, "Failed to turn zero into data clusters");
4958 return ret;
4959 }
4960
4961 s->qcow_version = target_version;
4962 ret = qcow2_update_header(bs);
4963 if (ret < 0) {
4964 s->qcow_version = current_version;
4965 error_setg_errno(errp, -ret, "Failed to update the image header");
4966 return ret;
4967 }
4968 return 0;
4969}
4970
4971
4972
4973
4974
4975
4976static int qcow2_upgrade(BlockDriverState *bs, int target_version,
4977 BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
4978 Error **errp)
4979{
4980 BDRVQcow2State *s = bs->opaque;
4981 bool need_snapshot_update;
4982 int current_version = s->qcow_version;
4983 int i;
4984 int ret;
4985
4986
4987 assert(target_version > current_version);
4988
4989
4990 assert(target_version == 3);
4991
4992 status_cb(bs, 0, 2, cb_opaque);
4993
4994
4995
4996
4997
4998
4999
5000
5001 need_snapshot_update = false;
5002 for (i = 0; i < s->nb_snapshots; i++) {
5003 if (s->snapshots[i].extra_data_size <
5004 sizeof_field(QCowSnapshotExtraData, vm_state_size_large) +
5005 sizeof_field(QCowSnapshotExtraData, disk_size))
5006 {
5007 need_snapshot_update = true;
5008 break;
5009 }
5010 }
5011 if (need_snapshot_update) {
5012 ret = qcow2_write_snapshots(bs);
5013 if (ret < 0) {
5014 error_setg_errno(errp, -ret, "Failed to update the snapshot table");
5015 return ret;
5016 }
5017 }
5018 status_cb(bs, 1, 2, cb_opaque);
5019
5020 s->qcow_version = target_version;
5021 ret = qcow2_update_header(bs);
5022 if (ret < 0) {
5023 s->qcow_version = current_version;
5024 error_setg_errno(errp, -ret, "Failed to update the image header");
5025 return ret;
5026 }
5027 status_cb(bs, 2, 2, cb_opaque);
5028
5029 return 0;
5030}
5031
/*
 * Identifies which amend step is currently reporting progress through
 * qcow2_amend_helper_cb().
 */
typedef enum Qcow2AmendOperation {
    /*
     * Zero value: a Qcow2AmendHelperCBInfo whose last_operation was left
     * zero-initialized has not seen any operation yet.
     */
    QCOW2_NO_OPERATION = 0,

    /* Listed in the order in which qcow2_amend_options() runs them */
    QCOW2_UPGRADING,
    QCOW2_CHANGING_REFCOUNT_ORDER,
    QCOW2_DOWNGRADING,
} Qcow2AmendOperation;
5042
5043typedef struct Qcow2AmendHelperCBInfo {
5044
5045
5046 BlockDriverAmendStatusCB *original_status_cb;
5047 void *original_cb_opaque;
5048
5049 Qcow2AmendOperation current_operation;
5050
5051
5052 int total_operations;
5053
5054
5055
5056
5057 int operations_completed;
5058
5059
5060 int64_t offset_completed;
5061
5062 Qcow2AmendOperation last_operation;
5063 int64_t last_work_size;
5064} Qcow2AmendHelperCBInfo;
5065
5066static void qcow2_amend_helper_cb(BlockDriverState *bs,
5067 int64_t operation_offset,
5068 int64_t operation_work_size, void *opaque)
5069{
5070 Qcow2AmendHelperCBInfo *info = opaque;
5071 int64_t current_work_size;
5072 int64_t projected_work_size;
5073
5074 if (info->current_operation != info->last_operation) {
5075 if (info->last_operation != QCOW2_NO_OPERATION) {
5076 info->offset_completed += info->last_work_size;
5077 info->operations_completed++;
5078 }
5079
5080 info->last_operation = info->current_operation;
5081 }
5082
5083 assert(info->total_operations > 0);
5084 assert(info->operations_completed < info->total_operations);
5085
5086 info->last_work_size = operation_work_size;
5087
5088 current_work_size = info->offset_completed + operation_work_size;
5089
5090
5091
5092
5093
5094 projected_work_size = current_work_size * (info->total_operations -
5095 info->operations_completed - 1)
5096 / (info->operations_completed + 1);
5097
5098 info->original_status_cb(bs, info->offset_completed + operation_offset,
5099 current_work_size + projected_work_size,
5100 info->original_cb_opaque);
5101}
5102
5103static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
5104 BlockDriverAmendStatusCB *status_cb,
5105 void *cb_opaque,
5106 Error **errp)
5107{
5108 BDRVQcow2State *s = bs->opaque;
5109 int old_version = s->qcow_version, new_version = old_version;
5110 uint64_t new_size = 0;
5111 const char *backing_file = NULL, *backing_format = NULL, *data_file = NULL;
5112 bool lazy_refcounts = s->use_lazy_refcounts;
5113 bool data_file_raw = data_file_is_raw(bs);
5114 const char *compat = NULL;
5115 uint64_t cluster_size = s->cluster_size;
5116 bool encrypt;
5117 int encformat;
5118 int refcount_bits = s->refcount_bits;
5119 int ret;
5120 QemuOptDesc *desc = opts->list->desc;
5121 Qcow2AmendHelperCBInfo helper_cb_info;
5122
5123 while (desc && desc->name) {
5124 if (!qemu_opt_find(opts, desc->name)) {
5125
5126 desc++;
5127 continue;
5128 }
5129
5130 if (!strcmp(desc->name, BLOCK_OPT_COMPAT_LEVEL)) {
5131 compat = qemu_opt_get(opts, BLOCK_OPT_COMPAT_LEVEL);
5132 if (!compat) {
5133
5134 } else if (!strcmp(compat, "0.10") || !strcmp(compat, "v2")) {
5135 new_version = 2;
5136 } else if (!strcmp(compat, "1.1") || !strcmp(compat, "v3")) {
5137 new_version = 3;
5138 } else {
5139 error_setg(errp, "Unknown compatibility level %s", compat);
5140 return -EINVAL;
5141 }
5142 } else if (!strcmp(desc->name, BLOCK_OPT_PREALLOC)) {
5143 error_setg(errp, "Cannot change preallocation mode");
5144 return -ENOTSUP;
5145 } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) {
5146 new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
5147 } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FILE)) {
5148 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
5149 } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FMT)) {
5150 backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
5151 } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT)) {
5152 encrypt = qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT,
5153 !!s->crypto);
5154
5155 if (encrypt != !!s->crypto) {
5156 error_setg(errp,
5157 "Changing the encryption flag is not supported");
5158 return -ENOTSUP;
5159 }
5160 } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT_FORMAT)) {
5161 encformat = qcow2_crypt_method_from_format(
5162 qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT));
5163
5164 if (encformat != s->crypt_method_header) {
5165 error_setg(errp,
5166 "Changing the encryption format is not supported");
5167 return -ENOTSUP;
5168 }
5169 } else if (g_str_has_prefix(desc->name, "encrypt.")) {
5170 error_setg(errp,
5171 "Changing the encryption parameters is not supported");
5172 return -ENOTSUP;
5173 } else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) {
5174 cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE,
5175 cluster_size);
5176 if (cluster_size != s->cluster_size) {
5177 error_setg(errp, "Changing the cluster size is not supported");
5178 return -ENOTSUP;
5179 }
5180 } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
5181 lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS,
5182 lazy_refcounts);
5183 } else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) {
5184 refcount_bits = qemu_opt_get_number(opts, BLOCK_OPT_REFCOUNT_BITS,
5185 refcount_bits);
5186
5187 if (refcount_bits <= 0 || refcount_bits > 64 ||
5188 !is_power_of_2(refcount_bits))
5189 {
5190 error_setg(errp, "Refcount width must be a power of two and "
5191 "may not exceed 64 bits");
5192 return -EINVAL;
5193 }
5194 } else if (!strcmp(desc->name, BLOCK_OPT_DATA_FILE)) {
5195 data_file = qemu_opt_get(opts, BLOCK_OPT_DATA_FILE);
5196 if (data_file && !has_data_file(bs)) {
5197 error_setg(errp, "data-file can only be set for images that "
5198 "use an external data file");
5199 return -EINVAL;
5200 }
5201 } else if (!strcmp(desc->name, BLOCK_OPT_DATA_FILE_RAW)) {
5202 data_file_raw = qemu_opt_get_bool(opts, BLOCK_OPT_DATA_FILE_RAW,
5203 data_file_raw);
5204 if (data_file_raw && !data_file_is_raw(bs)) {
5205 error_setg(errp, "data-file-raw cannot be set on existing "
5206 "images");
5207 return -EINVAL;
5208 }
5209 } else {
5210
5211
5212 abort();
5213 }
5214
5215 desc++;
5216 }
5217
5218 helper_cb_info = (Qcow2AmendHelperCBInfo){
5219 .original_status_cb = status_cb,
5220 .original_cb_opaque = cb_opaque,
5221 .total_operations = (new_version != old_version)
5222 + (s->refcount_bits != refcount_bits)
5223 };
5224
5225
5226 if (new_version > old_version) {
5227 helper_cb_info.current_operation = QCOW2_UPGRADING;
5228 ret = qcow2_upgrade(bs, new_version, &qcow2_amend_helper_cb,
5229 &helper_cb_info, errp);
5230 if (ret < 0) {
5231 return ret;
5232 }
5233 }
5234
5235 if (s->refcount_bits != refcount_bits) {
5236 int refcount_order = ctz32(refcount_bits);
5237
5238 if (new_version < 3 && refcount_bits != 16) {
5239 error_setg(errp, "Refcount widths other than 16 bits require "
5240 "compatibility level 1.1 or above (use compat=1.1 or "
5241 "greater)");
5242 return -EINVAL;
5243 }
5244
5245 helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER;
5246 ret = qcow2_change_refcount_order(bs, refcount_order,
5247 &qcow2_amend_helper_cb,
5248 &helper_cb_info, errp);
5249 if (ret < 0) {
5250 return ret;
5251 }
5252 }
5253
5254
5255 if (data_file_raw) {
5256 s->autoclear_features |= QCOW2_AUTOCLEAR_DATA_FILE_RAW;
5257 } else {
5258 s->autoclear_features &= ~QCOW2_AUTOCLEAR_DATA_FILE_RAW;
5259 }
5260
5261 if (data_file) {
5262 g_free(s->image_data_file);
5263 s->image_data_file = *data_file ? g_strdup(data_file) : NULL;
5264 }
5265
5266 ret = qcow2_update_header(bs);
5267 if (ret < 0) {
5268 error_setg_errno(errp, -ret, "Failed to update the image header");
5269 return ret;
5270 }
5271
5272 if (backing_file || backing_format) {
5273 ret = qcow2_change_backing_file(bs,
5274 backing_file ?: s->image_backing_file,
5275 backing_format ?: s->image_backing_format);
5276 if (ret < 0) {
5277 error_setg_errno(errp, -ret, "Failed to change the backing file");
5278 return ret;
5279 }
5280 }
5281
5282 if (s->use_lazy_refcounts != lazy_refcounts) {
5283 if (lazy_refcounts) {
5284 if (new_version < 3) {
5285 error_setg(errp, "Lazy refcounts only supported with "
5286 "compatibility level 1.1 and above (use compat=1.1 "
5287 "or greater)");
5288 return -EINVAL;
5289 }
5290 s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
5291 ret = qcow2_update_header(bs);
5292 if (ret < 0) {
5293 s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
5294 error_setg_errno(errp, -ret, "Failed to update the image header");
5295 return ret;
5296 }
5297 s->use_lazy_refcounts = true;
5298 } else {
5299
5300 ret = qcow2_mark_clean(bs);
5301 if (ret < 0) {
5302 error_setg_errno(errp, -ret, "Failed to make the image clean");
5303 return ret;
5304 }
5305
5306 s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
5307 ret = qcow2_update_header(bs);
5308 if (ret < 0) {
5309 s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
5310 error_setg_errno(errp, -ret, "Failed to update the image header");
5311 return ret;
5312 }
5313 s->use_lazy_refcounts = false;
5314 }
5315 }
5316
5317 if (new_size) {
5318 BlockBackend *blk = blk_new(bdrv_get_aio_context(bs),
5319 BLK_PERM_RESIZE, BLK_PERM_ALL);
5320 ret = blk_insert_bs(blk, bs, errp);
5321 if (ret < 0) {
5322 blk_unref(blk);
5323 return ret;
5324 }
5325
5326
5327
5328
5329
5330 ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, errp);
5331 blk_unref(blk);
5332 if (ret < 0) {
5333 return ret;
5334 }
5335 }
5336
5337
5338 if (new_version < old_version) {
5339 helper_cb_info.current_operation = QCOW2_DOWNGRADING;
5340 ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb,
5341 &helper_cb_info, errp);
5342 if (ret < 0) {
5343 return ret;
5344 }
5345 }
5346
5347 return 0;
5348}
5349
5350
5351
5352
5353
5354
5355
5356void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
5357 int64_t size, const char *message_format, ...)
5358{
5359 BDRVQcow2State *s = bs->opaque;
5360 const char *node_name;
5361 char *message;
5362 va_list ap;
5363
5364 fatal = fatal && bdrv_is_writable(bs);
5365
5366 if (s->signaled_corruption &&
5367 (!fatal || (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT)))
5368 {
5369 return;
5370 }
5371
5372 va_start(ap, message_format);
5373 message = g_strdup_vprintf(message_format, ap);
5374 va_end(ap);
5375
5376 if (fatal) {
5377 fprintf(stderr, "qcow2: Marking image as corrupt: %s; further "
5378 "corruption events will be suppressed\n", message);
5379 } else {
5380 fprintf(stderr, "qcow2: Image is corrupt: %s; further non-fatal "
5381 "corruption events will be suppressed\n", message);
5382 }
5383
5384 node_name = bdrv_get_node_name(bs);
5385 qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs),
5386 *node_name != '\0', node_name,
5387 message, offset >= 0, offset,
5388 size >= 0, size,
5389 fatal);
5390 g_free(message);
5391
5392 if (fatal) {
5393 qcow2_mark_corrupt(bs);
5394 bs->drv = NULL;
5395 }
5396
5397 s->signaled_corruption = true;
5398}
5399
5400static QemuOptsList qcow2_create_opts = {
5401 .name = "qcow2-create-opts",
5402 .head = QTAILQ_HEAD_INITIALIZER(qcow2_create_opts.head),
5403 .desc = {
5404 {
5405 .name = BLOCK_OPT_SIZE,
5406 .type = QEMU_OPT_SIZE,
5407 .help = "Virtual disk size"
5408 },
5409 {
5410 .name = BLOCK_OPT_COMPAT_LEVEL,
5411 .type = QEMU_OPT_STRING,
5412 .help = "Compatibility level (v2 [0.10] or v3 [1.1])"
5413 },
5414 {
5415 .name = BLOCK_OPT_BACKING_FILE,
5416 .type = QEMU_OPT_STRING,
5417 .help = "File name of a base image"
5418 },
5419 {
5420 .name = BLOCK_OPT_BACKING_FMT,
5421 .type = QEMU_OPT_STRING,
5422 .help = "Image format of the base image"
5423 },
5424 {
5425 .name = BLOCK_OPT_DATA_FILE,
5426 .type = QEMU_OPT_STRING,
5427 .help = "File name of an external data file"
5428 },
5429 {
5430 .name = BLOCK_OPT_DATA_FILE_RAW,
5431 .type = QEMU_OPT_BOOL,
5432 .help = "The external data file must stay valid as a raw image"
5433 },
5434 {
5435 .name = BLOCK_OPT_ENCRYPT,
5436 .type = QEMU_OPT_BOOL,
5437 .help = "Encrypt the image with format 'aes'. (Deprecated "
5438 "in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)",
5439 },
5440 {
5441 .name = BLOCK_OPT_ENCRYPT_FORMAT,
5442 .type = QEMU_OPT_STRING,
5443 .help = "Encrypt the image, format choices: 'aes', 'luks'",
5444 },
5445 BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.",
5446 "ID of secret providing qcow AES key or LUKS passphrase"),
5447 BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG("encrypt."),
5448 BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE("encrypt."),
5449 BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG("encrypt."),
5450 BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG("encrypt."),
5451 BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG("encrypt."),
5452 BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME("encrypt."),
5453 {
5454 .name = BLOCK_OPT_CLUSTER_SIZE,
5455 .type = QEMU_OPT_SIZE,
5456 .help = "qcow2 cluster size",
5457 .def_value_str = stringify(DEFAULT_CLUSTER_SIZE)
5458 },
5459 {
5460 .name = BLOCK_OPT_PREALLOC,
5461 .type = QEMU_OPT_STRING,
5462 .help = "Preallocation mode (allowed values: off, metadata, "
5463 "falloc, full)"
5464 },
5465 {
5466 .name = BLOCK_OPT_LAZY_REFCOUNTS,
5467 .type = QEMU_OPT_BOOL,
5468 .help = "Postpone refcount updates",
5469 .def_value_str = "off"
5470 },
5471 {
5472 .name = BLOCK_OPT_REFCOUNT_BITS,
5473 .type = QEMU_OPT_NUMBER,
5474 .help = "Width of a reference count entry in bits",
5475 .def_value_str = "16"
5476 },
5477 { }
5478 }
5479};
5480
5481static const char *const qcow2_strong_runtime_opts[] = {
5482 "encrypt." BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET,
5483
5484 NULL
5485};
5486
5487BlockDriver bdrv_qcow2 = {
5488 .format_name = "qcow2",
5489 .instance_size = sizeof(BDRVQcow2State),
5490 .bdrv_probe = qcow2_probe,
5491 .bdrv_open = qcow2_open,
5492 .bdrv_close = qcow2_close,
5493 .bdrv_reopen_prepare = qcow2_reopen_prepare,
5494 .bdrv_reopen_commit = qcow2_reopen_commit,
5495 .bdrv_reopen_abort = qcow2_reopen_abort,
5496 .bdrv_join_options = qcow2_join_options,
5497 .bdrv_child_perm = bdrv_format_default_perms,
5498 .bdrv_co_create_opts = qcow2_co_create_opts,
5499 .bdrv_co_create = qcow2_co_create,
5500 .bdrv_has_zero_init = qcow2_has_zero_init,
5501 .bdrv_has_zero_init_truncate = bdrv_has_zero_init_1,
5502 .bdrv_co_block_status = qcow2_co_block_status,
5503
5504 .bdrv_co_preadv_part = qcow2_co_preadv_part,
5505 .bdrv_co_pwritev_part = qcow2_co_pwritev_part,
5506 .bdrv_co_flush_to_os = qcow2_co_flush_to_os,
5507
5508 .bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes,
5509 .bdrv_co_pdiscard = qcow2_co_pdiscard,
5510 .bdrv_co_copy_range_from = qcow2_co_copy_range_from,
5511 .bdrv_co_copy_range_to = qcow2_co_copy_range_to,
5512 .bdrv_co_truncate = qcow2_co_truncate,
5513 .bdrv_co_pwritev_compressed_part = qcow2_co_pwritev_compressed_part,
5514 .bdrv_make_empty = qcow2_make_empty,
5515
5516 .bdrv_snapshot_create = qcow2_snapshot_create,
5517 .bdrv_snapshot_goto = qcow2_snapshot_goto,
5518 .bdrv_snapshot_delete = qcow2_snapshot_delete,
5519 .bdrv_snapshot_list = qcow2_snapshot_list,
5520 .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp,
5521 .bdrv_measure = qcow2_measure,
5522 .bdrv_get_info = qcow2_get_info,
5523 .bdrv_get_specific_info = qcow2_get_specific_info,
5524
5525 .bdrv_save_vmstate = qcow2_save_vmstate,
5526 .bdrv_load_vmstate = qcow2_load_vmstate,
5527
5528 .supports_backing = true,
5529 .bdrv_change_backing_file = qcow2_change_backing_file,
5530
5531 .bdrv_refresh_limits = qcow2_refresh_limits,
5532 .bdrv_co_invalidate_cache = qcow2_co_invalidate_cache,
5533 .bdrv_inactivate = qcow2_inactivate,
5534
5535 .create_opts = &qcow2_create_opts,
5536 .strong_runtime_opts = qcow2_strong_runtime_opts,
5537 .mutable_opts = mutable_opts,
5538 .bdrv_co_check = qcow2_co_check,
5539 .bdrv_amend_options = qcow2_amend_options,
5540
5541 .bdrv_detach_aio_context = qcow2_detach_aio_context,
5542 .bdrv_attach_aio_context = qcow2_attach_aio_context,
5543
5544 .bdrv_co_can_store_new_dirty_bitmap = qcow2_co_can_store_new_dirty_bitmap,
5545 .bdrv_co_remove_persistent_dirty_bitmap =
5546 qcow2_co_remove_persistent_dirty_bitmap,
5547};
5548
5549static void bdrv_qcow2_init(void)
5550{
5551 bdrv_register(&bdrv_qcow2);
5552}
5553
5554block_init(bdrv_qcow2_init);
5555