1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include "qemu/osdep.h"
26
27#define ZLIB_CONST
28#include <zlib.h>
29
30#include "block/block_int.h"
31#include "block/qdict.h"
32#include "sysemu/block-backend.h"
33#include "qemu/module.h"
34#include "qcow2.h"
35#include "qemu/error-report.h"
36#include "qapi/error.h"
37#include "qapi/qapi-events-block-core.h"
38#include "qapi/qmp/qdict.h"
39#include "qapi/qmp/qstring.h"
40#include "trace.h"
41#include "qemu/option_int.h"
42#include "qemu/cutils.h"
43#include "qemu/bswap.h"
44#include "qapi/qobject-input-visitor.h"
45#include "qapi/qapi-visit-block-core.h"
46#include "crypto.h"
47#include "block/thread-pool.h"
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
/*
 * Fixed-size header that precedes the payload of every qcow2 header
 * extension. qcow2_read_extensions() reads it directly from the image file
 * (hence the packed layout) and converts both fields from big-endian.
 */
typedef struct {
    /* Extension type: a QCOW2_EXT_MAGIC_* value or an unknown magic */
    uint32_t magic;
    /* Payload length in bytes, not counting the padding to 8 bytes */
    uint32_t len;
} QEMU_PACKED QCowExtension;
70
/* Values of QCowExtension.magic handled by qcow2_read_extensions() */

/* Terminates the list of header extensions */
#define QCOW2_EXT_MAGIC_END 0
/* Payload is the name of the backing file format */
#define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
/* Payload is a table of Qcow2Feature entries */
#define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857
/* Payload is a Qcow2CryptoHeaderExtension (offset/length of the crypto header) */
#define QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77
/* Payload is a Qcow2BitmapHeaderExt describing the bitmap directory */
#define QCOW2_EXT_MAGIC_BITMAPS 0x23852875
76
77static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
78{
79 const QCowHeader *cow_header = (const void *)buf;
80
81 if (buf_size >= sizeof(QCowHeader) &&
82 be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
83 be32_to_cpu(cow_header->version) >= 2)
84 return 100;
85 else
86 return 0;
87}
88
89
90static ssize_t qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset,
91 uint8_t *buf, size_t buflen,
92 void *opaque, Error **errp)
93{
94 BlockDriverState *bs = opaque;
95 BDRVQcow2State *s = bs->opaque;
96 ssize_t ret;
97
98 if ((offset + buflen) > s->crypto_header.length) {
99 error_setg(errp, "Request for data outside of extension header");
100 return -1;
101 }
102
103 ret = bdrv_pread(bs->file,
104 s->crypto_header.offset + offset, buf, buflen);
105 if (ret < 0) {
106 error_setg_errno(errp, -ret, "Could not read encryption header");
107 return -1;
108 }
109 return ret;
110}
111
112
113static ssize_t qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen,
114 void *opaque, Error **errp)
115{
116 BlockDriverState *bs = opaque;
117 BDRVQcow2State *s = bs->opaque;
118 int64_t ret;
119 int64_t clusterlen;
120
121 ret = qcow2_alloc_clusters(bs, headerlen);
122 if (ret < 0) {
123 error_setg_errno(errp, -ret,
124 "Cannot allocate cluster for LUKS header size %zu",
125 headerlen);
126 return -1;
127 }
128
129 s->crypto_header.length = headerlen;
130 s->crypto_header.offset = ret;
131
132
133
134 clusterlen = size_to_clusters(s, headerlen) * s->cluster_size;
135 assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen) == 0);
136 ret = bdrv_pwrite_zeroes(bs->file,
137 ret + headerlen,
138 clusterlen - headerlen, 0);
139 if (ret < 0) {
140 error_setg_errno(errp, -ret, "Could not zero fill encryption header");
141 return -1;
142 }
143
144 return ret;
145}
146
147
148static ssize_t qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset,
149 const uint8_t *buf, size_t buflen,
150 void *opaque, Error **errp)
151{
152 BlockDriverState *bs = opaque;
153 BDRVQcow2State *s = bs->opaque;
154 ssize_t ret;
155
156 if ((offset + buflen) > s->crypto_header.length) {
157 error_setg(errp, "Request for data outside of extension header");
158 return -1;
159 }
160
161 ret = bdrv_pwrite(bs->file,
162 s->crypto_header.offset + offset, buf, buflen);
163 if (ret < 0) {
164 error_setg_errno(errp, -ret, "Could not read encryption header");
165 return -1;
166 }
167 return ret;
168}
169
170
171
172
173
174
175
176
177
178static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
179 uint64_t end_offset, void **p_feature_table,
180 int flags, bool *need_update_header,
181 Error **errp)
182{
183 BDRVQcow2State *s = bs->opaque;
184 QCowExtension ext;
185 uint64_t offset;
186 int ret;
187 Qcow2BitmapHeaderExt bitmaps_ext;
188
189 if (need_update_header != NULL) {
190 *need_update_header = false;
191 }
192
193#ifdef DEBUG_EXT
194 printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
195#endif
196 offset = start_offset;
197 while (offset < end_offset) {
198
199#ifdef DEBUG_EXT
200
201 if (offset > s->cluster_size)
202 printf("qcow2_read_extension: suspicious offset %lu\n", offset);
203
204 printf("attempting to read extended header in offset %lu\n", offset);
205#endif
206
207 ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext));
208 if (ret < 0) {
209 error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: "
210 "pread fail from offset %" PRIu64, offset);
211 return 1;
212 }
213 ext.magic = be32_to_cpu(ext.magic);
214 ext.len = be32_to_cpu(ext.len);
215 offset += sizeof(ext);
216#ifdef DEBUG_EXT
217 printf("ext.magic = 0x%x\n", ext.magic);
218#endif
219 if (offset > end_offset || ext.len > end_offset - offset) {
220 error_setg(errp, "Header extension too large");
221 return -EINVAL;
222 }
223
224 switch (ext.magic) {
225 case QCOW2_EXT_MAGIC_END:
226 return 0;
227
228 case QCOW2_EXT_MAGIC_BACKING_FORMAT:
229 if (ext.len >= sizeof(bs->backing_format)) {
230 error_setg(errp, "ERROR: ext_backing_format: len=%" PRIu32
231 " too large (>=%zu)", ext.len,
232 sizeof(bs->backing_format));
233 return 2;
234 }
235 ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len);
236 if (ret < 0) {
237 error_setg_errno(errp, -ret, "ERROR: ext_backing_format: "
238 "Could not read format name");
239 return 3;
240 }
241 bs->backing_format[ext.len] = '\0';
242 s->image_backing_format = g_strdup(bs->backing_format);
243#ifdef DEBUG_EXT
244 printf("Qcow2: Got format extension %s\n", bs->backing_format);
245#endif
246 break;
247
248 case QCOW2_EXT_MAGIC_FEATURE_TABLE:
249 if (p_feature_table != NULL) {
250 void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
251 ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
252 if (ret < 0) {
253 error_setg_errno(errp, -ret, "ERROR: ext_feature_table: "
254 "Could not read table");
255 return ret;
256 }
257
258 *p_feature_table = feature_table;
259 }
260 break;
261
262 case QCOW2_EXT_MAGIC_CRYPTO_HEADER: {
263 unsigned int cflags = 0;
264 if (s->crypt_method_header != QCOW_CRYPT_LUKS) {
265 error_setg(errp, "CRYPTO header extension only "
266 "expected with LUKS encryption method");
267 return -EINVAL;
268 }
269 if (ext.len != sizeof(Qcow2CryptoHeaderExtension)) {
270 error_setg(errp, "CRYPTO header extension size %u, "
271 "but expected size %zu", ext.len,
272 sizeof(Qcow2CryptoHeaderExtension));
273 return -EINVAL;
274 }
275
276 ret = bdrv_pread(bs->file, offset, &s->crypto_header, ext.len);
277 if (ret < 0) {
278 error_setg_errno(errp, -ret,
279 "Unable to read CRYPTO header extension");
280 return ret;
281 }
282 s->crypto_header.offset = be64_to_cpu(s->crypto_header.offset);
283 s->crypto_header.length = be64_to_cpu(s->crypto_header.length);
284
285 if ((s->crypto_header.offset % s->cluster_size) != 0) {
286 error_setg(errp, "Encryption header offset '%" PRIu64 "' is "
287 "not a multiple of cluster size '%u'",
288 s->crypto_header.offset, s->cluster_size);
289 return -EINVAL;
290 }
291
292 if (flags & BDRV_O_NO_IO) {
293 cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
294 }
295 s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
296 qcow2_crypto_hdr_read_func,
297 bs, cflags, errp);
298 if (!s->crypto) {
299 return -EINVAL;
300 }
301 } break;
302
303 case QCOW2_EXT_MAGIC_BITMAPS:
304 if (ext.len != sizeof(bitmaps_ext)) {
305 error_setg_errno(errp, -ret, "bitmaps_ext: "
306 "Invalid extension length");
307 return -EINVAL;
308 }
309
310 if (!(s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS)) {
311 if (s->qcow_version < 3) {
312
313 warn_report("This qcow2 v2 image contains bitmaps, but "
314 "they may have been modified by a program "
315 "without persistent bitmap support; so now "
316 "they must all be considered inconsistent");
317 } else {
318 warn_report("a program lacking bitmap support "
319 "modified this file, so all bitmaps are now "
320 "considered inconsistent");
321 }
322 error_printf("Some clusters may be leaked, "
323 "run 'qemu-img check -r' on the image "
324 "file to fix.");
325 if (need_update_header != NULL) {
326
327 *need_update_header = true;
328 }
329 break;
330 }
331
332 ret = bdrv_pread(bs->file, offset, &bitmaps_ext, ext.len);
333 if (ret < 0) {
334 error_setg_errno(errp, -ret, "bitmaps_ext: "
335 "Could not read ext header");
336 return ret;
337 }
338
339 if (bitmaps_ext.reserved32 != 0) {
340 error_setg_errno(errp, -ret, "bitmaps_ext: "
341 "Reserved field is not zero");
342 return -EINVAL;
343 }
344
345 bitmaps_ext.nb_bitmaps = be32_to_cpu(bitmaps_ext.nb_bitmaps);
346 bitmaps_ext.bitmap_directory_size =
347 be64_to_cpu(bitmaps_ext.bitmap_directory_size);
348 bitmaps_ext.bitmap_directory_offset =
349 be64_to_cpu(bitmaps_ext.bitmap_directory_offset);
350
351 if (bitmaps_ext.nb_bitmaps > QCOW2_MAX_BITMAPS) {
352 error_setg(errp,
353 "bitmaps_ext: Image has %" PRIu32 " bitmaps, "
354 "exceeding the QEMU supported maximum of %d",
355 bitmaps_ext.nb_bitmaps, QCOW2_MAX_BITMAPS);
356 return -EINVAL;
357 }
358
359 if (bitmaps_ext.nb_bitmaps == 0) {
360 error_setg(errp, "found bitmaps extension with zero bitmaps");
361 return -EINVAL;
362 }
363
364 if (bitmaps_ext.bitmap_directory_offset & (s->cluster_size - 1)) {
365 error_setg(errp, "bitmaps_ext: "
366 "invalid bitmap directory offset");
367 return -EINVAL;
368 }
369
370 if (bitmaps_ext.bitmap_directory_size >
371 QCOW2_MAX_BITMAP_DIRECTORY_SIZE) {
372 error_setg(errp, "bitmaps_ext: "
373 "bitmap directory size (%" PRIu64 ") exceeds "
374 "the maximum supported size (%d)",
375 bitmaps_ext.bitmap_directory_size,
376 QCOW2_MAX_BITMAP_DIRECTORY_SIZE);
377 return -EINVAL;
378 }
379
380 s->nb_bitmaps = bitmaps_ext.nb_bitmaps;
381 s->bitmap_directory_offset =
382 bitmaps_ext.bitmap_directory_offset;
383 s->bitmap_directory_size =
384 bitmaps_ext.bitmap_directory_size;
385
386#ifdef DEBUG_EXT
387 printf("Qcow2: Got bitmaps extension: "
388 "offset=%" PRIu64 " nb_bitmaps=%" PRIu32 "\n",
389 s->bitmap_directory_offset, s->nb_bitmaps);
390#endif
391 break;
392
393 default:
394
395
396
397 {
398 Qcow2UnknownHeaderExtension *uext;
399
400 uext = g_malloc0(sizeof(*uext) + ext.len);
401 uext->magic = ext.magic;
402 uext->len = ext.len;
403 QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
404
405 ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
406 if (ret < 0) {
407 error_setg_errno(errp, -ret, "ERROR: unknown extension: "
408 "Could not read data");
409 return ret;
410 }
411 }
412 break;
413 }
414
415 offset += ((ext.len + 7) & ~7);
416 }
417
418 return 0;
419}
420
421static void cleanup_unknown_header_ext(BlockDriverState *bs)
422{
423 BDRVQcow2State *s = bs->opaque;
424 Qcow2UnknownHeaderExtension *uext, *next;
425
426 QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) {
427 QLIST_REMOVE(uext, next);
428 g_free(uext);
429 }
430}
431
432static void report_unsupported_feature(Error **errp, Qcow2Feature *table,
433 uint64_t mask)
434{
435 char *features = g_strdup("");
436 char *old;
437
438 while (table && table->name[0] != '\0') {
439 if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) {
440 if (mask & (1ULL << table->bit)) {
441 old = features;
442 features = g_strdup_printf("%s%s%.46s", old, *old ? ", " : "",
443 table->name);
444 g_free(old);
445 mask &= ~(1ULL << table->bit);
446 }
447 }
448 table++;
449 }
450
451 if (mask) {
452 old = features;
453 features = g_strdup_printf("%s%sUnknown incompatible feature: %" PRIx64,
454 old, *old ? ", " : "", mask);
455 g_free(old);
456 }
457
458 error_setg(errp, "Unsupported qcow2 feature(s): %s", features);
459 g_free(features);
460}
461
462
463
464
465
466
467
468
469int qcow2_mark_dirty(BlockDriverState *bs)
470{
471 BDRVQcow2State *s = bs->opaque;
472 uint64_t val;
473 int ret;
474
475 assert(s->qcow_version >= 3);
476
477 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
478 return 0;
479 }
480
481 val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
482 ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
483 &val, sizeof(val));
484 if (ret < 0) {
485 return ret;
486 }
487 ret = bdrv_flush(bs->file->bs);
488 if (ret < 0) {
489 return ret;
490 }
491
492
493 s->incompatible_features |= QCOW2_INCOMPAT_DIRTY;
494 return 0;
495}
496
497
498
499
500
501
502static int qcow2_mark_clean(BlockDriverState *bs)
503{
504 BDRVQcow2State *s = bs->opaque;
505
506 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
507 int ret;
508
509 s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;
510
511 ret = qcow2_flush_caches(bs);
512 if (ret < 0) {
513 return ret;
514 }
515
516 return qcow2_update_header(bs);
517 }
518 return 0;
519}
520
521
522
523
524int qcow2_mark_corrupt(BlockDriverState *bs)
525{
526 BDRVQcow2State *s = bs->opaque;
527
528 s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT;
529 return qcow2_update_header(bs);
530}
531
532
533
534
535
536int qcow2_mark_consistent(BlockDriverState *bs)
537{
538 BDRVQcow2State *s = bs->opaque;
539
540 if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
541 int ret = qcow2_flush_caches(bs);
542 if (ret < 0) {
543 return ret;
544 }
545
546 s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT;
547 return qcow2_update_header(bs);
548 }
549 return 0;
550}
551
552static int coroutine_fn qcow2_co_check_locked(BlockDriverState *bs,
553 BdrvCheckResult *result,
554 BdrvCheckMode fix)
555{
556 int ret = qcow2_check_refcounts(bs, result, fix);
557 if (ret < 0) {
558 return ret;
559 }
560
561 if (fix && result->check_errors == 0 && result->corruptions == 0) {
562 ret = qcow2_mark_clean(bs);
563 if (ret < 0) {
564 return ret;
565 }
566 return qcow2_mark_consistent(bs);
567 }
568 return ret;
569}
570
571static int coroutine_fn qcow2_co_check(BlockDriverState *bs,
572 BdrvCheckResult *result,
573 BdrvCheckMode fix)
574{
575 BDRVQcow2State *s = bs->opaque;
576 int ret;
577
578 qemu_co_mutex_lock(&s->lock);
579 ret = qcow2_co_check_locked(bs, result, fix);
580 qemu_co_mutex_unlock(&s->lock);
581 return ret;
582}
583
584int qcow2_validate_table(BlockDriverState *bs, uint64_t offset,
585 uint64_t entries, size_t entry_len,
586 int64_t max_size_bytes, const char *table_name,
587 Error **errp)
588{
589 BDRVQcow2State *s = bs->opaque;
590
591 if (entries > max_size_bytes / entry_len) {
592 error_setg(errp, "%s too large", table_name);
593 return -EFBIG;
594 }
595
596
597
598 if ((INT64_MAX - entries * entry_len < offset) ||
599 (offset_into_cluster(s, offset) != 0)) {
600 error_setg(errp, "%s offset invalid", table_name);
601 return -EINVAL;
602 }
603
604 return 0;
605}
606
/*
 * Runtime options understood by the qcow2 driver. The list is used by
 * qcow2_update_options_prepare() to absorb and parse the options passed
 * when an image is opened or its options are updated.
 */
static QemuOptsList qcow2_runtime_opts = {
    .name = "qcow2",
    .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
    .desc = {
        {
            .name = QCOW2_OPT_LAZY_REFCOUNTS,
            .type = QEMU_OPT_BOOL,
            .help = "Postpone refcount updates",
        },
        /* Discard handling */
        {
            .name = QCOW2_OPT_DISCARD_REQUEST,
            .type = QEMU_OPT_BOOL,
            .help = "Pass guest discard requests to the layer below",
        },
        {
            .name = QCOW2_OPT_DISCARD_SNAPSHOT,
            .type = QEMU_OPT_BOOL,
            .help = "Generate discard requests when snapshot related space "
                    "is freed",
        },
        {
            .name = QCOW2_OPT_DISCARD_OTHER,
            .type = QEMU_OPT_BOOL,
            .help = "Generate discard requests when other clusters are freed",
        },
        /*
         * Overlap checks: the two template options select a preset, the
         * boolean options below override individual checks.
         */
        {
            .name = QCOW2_OPT_OVERLAP,
            .type = QEMU_OPT_STRING,
            .help = "Selects which overlap checks to perform from a range of "
                    "templates (none, constant, cached, all)",
        },
        {
            .name = QCOW2_OPT_OVERLAP_TEMPLATE,
            .type = QEMU_OPT_STRING,
            .help = "Selects which overlap checks to perform from a range of "
                    "templates (none, constant, cached, all)",
        },
        {
            .name = QCOW2_OPT_OVERLAP_MAIN_HEADER,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the main qcow2 header",
        },
        {
            .name = QCOW2_OPT_OVERLAP_ACTIVE_L1,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the active L1 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_ACTIVE_L2,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into an active L2 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the refcount table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into a refcount block",
        },
        {
            .name = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the snapshot table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_INACTIVE_L1,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into an inactive L1 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_INACTIVE_L2,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into an inactive L2 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the bitmap directory",
        },
        /* Metadata cache sizing */
        {
            .name = QCOW2_OPT_CACHE_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Maximum combined metadata (L2 tables and refcount blocks) "
                    "cache size",
        },
        {
            .name = QCOW2_OPT_L2_CACHE_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Maximum L2 table cache size",
        },
        {
            .name = QCOW2_OPT_L2_CACHE_ENTRY_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Size of each entry in the L2 cache",
        },
        {
            .name = QCOW2_OPT_REFCOUNT_CACHE_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Maximum refcount block cache size",
        },
        {
            .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL,
            .type = QEMU_OPT_NUMBER,
            .help = "Clean unused cache entries after this time (in seconds)",
        },
        /* Encryption key secret, under the "encrypt." prefix */
        BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.",
            "ID of secret providing qcow2 AES key or LUKS passphrase"),
        { /* end of list */ }
    },
};
720
/*
 * Maps each overlap check bit number to the name of the boolean runtime
 * option that switches this check on or off. Used by
 * qcow2_update_options_prepare() to build the overlap check bit mask.
 */
static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = {
    [QCOW2_OL_MAIN_HEADER_BITNR] = QCOW2_OPT_OVERLAP_MAIN_HEADER,
    [QCOW2_OL_ACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L1,
    [QCOW2_OL_ACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L2,
    [QCOW2_OL_REFCOUNT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
    [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
    [QCOW2_OL_SNAPSHOT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
    [QCOW2_OL_INACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L1,
    [QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2,
    [QCOW2_OL_BITMAP_DIRECTORY_BITNR] = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
};
732
733static void cache_clean_timer_cb(void *opaque)
734{
735 BlockDriverState *bs = opaque;
736 BDRVQcow2State *s = bs->opaque;
737 qcow2_cache_clean_unused(s->l2_table_cache);
738 qcow2_cache_clean_unused(s->refcount_block_cache);
739 timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
740 (int64_t) s->cache_clean_interval * 1000);
741}
742
743static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context)
744{
745 BDRVQcow2State *s = bs->opaque;
746 if (s->cache_clean_interval > 0) {
747 s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL,
748 SCALE_MS, cache_clean_timer_cb,
749 bs);
750 timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
751 (int64_t) s->cache_clean_interval * 1000);
752 }
753}
754
755static void cache_clean_timer_del(BlockDriverState *bs)
756{
757 BDRVQcow2State *s = bs->opaque;
758 if (s->cache_clean_timer) {
759 timer_del(s->cache_clean_timer);
760 timer_free(s->cache_clean_timer);
761 s->cache_clean_timer = NULL;
762 }
763}
764
/*
 * AioContext detach handler: removes the cache cleaning timer via
 * cache_clean_timer_del().
 */
static void qcow2_detach_aio_context(BlockDriverState *bs)
{
    cache_clean_timer_del(bs);
}
769
/*
 * AioContext attach handler: sets up the cache cleaning timer in
 * @new_context via cache_clean_timer_init().
 */
static void qcow2_attach_aio_context(BlockDriverState *bs,
                                     AioContext *new_context)
{
    cache_clean_timer_init(bs, new_context);
}
775
776static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
777 uint64_t *l2_cache_size,
778 uint64_t *l2_cache_entry_size,
779 uint64_t *refcount_cache_size, Error **errp)
780{
781 BDRVQcow2State *s = bs->opaque;
782 uint64_t combined_cache_size, l2_cache_max_setting;
783 bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set;
784 int min_refcount_cache = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size;
785 uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
786 uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8);
787
788 combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE);
789 l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE);
790 refcount_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
791
792 combined_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_CACHE_SIZE, 0);
793 l2_cache_max_setting = qemu_opt_get_size(opts, QCOW2_OPT_L2_CACHE_SIZE,
794 DEFAULT_L2_CACHE_MAX_SIZE);
795 *refcount_cache_size = qemu_opt_get_size(opts,
796 QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0);
797
798 *l2_cache_entry_size = qemu_opt_get_size(
799 opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE, s->cluster_size);
800
801 *l2_cache_size = MIN(max_l2_cache, l2_cache_max_setting);
802
803 if (combined_cache_size_set) {
804 if (l2_cache_size_set && refcount_cache_size_set) {
805 error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE
806 " and " QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not be set "
807 "at the same time");
808 return;
809 } else if (l2_cache_size_set &&
810 (l2_cache_max_setting > combined_cache_size)) {
811 error_setg(errp, QCOW2_OPT_L2_CACHE_SIZE " may not exceed "
812 QCOW2_OPT_CACHE_SIZE);
813 return;
814 } else if (*refcount_cache_size > combined_cache_size) {
815 error_setg(errp, QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not exceed "
816 QCOW2_OPT_CACHE_SIZE);
817 return;
818 }
819
820 if (l2_cache_size_set) {
821 *refcount_cache_size = combined_cache_size - *l2_cache_size;
822 } else if (refcount_cache_size_set) {
823 *l2_cache_size = combined_cache_size - *refcount_cache_size;
824 } else {
825
826
827 if (combined_cache_size >= max_l2_cache + min_refcount_cache) {
828 *l2_cache_size = max_l2_cache;
829 *refcount_cache_size = combined_cache_size - *l2_cache_size;
830 } else {
831 *refcount_cache_size =
832 MIN(combined_cache_size, min_refcount_cache);
833 *l2_cache_size = combined_cache_size - *refcount_cache_size;
834 }
835 }
836 }
837
838
839
840 if (*l2_cache_entry_size < (1 << MIN_CLUSTER_BITS) ||
841 *l2_cache_entry_size > s->cluster_size ||
842 !is_power_of_2(*l2_cache_entry_size)) {
843 error_setg(errp, "L2 cache entry size must be a power of two "
844 "between %d and the cluster size (%d)",
845 1 << MIN_CLUSTER_BITS, s->cluster_size);
846 return;
847 }
848}
849
/*
 * Settings computed by qcow2_update_options_prepare() that are not applied
 * to the driver state yet. qcow2_update_options_commit() moves them into
 * BDRVQcow2State; qcow2_update_options_abort() releases the caches and the
 * crypto options instead.
 */
typedef struct Qcow2ReopenState {
    /* Newly created metadata caches */
    Qcow2Cache *l2_table_cache;
    Qcow2Cache *refcount_block_cache;
    /* Number of uint64_t entries in one L2 cache entry */
    int l2_slice_size;
    bool use_lazy_refcounts;
    /* Bit mask of enabled overlap checks */
    int overlap_check;
    bool discard_passthrough[QCOW2_DISCARD_MAX];
    /* Interval of the cache cleaning timer in seconds */
    uint64_t cache_clean_interval;
    /* Options for opening the encryption layer */
    QCryptoBlockOpenOptions *crypto_opts;
} Qcow2ReopenState;
860
/*
 * Parse the runtime options in @options and store the resulting settings
 * in @r without touching the settings that are currently in effect.
 *
 * @options: option dictionary; the "encrypt." entries are extracted from it
 *           and the options known to qcow2_runtime_opts are absorbed
 * @flags:   open flags; BDRV_O_UNMAP provides the default for the
 *           discard-request passthrough setting
 * @errp:    set on failure
 *
 * As side effects the existing metadata caches are flushed, and the image
 * is marked clean when lazy refcounts get switched off.
 *
 * Returns 0 on success and a negative errno value on failure. On failure
 * @r may already hold newly created caches; qcow2_update_options() releases
 * them through qcow2_update_options_abort().
 */
static int qcow2_update_options_prepare(BlockDriverState *bs,
                                        Qcow2ReopenState *r,
                                        QDict *options, int flags,
                                        Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    QemuOpts *opts = NULL;
    const char *opt_overlap_check, *opt_overlap_check_template;
    int overlap_check_template = 0;
    uint64_t l2_cache_size, l2_cache_entry_size, refcount_cache_size;
    int i;
    const char *encryptfmt;
    QDict *encryptopts = NULL;
    Error *local_err = NULL;
    int ret;

    /* Split off the "encrypt." options; they are handled at the end */
    qdict_extract_subqdict(options, &encryptopts, "encrypt.");
    encryptfmt = qdict_get_try_str(encryptopts, "format");

    opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

    /* Get the L2 table/refcount block cache sizes from the options */
    read_cache_sizes(bs, opts, &l2_cache_size, &l2_cache_entry_size,
                     &refcount_cache_size, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

    /* Convert the L2 cache size from bytes to a number of cache entries */
    l2_cache_size /= l2_cache_entry_size;
    if (l2_cache_size < MIN_L2_CACHE_SIZE) {
        l2_cache_size = MIN_L2_CACHE_SIZE;
    }
    if (l2_cache_size > INT_MAX) {
        error_setg(errp, "L2 cache size too big");
        ret = -EINVAL;
        goto fail;
    }

    /* Convert the refcount cache size from bytes to a number of clusters */
    refcount_cache_size /= s->cluster_size;
    if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) {
        refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE;
    }
    if (refcount_cache_size > INT_MAX) {
        error_setg(errp, "Refcount cache size too big");
        ret = -EINVAL;
        goto fail;
    }

    /* Flush the old caches, then allocate the new ones */
    if (s->l2_table_cache) {
        ret = qcow2_cache_flush(bs, s->l2_table_cache);
        if (ret) {
            error_setg_errno(errp, -ret, "Failed to flush the L2 table cache");
            goto fail;
        }
    }

    if (s->refcount_block_cache) {
        ret = qcow2_cache_flush(bs, s->refcount_block_cache);
        if (ret) {
            error_setg_errno(errp, -ret,
                             "Failed to flush the refcount block cache");
            goto fail;
        }
    }

    r->l2_slice_size = l2_cache_entry_size / sizeof(uint64_t);
    r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size,
                                           l2_cache_entry_size);
    r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size,
                                                 s->cluster_size);
    if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) {
        error_setg(errp, "Could not allocate metadata caches");
        ret = -ENOMEM;
        goto fail;
    }

    /* New interval for the cache cleaning timer */
    r->cache_clean_interval =
        qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL,
                            DEFAULT_CACHE_CLEAN_INTERVAL);
#ifndef CONFIG_LINUX
    if (r->cache_clean_interval != 0) {
        error_setg(errp, QCOW2_OPT_CACHE_CLEAN_INTERVAL
                   " not supported on this host");
        ret = -EINVAL;
        goto fail;
    }
#endif
    if (r->cache_clean_interval > UINT_MAX) {
        error_setg(errp, "Cache clean interval too big");
        ret = -EINVAL;
        goto fail;
    }

    /* Lazy refcounts; defaults to the compatible feature bit of the image */
    r->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
        (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
    if (r->use_lazy_refcounts && s->qcow_version < 3) {
        error_setg(errp, "Lazy refcounts require a qcow2 image with at least "
                   "qemu 1.1 compatibility level");
        ret = -EINVAL;
        goto fail;
    }

    /* Going from enabled to disabled: mark the image clean */
    if (s->use_lazy_refcounts && !r->use_lazy_refcounts) {
        ret = qcow2_mark_clean(bs);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to disable lazy refcounts");
            goto fail;
        }
    }

    /* Overlap check options; the two template options must not disagree */
    opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP);
    opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE);
    if (opt_overlap_check_template && opt_overlap_check &&
        strcmp(opt_overlap_check_template, opt_overlap_check))
    {
        error_setg(errp, "Conflicting values for qcow2 options '"
                   QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE
                   "' ('%s')", opt_overlap_check, opt_overlap_check_template);
        ret = -EINVAL;
        goto fail;
    }
    /* "cached" is the template used when neither option is given */
    if (!opt_overlap_check) {
        opt_overlap_check = opt_overlap_check_template ?: "cached";
    }

    if (!strcmp(opt_overlap_check, "none")) {
        overlap_check_template = 0;
    } else if (!strcmp(opt_overlap_check, "constant")) {
        overlap_check_template = QCOW2_OL_CONSTANT;
    } else if (!strcmp(opt_overlap_check, "cached")) {
        overlap_check_template = QCOW2_OL_CACHED;
    } else if (!strcmp(opt_overlap_check, "all")) {
        overlap_check_template = QCOW2_OL_ALL;
    } else {
        error_setg(errp, "Unsupported value '%s' for qcow2 option "
                   "'overlap-check'. Allowed are any of the following: "
                   "none, constant, cached, all", opt_overlap_check);
        ret = -EINVAL;
        goto fail;
    }

    r->overlap_check = 0;
    for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) {
        /*
         * The template only supplies the default for each bit; the
         * associated boolean option overrides it when given.
         */
        r->overlap_check |=
            qemu_opt_get_bool(opts, overlap_bool_option_names[i],
                              overlap_check_template & (1 << i)) << i;
    }

    /* Discard passthrough settings for each discard type */
    r->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
    r->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
    r->discard_passthrough[QCOW2_DISCARD_REQUEST] =
        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
                          flags & BDRV_O_UNMAP);
    r->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
    r->discard_passthrough[QCOW2_DISCARD_OTHER] =
        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);

    /*
     * The encryption format requested in the options has to match the
     * method recorded in the image header.
     */
    switch (s->crypt_method_header) {
    case QCOW_CRYPT_NONE:
        if (encryptfmt) {
            error_setg(errp, "No encryption in image header, but options "
                       "specified format '%s'", encryptfmt);
            ret = -EINVAL;
            goto fail;
        }
        break;

    case QCOW_CRYPT_AES:
        if (encryptfmt && !g_str_equal(encryptfmt, "aes")) {
            error_setg(errp,
                       "Header reported 'aes' encryption format but "
                       "options specify '%s'", encryptfmt);
            ret = -EINVAL;
            goto fail;
        }
        qdict_put_str(encryptopts, "format", "qcow");
        r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp);
        break;

    case QCOW_CRYPT_LUKS:
        if (encryptfmt && !g_str_equal(encryptfmt, "luks")) {
            error_setg(errp,
                       "Header reported 'luks' encryption format but "
                       "options specify '%s'", encryptfmt);
            ret = -EINVAL;
            goto fail;
        }
        qdict_put_str(encryptopts, "format", "luks");
        r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp);
        break;

    default:
        /* Turned into a failure below, as r->crypto_opts is not set */
        error_setg(errp, "Unsupported encryption method %d",
                   s->crypt_method_header);
        break;
    }
    /* Covers the unsupported method and a failed options init above */
    if (s->crypt_method_header != QCOW_CRYPT_NONE && !r->crypto_opts) {
        ret = -EINVAL;
        goto fail;
    }

    ret = 0;
fail:
    /* Shared exit path for success and failure */
    qobject_unref(encryptopts);
    qemu_opts_del(opts);
    opts = NULL;
    return ret;
}
1084
1085static void qcow2_update_options_commit(BlockDriverState *bs,
1086 Qcow2ReopenState *r)
1087{
1088 BDRVQcow2State *s = bs->opaque;
1089 int i;
1090
1091 if (s->l2_table_cache) {
1092 qcow2_cache_destroy(s->l2_table_cache);
1093 }
1094 if (s->refcount_block_cache) {
1095 qcow2_cache_destroy(s->refcount_block_cache);
1096 }
1097 s->l2_table_cache = r->l2_table_cache;
1098 s->refcount_block_cache = r->refcount_block_cache;
1099 s->l2_slice_size = r->l2_slice_size;
1100
1101 s->overlap_check = r->overlap_check;
1102 s->use_lazy_refcounts = r->use_lazy_refcounts;
1103
1104 for (i = 0; i < QCOW2_DISCARD_MAX; i++) {
1105 s->discard_passthrough[i] = r->discard_passthrough[i];
1106 }
1107
1108 if (s->cache_clean_interval != r->cache_clean_interval) {
1109 cache_clean_timer_del(bs);
1110 s->cache_clean_interval = r->cache_clean_interval;
1111 cache_clean_timer_init(bs, bdrv_get_aio_context(bs));
1112 }
1113
1114 qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
1115 s->crypto_opts = r->crypto_opts;
1116}
1117
1118static void qcow2_update_options_abort(BlockDriverState *bs,
1119 Qcow2ReopenState *r)
1120{
1121 if (r->l2_table_cache) {
1122 qcow2_cache_destroy(r->l2_table_cache);
1123 }
1124 if (r->refcount_block_cache) {
1125 qcow2_cache_destroy(r->refcount_block_cache);
1126 }
1127 qapi_free_QCryptoBlockOpenOptions(r->crypto_opts);
1128}
1129
1130static int qcow2_update_options(BlockDriverState *bs, QDict *options,
1131 int flags, Error **errp)
1132{
1133 Qcow2ReopenState r = {};
1134 int ret;
1135
1136 ret = qcow2_update_options_prepare(bs, &r, options, flags, errp);
1137 if (ret >= 0) {
1138 qcow2_update_options_commit(bs, &r);
1139 } else {
1140 qcow2_update_options_abort(bs, &r);
1141 }
1142
1143 return ret;
1144}
1145
1146
/*
 * Read the qcow2 header from bs->file, validate it and initialise the driver
 * state in bs->opaque from it.
 *
 * @bs:      node being opened; bs->file must already be attached
 * @options: driver options, handed on to qcow2_update_options()
 * @flags:   BDRV_O_* open flags
 * @errp:    receives a description of the failure
 *
 * Both callers in this file (qcow2_open_entry() and
 * qcow2_co_invalidate_cache()) take s->lock around this call.
 *
 * Returns a non-negative value on success and a negative errno value on
 * failure.  On failure everything allocated so far by the steps below is
 * released at the "fail" label.
 */
static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
                                      int flags, Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    unsigned int len, i;
    int ret = 0;
    QCowHeader header;
    Error *local_err = NULL;
    uint64_t ext_end;
    uint64_t l1_vm_state_index;
    bool update_header = false;

    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read qcow2 header");
        goto fail;
    }
    /* Header fields are stored big-endian; convert them in place. */
    header.magic = be32_to_cpu(header.magic);
    header.version = be32_to_cpu(header.version);
    header.backing_file_offset = be64_to_cpu(header.backing_file_offset);
    header.backing_file_size = be32_to_cpu(header.backing_file_size);
    header.size = be64_to_cpu(header.size);
    header.cluster_bits = be32_to_cpu(header.cluster_bits);
    header.crypt_method = be32_to_cpu(header.crypt_method);
    header.l1_table_offset = be64_to_cpu(header.l1_table_offset);
    header.l1_size = be32_to_cpu(header.l1_size);
    header.refcount_table_offset = be64_to_cpu(header.refcount_table_offset);
    header.refcount_table_clusters =
        be32_to_cpu(header.refcount_table_clusters);
    header.snapshots_offset = be64_to_cpu(header.snapshots_offset);
    header.nb_snapshots = be32_to_cpu(header.nb_snapshots);

    if (header.magic != QCOW_MAGIC) {
        error_setg(errp, "Image is not in qcow2 format");
        ret = -EINVAL;
        goto fail;
    }
    /* Only format versions 2 and 3 are handled. */
    if (header.version < 2 || header.version > 3) {
        error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version);
        ret = -ENOTSUP;
        goto fail;
    }

    s->qcow_version = header.version;

    /* Cluster size: validate the exponent, then derive the sizes from it. */
    if (header.cluster_bits < MIN_CLUSTER_BITS ||
        header.cluster_bits > MAX_CLUSTER_BITS) {
        error_setg(errp, "Unsupported cluster size: 2^%" PRIu32,
                   header.cluster_bits);
        ret = -EINVAL;
        goto fail;
    }

    s->cluster_bits = header.cluster_bits;
    s->cluster_size = 1 << s->cluster_bits;
    s->cluster_sectors = 1 << (s->cluster_bits - BDRV_SECTOR_BITS);

    /*
     * Version 3 header fields: a version 2 image gets fixed values for them,
     * a version 3 image has them converted from big-endian like the rest.
     */
    if (header.version == 2) {
        header.incompatible_features = 0;
        header.compatible_features = 0;
        header.autoclear_features = 0;
        header.refcount_order = 4;
        header.header_length = 72;
    } else {
        header.incompatible_features =
            be64_to_cpu(header.incompatible_features);
        header.compatible_features = be64_to_cpu(header.compatible_features);
        header.autoclear_features = be64_to_cpu(header.autoclear_features);
        header.refcount_order = be32_to_cpu(header.refcount_order);
        header.header_length = be32_to_cpu(header.header_length);

        if (header.header_length < 104) {
            error_setg(errp, "qcow2 header too short");
            ret = -EINVAL;
            goto fail;
        }
    }

    if (header.header_length > s->cluster_size) {
        error_setg(errp, "qcow2 header exceeds cluster size");
        ret = -EINVAL;
        goto fail;
    }

    /*
     * The on-disk header is longer than QCowHeader: keep the extra bytes
     * verbatim in s->unknown_header_fields.
     */
    if (header.header_length > sizeof(header)) {
        s->unknown_header_fields_size = header.header_length - sizeof(header);
        s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
        ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
                         s->unknown_header_fields_size);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not read unknown qcow2 header "
                             "fields");
            goto fail;
        }
    }

    if (header.backing_file_offset > s->cluster_size) {
        error_setg(errp, "Invalid backing file offset");
        ret = -EINVAL;
        goto fail;
    }

    /*
     * ext_end is the end offset given to qcow2_read_extensions(): the start
     * of the backing file name if there is one, else the end of the first
     * cluster.
     */
    if (header.backing_file_offset) {
        ext_end = header.backing_file_offset;
    } else {
        ext_end = 1 << header.cluster_bits;
    }

    /* Feature bits */
    s->incompatible_features = header.incompatible_features;
    s->compatible_features = header.compatible_features;
    s->autoclear_features = header.autoclear_features;

    if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) {
        /*
         * Incompatible bits outside QCOW2_INCOMPAT_MASK are set.  The
         * extensions are read here only to obtain the feature table for
         * the error report; the open fails in any case.
         */
        void *feature_table = NULL;
        qcow2_read_extensions(bs, header.header_length, ext_end,
                              &feature_table, flags, NULL, NULL);
        report_unsupported_feature(errp, feature_table,
                                   s->incompatible_features &
                                   ~QCOW2_INCOMPAT_MASK);
        ret = -ENOTSUP;
        g_free(feature_table);
        goto fail;
    }

    if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
        /*
         * An image flagged corrupt may only be opened read/write when
         * BDRV_O_CHECK is set as well.
         */
        if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
            error_setg(errp, "qcow2: Image is corrupt; cannot be opened "
                       "read/write");
            ret = -EACCES;
            goto fail;
        }
    }

    /* Refcount width: 2^refcount_order bits per entry, at most 64. */
    if (header.refcount_order > 6) {
        error_setg(errp, "Reference count entry width too large; may not "
                   "exceed 64 bits");
        ret = -EINVAL;
        goto fail;
    }
    s->refcount_order = header.refcount_order;
    s->refcount_bits = 1 << s->refcount_order;
    /*
     * refcount_max = 2^refcount_bits - 1, computed in two steps so that the
     * 64 bit case does not shift by the full width of the type.
     */
    s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
    s->refcount_max += s->refcount_max - 1;

    s->crypt_method_header = header.crypt_method;
    if (s->crypt_method_header) {
        if (bdrv_uses_whitelist() &&
            s->crypt_method_header == QCOW_CRYPT_AES) {
            error_setg(errp,
                       "Use of AES-CBC encrypted qcow2 images is no longer "
                       "supported in system emulators");
            error_append_hint(errp,
                              "You can use 'qemu-img convert' to convert your "
                              "image to an alternative supported format, such "
                              "as unencrypted qcow2, or raw with the LUKS "
                              "format instead.\n");
            ret = -ENOSYS;
            goto fail;
        }

        /*
         * crypt_physical_offset selects which offset the read and write
         * paths hand to the crypto layer: the guest offset for
         * QCOW_CRYPT_AES, the host offset for every other method.
         */
        if (s->crypt_method_header == QCOW_CRYPT_AES) {
            s->crypt_physical_offset = false;
        } else {
            s->crypt_physical_offset = true;
        }

        bs->encrypted = true;
    }

    /* l2_size = cluster_size / 8 */
    s->l2_bits = s->cluster_bits - 3;
    s->l2_size = 1 << s->l2_bits;

    /* refcount_block_size = cluster_size / 2^(refcount_order - 3) */
    s->refcount_block_bits = s->cluster_bits - (s->refcount_order - 3);
    s->refcount_block_size = 1 << s->refcount_block_bits;
    bs->total_sectors = header.size / BDRV_SECTOR_SIZE;
    s->csize_shift = (62 - (s->cluster_bits - 8));
    s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
    s->cluster_offset_mask = (1LL << s->csize_shift) - 1;

    s->refcount_table_offset = header.refcount_table_offset;
    s->refcount_table_size =
        header.refcount_table_clusters << (s->cluster_bits - 3);

    /* A missing refcount table is tolerated only with BDRV_O_CHECK. */
    if (header.refcount_table_clusters == 0 && !(flags & BDRV_O_CHECK)) {
        error_setg(errp, "Image does not contain a reference count table");
        ret = -EINVAL;
        goto fail;
    }

    ret = qcow2_validate_table(bs, s->refcount_table_offset,
                               header.refcount_table_clusters,
                               s->cluster_size, QCOW_MAX_REFTABLE_SIZE,
                               "Reference count table", errp);
    if (ret < 0) {
        goto fail;
    }

    /*
     * Snapshot table: validated here using sizeof(QCowSnapshotHeader) as
     * the per-entry size; the table itself is read further down by
     * qcow2_read_snapshots().
     */
    ret = qcow2_validate_table(bs, header.snapshots_offset,
                               header.nb_snapshots,
                               sizeof(QCowSnapshotHeader),
                               sizeof(QCowSnapshotHeader) * QCOW_MAX_SNAPSHOTS,
                               "Snapshot table", errp);
    if (ret < 0) {
        goto fail;
    }

    /* Active L1 table: validate, then read it in below. */
    ret = qcow2_validate_table(bs, header.l1_table_offset,
                               header.l1_size, sizeof(uint64_t),
                               QCOW_MAX_L1_SIZE, "Active L1 table", errp);
    if (ret < 0) {
        goto fail;
    }
    s->l1_size = header.l1_size;
    s->l1_table_offset = header.l1_table_offset;

    l1_vm_state_index = size_to_l1(s, header.size);
    if (l1_vm_state_index > INT_MAX) {
        error_setg(errp, "Image is too big");
        ret = -EFBIG;
        goto fail;
    }
    s->l1_vm_state_index = l1_vm_state_index;

    /*
     * The L1 table must have at least as many entries as size_to_l1()
     * computed for the image size.
     */
    if (s->l1_size < s->l1_vm_state_index) {
        error_setg(errp, "L1 table is too small");
        ret = -EINVAL;
        goto fail;
    }

    if (s->l1_size > 0) {
        /* The buffer is rounded up to a multiple of 512 bytes. */
        s->l1_table = qemu_try_blockalign(bs->file->bs,
            ROUND_UP(s->l1_size * sizeof(uint64_t), 512));
        if (s->l1_table == NULL) {
            error_setg(errp, "Could not allocate L1 table");
            ret = -ENOMEM;
            goto fail;
        }
        ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
                         s->l1_size * sizeof(uint64_t));
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not read L1 table");
            goto fail;
        }
        /* L1 entries are big-endian on disk. */
        for(i = 0;i < s->l1_size; i++) {
            s->l1_table[i] = be64_to_cpu(s->l1_table[i]);
        }
    }

    /* Driver-specific options */
    ret = qcow2_update_options(bs, options, flags, errp);
    if (ret < 0) {
        goto fail;
    }

    s->cluster_cache_offset = -1;
    s->flags = flags;

    ret = qcow2_refcount_init(bs);
    if (ret != 0) {
        error_setg_errno(errp, -ret, "Could not initialize refcount handling");
        goto fail;
    }

    QLIST_INIT(&s->cluster_allocs);
    QTAILQ_INIT(&s->discards);

    /* Header extensions; may request a header rewrite via update_header. */
    if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL,
                              flags, &update_header, &local_err)) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

    /*
     * The header names a crypt method but s->crypto is still unset after
     * reading the extensions.  For QCOW_CRYPT_AES the crypto block is opened
     * here; for any other method this is an error unless BDRV_O_NO_IO is
     * set.
     */
    if (s->crypt_method_header && !s->crypto) {
        if (s->crypt_method_header == QCOW_CRYPT_AES) {
            unsigned int cflags = 0;
            if (flags & BDRV_O_NO_IO) {
                cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
            }
            s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
                                           NULL, NULL, cflags, errp);
            if (!s->crypto) {
                ret = -EINVAL;
                goto fail;
            }
        } else if (!(flags & BDRV_O_NO_IO)) {
            error_setg(errp, "Missing CRYPTO header for crypt method %d",
                       s->crypt_method_header);
            ret = -EINVAL;
            goto fail;
        }
    }

    /* Backing file name */
    if (header.backing_file_offset != 0) {
        len = header.backing_file_size;
        /*
         * The name must fit in 1023 bytes, in what is left of the first
         * cluster after its offset, and in bs->backing_file including the
         * terminating NUL written below.
         */
        if (len > MIN(1023, s->cluster_size - header.backing_file_offset) ||
            len >= sizeof(bs->backing_file)) {
            error_setg(errp, "Backing file name too long");
            ret = -EINVAL;
            goto fail;
        }
        ret = bdrv_pread(bs->file, header.backing_file_offset,
                         bs->backing_file, len);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not read backing file name");
            goto fail;
        }
        bs->backing_file[len] = '\0';
        s->image_backing_file = g_strdup(bs->backing_file);
    }

    /* Internal snapshots */
    s->snapshots_offset = header.snapshots_offset;
    s->nb_snapshots = header.nb_snapshots;

    ret = qcow2_read_snapshots(bs);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read snapshots");
        goto fail;
    }

    /*
     * Autoclear bits outside QCOW2_AUTOCLEAR_MASK also call for a header
     * rewrite.  The rewrite is only done for a writable, active node, and
     * only then are the unknown bits cleared in memory.
     */
    update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK;
    update_header =
        update_header && !bs->read_only && !(flags & BDRV_O_INACTIVE);
    if (update_header) {
        s->autoclear_features &= QCOW2_AUTOCLEAR_MASK;
    }

    /*
     * Persistent dirty bitmaps are loaded unless the node is inactive.  The
     * return value of qcow2_load_dirty_bitmaps() is treated as "the header
     * has already been updated", which makes the rewrite below unnecessary.
     */
    if (!(bdrv_get_flags(bs) & BDRV_O_INACTIVE)) {

        bool header_updated = qcow2_load_dirty_bitmaps(bs, &local_err);

        update_header = update_header && !header_updated;
    }
    if (local_err != NULL) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

    if (update_header) {
        ret = qcow2_update_header(bs);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not update qcow2 header");
            goto fail;
        }
    }

    bs->supported_zero_flags = header.version >= 3 ? BDRV_REQ_MAY_UNMAP : 0;

    /*
     * Repair an image that has the dirty bit set, unless this is a check
     * run, the node is inactive or it is read-only.
     */
    if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only &&
        (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
        BdrvCheckResult result = {0};

        ret = qcow2_co_check_locked(bs, &result,
                                    BDRV_FIX_ERRORS | BDRV_FIX_LEAKS);
        if (ret < 0 || result.check_errors) {
            /* The check itself succeeded but reported errors. */
            if (ret >= 0) {
                ret = -EIO;
            }
            error_setg_errno(errp, -ret, "Could not repair dirty image");
            goto fail;
        }
    }

#ifdef DEBUG_ALLOC
    {
        BdrvCheckResult result = {0};
        qcow2_check_refcounts(bs, &result, 0);
    }
#endif

    qemu_co_queue_init(&s->compress_wait_queue);

    return ret;

 fail:
    /* Release everything the steps above may have set up. */
    g_free(s->unknown_header_fields);
    cleanup_unknown_header_ext(bs);
    qcow2_free_snapshots(bs);
    qcow2_refcount_close(bs);
    qemu_vfree(s->l1_table);
    /* Do not leave a dangling pointer while the caches are destroyed. */
    s->l1_table = NULL;
    cache_clean_timer_del(bs);
    if (s->l2_table_cache) {
        qcow2_cache_destroy(s->l2_table_cache);
    }
    if (s->refcount_block_cache) {
        qcow2_cache_destroy(s->refcount_block_cache);
    }
    qcrypto_block_free(s->crypto);
    qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
    return ret;
}
1624
1625typedef struct QCow2OpenCo {
1626 BlockDriverState *bs;
1627 QDict *options;
1628 int flags;
1629 Error **errp;
1630 int ret;
1631} QCow2OpenCo;
1632
1633static void coroutine_fn qcow2_open_entry(void *opaque)
1634{
1635 QCow2OpenCo *qoc = opaque;
1636 BDRVQcow2State *s = qoc->bs->opaque;
1637
1638 qemu_co_mutex_lock(&s->lock);
1639 qoc->ret = qcow2_do_open(qoc->bs, qoc->options, qoc->flags, qoc->errp);
1640 qemu_co_mutex_unlock(&s->lock);
1641}
1642
1643static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
1644 Error **errp)
1645{
1646 BDRVQcow2State *s = bs->opaque;
1647 QCow2OpenCo qoc = {
1648 .bs = bs,
1649 .options = options,
1650 .flags = flags,
1651 .errp = errp,
1652 .ret = -EINPROGRESS
1653 };
1654
1655 bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
1656 false, errp);
1657 if (!bs->file) {
1658 return -EINVAL;
1659 }
1660
1661
1662 qemu_co_mutex_init(&s->lock);
1663
1664 if (qemu_in_coroutine()) {
1665
1666 qcow2_open_entry(&qoc);
1667 } else {
1668 qemu_coroutine_enter(qemu_coroutine_create(qcow2_open_entry, &qoc));
1669 BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS);
1670 }
1671 return qoc.ret;
1672}
1673
1674static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
1675{
1676 BDRVQcow2State *s = bs->opaque;
1677
1678 if (bs->encrypted) {
1679
1680 bs->bl.request_alignment = qcrypto_block_get_sector_size(s->crypto);
1681 }
1682 bs->bl.pwrite_zeroes_alignment = s->cluster_size;
1683 bs->bl.pdiscard_alignment = s->cluster_size;
1684}
1685
1686static int qcow2_reopen_prepare(BDRVReopenState *state,
1687 BlockReopenQueue *queue, Error **errp)
1688{
1689 Qcow2ReopenState *r;
1690 int ret;
1691
1692 r = g_new0(Qcow2ReopenState, 1);
1693 state->opaque = r;
1694
1695 ret = qcow2_update_options_prepare(state->bs, r, state->options,
1696 state->flags, errp);
1697 if (ret < 0) {
1698 goto fail;
1699 }
1700
1701
1702 if ((state->flags & BDRV_O_RDWR) == 0) {
1703 ret = qcow2_reopen_bitmaps_ro(state->bs, errp);
1704 if (ret < 0) {
1705 goto fail;
1706 }
1707
1708 ret = bdrv_flush(state->bs);
1709 if (ret < 0) {
1710 goto fail;
1711 }
1712
1713 ret = qcow2_mark_clean(state->bs);
1714 if (ret < 0) {
1715 goto fail;
1716 }
1717 }
1718
1719 return 0;
1720
1721fail:
1722 qcow2_update_options_abort(state->bs, r);
1723 g_free(r);
1724 return ret;
1725}
1726
1727static void qcow2_reopen_commit(BDRVReopenState *state)
1728{
1729 qcow2_update_options_commit(state->bs, state->opaque);
1730 g_free(state->opaque);
1731}
1732
1733static void qcow2_reopen_abort(BDRVReopenState *state)
1734{
1735 qcow2_update_options_abort(state->bs, state->opaque);
1736 g_free(state->opaque);
1737}
1738
1739static void qcow2_join_options(QDict *options, QDict *old_options)
1740{
1741 bool has_new_overlap_template =
1742 qdict_haskey(options, QCOW2_OPT_OVERLAP) ||
1743 qdict_haskey(options, QCOW2_OPT_OVERLAP_TEMPLATE);
1744 bool has_new_total_cache_size =
1745 qdict_haskey(options, QCOW2_OPT_CACHE_SIZE);
1746 bool has_all_cache_options;
1747
1748
1749 if (has_new_overlap_template) {
1750 qdict_del(old_options, QCOW2_OPT_OVERLAP);
1751 qdict_del(old_options, QCOW2_OPT_OVERLAP_TEMPLATE);
1752 qdict_del(old_options, QCOW2_OPT_OVERLAP_MAIN_HEADER);
1753 qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L1);
1754 qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L2);
1755 qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_TABLE);
1756 qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK);
1757 qdict_del(old_options, QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE);
1758 qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L1);
1759 qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L2);
1760 }
1761
1762
1763 if (qdict_haskey(options, QCOW2_OPT_CACHE_SIZE)) {
1764 qdict_del(old_options, QCOW2_OPT_L2_CACHE_SIZE);
1765 qdict_del(old_options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
1766 }
1767
1768 qdict_join(options, old_options, false);
1769
1770
1771
1772
1773
1774
1775 has_all_cache_options =
1776 qdict_haskey(options, QCOW2_OPT_CACHE_SIZE) ||
1777 qdict_haskey(options, QCOW2_OPT_L2_CACHE_SIZE) ||
1778 qdict_haskey(options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
1779
1780 if (has_all_cache_options && !has_new_total_cache_size) {
1781 qdict_del(options, QCOW2_OPT_CACHE_SIZE);
1782 }
1783}
1784
1785static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs,
1786 bool want_zero,
1787 int64_t offset, int64_t count,
1788 int64_t *pnum, int64_t *map,
1789 BlockDriverState **file)
1790{
1791 BDRVQcow2State *s = bs->opaque;
1792 uint64_t cluster_offset;
1793 int index_in_cluster, ret;
1794 unsigned int bytes;
1795 int status = 0;
1796
1797 bytes = MIN(INT_MAX, count);
1798 qemu_co_mutex_lock(&s->lock);
1799 ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset);
1800 qemu_co_mutex_unlock(&s->lock);
1801 if (ret < 0) {
1802 return ret;
1803 }
1804
1805 *pnum = bytes;
1806
1807 if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED &&
1808 !s->crypto) {
1809 index_in_cluster = offset & (s->cluster_size - 1);
1810 *map = cluster_offset | index_in_cluster;
1811 *file = bs->file->bs;
1812 status |= BDRV_BLOCK_OFFSET_VALID;
1813 }
1814 if (ret == QCOW2_CLUSTER_ZERO_PLAIN || ret == QCOW2_CLUSTER_ZERO_ALLOC) {
1815 status |= BDRV_BLOCK_ZERO;
1816 } else if (ret != QCOW2_CLUSTER_UNALLOCATED) {
1817 status |= BDRV_BLOCK_DATA;
1818 }
1819 return status;
1820}
1821
1822static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs,
1823 QCowL2Meta **pl2meta,
1824 bool link_l2)
1825{
1826 int ret = 0;
1827 QCowL2Meta *l2meta = *pl2meta;
1828
1829 while (l2meta != NULL) {
1830 QCowL2Meta *next;
1831
1832 if (link_l2) {
1833 ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
1834 if (ret) {
1835 goto out;
1836 }
1837 } else {
1838 qcow2_alloc_cluster_abort(bs, l2meta);
1839 }
1840
1841
1842 if (l2meta->nb_clusters != 0) {
1843 QLIST_REMOVE(l2meta, next_in_flight);
1844 }
1845
1846 qemu_co_queue_restart_all(&l2meta->dependent_requests);
1847
1848 next = l2meta->next;
1849 g_free(l2meta);
1850 l2meta = next;
1851 }
1852out:
1853 *pl2meta = l2meta;
1854 return ret;
1855}
1856
/*
 * Read @bytes bytes starting at guest offset @offset into @qiov.
 *
 * The request is processed in pieces.  For each piece the cluster lookup
 * decides how many bytes it covers and how they are obtained: from the
 * backing file, as zeroes, from the decompressed-cluster buffer, or by
 * reading bs->file (with decryption through a bounce buffer for encrypted
 * images).
 *
 * s->lock is held except while the request to the backing file or to
 * bs->file is in progress.
 *
 * Returns 0 on success or a negative errno value.  @flags is not used.
 */
static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
                                        uint64_t bytes, QEMUIOVector *qiov,
                                        int flags)
{
    BDRVQcow2State *s = bs->opaque;
    int offset_in_cluster;
    int ret;
    unsigned int cur_bytes; /* number of bytes handled in this iteration */
    uint64_t cluster_offset = 0;
    uint64_t bytes_done = 0;
    QEMUIOVector hd_qiov;
    uint8_t *cluster_data = NULL;

    qemu_iovec_init(&hd_qiov, qiov->niov);

    qemu_co_mutex_lock(&s->lock);

    while (bytes != 0) {

        /*
         * Upper bound for this piece; with a crypto block it is further
         * limited to the size of the bounce buffer allocated below.
         */
        cur_bytes = MIN(bytes, INT_MAX);
        if (s->crypto) {
            cur_bytes = MIN(cur_bytes,
                            QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
        }

        ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset);
        if (ret < 0) {
            goto fail;
        }

        offset_in_cluster = offset_into_cluster(s, offset);

        /* hd_qiov becomes the part of @qiov that this piece fills. */
        qemu_iovec_reset(&hd_qiov);
        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);

        /* A non-negative ret is the cluster type. */
        switch (ret) {
        case QCOW2_CLUSTER_UNALLOCATED:

            if (bs->backing) {
                BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
                /* The lock is dropped for the duration of the read. */
                qemu_co_mutex_unlock(&s->lock);
                ret = bdrv_co_preadv(bs->backing, offset, cur_bytes,
                                     &hd_qiov, 0);
                qemu_co_mutex_lock(&s->lock);
                if (ret < 0) {
                    goto fail;
                }
            } else {
                /* No backing file: the range reads as zeroes. */
                qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
            }
            break;

        case QCOW2_CLUSTER_ZERO_PLAIN:
        case QCOW2_CLUSTER_ZERO_ALLOC:
            qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
            break;

        case QCOW2_CLUSTER_COMPRESSED:
            /* Decompress, then copy out of s->cluster_cache. */
            ret = qcow2_decompress_cluster(bs, cluster_offset);
            if (ret < 0) {
                goto fail;
            }

            qemu_iovec_from_buf(&hd_qiov, 0,
                                s->cluster_cache + offset_in_cluster,
                                cur_bytes);
            break;

        case QCOW2_CLUSTER_NORMAL:
            /* A host offset that is not 512-byte aligned is an I/O error. */
            if ((cluster_offset & 511) != 0) {
                ret = -EIO;
                goto fail;
            }

            if (bs->encrypted) {
                assert(s->crypto);

                /*
                 * Encrypted data is read into one contiguous bounce buffer,
                 * allocated on first use and reused for later pieces, so
                 * that it can be decrypted in place below.
                 */
                if (!cluster_data) {
                    cluster_data =
                        qemu_try_blockalign(bs->file->bs,
                                            QCOW_MAX_CRYPT_CLUSTERS
                                            * s->cluster_size);
                    if (cluster_data == NULL) {
                        ret = -ENOMEM;
                        goto fail;
                    }
                }

                assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
                /* Redirect the read from @qiov to the bounce buffer. */
                qemu_iovec_reset(&hd_qiov);
                qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
            }

            BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
            /* The lock is dropped for the duration of the read. */
            qemu_co_mutex_unlock(&s->lock);
            ret = bdrv_co_preadv(bs->file,
                                 cluster_offset + offset_in_cluster,
                                 cur_bytes, &hd_qiov, 0);
            qemu_co_mutex_lock(&s->lock);
            if (ret < 0) {
                goto fail;
            }
            if (bs->encrypted) {
                assert(s->crypto);
                assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
                assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
                /*
                 * The offset handed to the crypto layer is the host offset
                 * or the guest offset, depending on crypt_physical_offset.
                 */
                if (qcrypto_block_decrypt(s->crypto,
                                          (s->crypt_physical_offset ?
                                           cluster_offset + offset_in_cluster :
                                           offset),
                                          cluster_data,
                                          cur_bytes,
                                          NULL) < 0) {
                    ret = -EIO;
                    goto fail;
                }
                /* Copy the plaintext to its place in the caller's vector. */
                qemu_iovec_from_buf(qiov, bytes_done, cluster_data, cur_bytes);
            }
            break;

        default:
            g_assert_not_reached();
            ret = -EIO;
            goto fail;
        }

        bytes -= cur_bytes;
        offset += cur_bytes;
        bytes_done += cur_bytes;
    }
    ret = 0;

fail:
    /* Reached on success as well; s->lock is held on every path here. */
    qemu_co_mutex_unlock(&s->lock);

    qemu_iovec_destroy(&hd_qiov);
    qemu_vfree(cluster_data);

    return ret;
}
2004
2005
2006
2007static bool merge_cow(uint64_t offset, unsigned bytes,
2008 QEMUIOVector *hd_qiov, QCowL2Meta *l2meta)
2009{
2010 QCowL2Meta *m;
2011
2012 for (m = l2meta; m != NULL; m = m->next) {
2013
2014 if (m->cow_start.nb_bytes == 0 && m->cow_end.nb_bytes == 0) {
2015 continue;
2016 }
2017
2018
2019
2020 if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) {
2021 continue;
2022 }
2023
2024
2025
2026 if (m->offset + m->cow_end.offset != offset + bytes) {
2027 continue;
2028 }
2029
2030
2031
2032 if (hd_qiov->niov > IOV_MAX - 2) {
2033 continue;
2034 }
2035
2036 m->data_qiov = hd_qiov;
2037 return true;
2038 }
2039
2040 return false;
2041}
2042
/*
 * Write @bytes bytes from @qiov to guest offset @offset.
 *
 * The request is processed in pieces.  For each piece clusters are
 * allocated, the data is encrypted into a bounce buffer if the image is
 * encrypted, the target range is checked for overlaps with metadata, the
 * data is written to bs->file (unless merge_cow() attached it to an
 * allocation request instead) and the allocation requests are completed.
 *
 * s->lock is held except while the write to bs->file is in progress.
 *
 * Returns 0 on success or a negative errno value.  @flags is not used.
 */
static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
                                         uint64_t bytes, QEMUIOVector *qiov,
                                         int flags)
{
    BDRVQcow2State *s = bs->opaque;
    int offset_in_cluster;
    int ret;
    unsigned int cur_bytes; /* number of bytes handled in this iteration */
    uint64_t cluster_offset;
    QEMUIOVector hd_qiov;
    uint64_t bytes_done = 0;
    uint8_t *cluster_data = NULL;
    QCowL2Meta *l2meta = NULL;

    trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);

    qemu_iovec_init(&hd_qiov, qiov->niov);

    /*
     * NOTE(review): presumably this invalidates the decompressed-cluster
     * buffer used by the read path; confirm against
     * qcow2_decompress_cluster().
     */
    s->cluster_cache_offset = -1;

    qemu_co_mutex_lock(&s->lock);

    while (bytes != 0) {

        l2meta = NULL;

        trace_qcow2_writev_start_part(qemu_coroutine_self());
        offset_in_cluster = offset_into_cluster(s, offset);
        /*
         * Upper bound for this piece; for encrypted images it is further
         * limited so that it fits the bounce buffer allocated below.
         */
        cur_bytes = MIN(bytes, INT_MAX);
        if (bs->encrypted) {
            cur_bytes = MIN(cur_bytes,
                            QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
                            - offset_in_cluster);
        }

        ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
                                         &cluster_offset, &l2meta);
        if (ret < 0) {
            goto fail;
        }

        assert((cluster_offset & 511) == 0);

        /* hd_qiov becomes the part of @qiov that this piece writes. */
        qemu_iovec_reset(&hd_qiov);
        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);

        if (bs->encrypted) {
            assert(s->crypto);
            /* The bounce buffer is allocated once and reused. */
            if (!cluster_data) {
                cluster_data = qemu_try_blockalign(bs->file->bs,
                                                   QCOW_MAX_CRYPT_CLUSTERS
                                                   * s->cluster_size);
                if (cluster_data == NULL) {
                    ret = -ENOMEM;
                    goto fail;
                }
            }

            assert(hd_qiov.size <=
                   QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
            /* Copy the plaintext out of @qiov and encrypt the copy. */
            qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);

            /*
             * The offset handed to the crypto layer is the host offset or
             * the guest offset, depending on crypt_physical_offset.
             */
            if (qcrypto_block_encrypt(s->crypto,
                                      (s->crypt_physical_offset ?
                                       cluster_offset + offset_in_cluster :
                                       offset),
                                      cluster_data,
                                      cur_bytes, NULL) < 0) {
                ret = -EIO;
                goto fail;
            }

            /* From here on the ciphertext is what gets written. */
            qemu_iovec_reset(&hd_qiov);
            qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
        }

        ret = qcow2_pre_write_overlap_check(bs, 0,
                cluster_offset + offset_in_cluster, cur_bytes);
        if (ret < 0) {
            goto fail;
        }

        /*
         * If merge_cow() attached hd_qiov to one of the allocation requests,
         * the data is not written here.  NOTE(review): presumably it is then
         * written together with the COW regions when the request is linked
         * below; confirm against qcow2_alloc_cluster_link_l2().
         */
        if (!merge_cow(offset, cur_bytes, &hd_qiov, l2meta)) {
            /* The lock is dropped for the duration of the write. */
            qemu_co_mutex_unlock(&s->lock);
            BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
            trace_qcow2_writev_data(qemu_coroutine_self(),
                                    cluster_offset + offset_in_cluster);
            ret = bdrv_co_pwritev(bs->file,
                                  cluster_offset + offset_in_cluster,
                                  cur_bytes, &hd_qiov, 0);
            qemu_co_mutex_lock(&s->lock);
            if (ret < 0) {
                goto fail;
            }
        }

        /* Complete the allocation requests of this piece. */
        ret = qcow2_handle_l2meta(bs, &l2meta, true);
        if (ret) {
            goto fail;
        }

        bytes -= cur_bytes;
        offset += cur_bytes;
        bytes_done += cur_bytes;
        trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes);
    }
    ret = 0;

fail:
    /*
     * Reached on success as well.  Any allocation request still pending is
     * aborted; after a fully successful loop l2meta is NULL and this does
     * nothing.
     */
    qcow2_handle_l2meta(bs, &l2meta, false);

    qemu_co_mutex_unlock(&s->lock);

    qemu_iovec_destroy(&hd_qiov);
    qemu_vfree(cluster_data);
    trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);

    return ret;
}
2166
2167static int qcow2_inactivate(BlockDriverState *bs)
2168{
2169 BDRVQcow2State *s = bs->opaque;
2170 int ret, result = 0;
2171 Error *local_err = NULL;
2172
2173 qcow2_store_persistent_dirty_bitmaps(bs, &local_err);
2174 if (local_err != NULL) {
2175 result = -EINVAL;
2176 error_reportf_err(local_err, "Lost persistent bitmaps during "
2177 "inactivation of node '%s': ",
2178 bdrv_get_device_or_node_name(bs));
2179 }
2180
2181 ret = qcow2_cache_flush(bs, s->l2_table_cache);
2182 if (ret) {
2183 result = ret;
2184 error_report("Failed to flush the L2 table cache: %s",
2185 strerror(-ret));
2186 }
2187
2188 ret = qcow2_cache_flush(bs, s->refcount_block_cache);
2189 if (ret) {
2190 result = ret;
2191 error_report("Failed to flush the refcount block cache: %s",
2192 strerror(-ret));
2193 }
2194
2195 if (result == 0) {
2196 qcow2_mark_clean(bs);
2197 }
2198
2199 return result;
2200}
2201
2202static void qcow2_close(BlockDriverState *bs)
2203{
2204 BDRVQcow2State *s = bs->opaque;
2205 qemu_vfree(s->l1_table);
2206
2207 s->l1_table = NULL;
2208
2209 if (!(s->flags & BDRV_O_INACTIVE)) {
2210 qcow2_inactivate(bs);
2211 }
2212
2213 cache_clean_timer_del(bs);
2214 qcow2_cache_destroy(s->l2_table_cache);
2215 qcow2_cache_destroy(s->refcount_block_cache);
2216
2217 qcrypto_block_free(s->crypto);
2218 s->crypto = NULL;
2219
2220 g_free(s->unknown_header_fields);
2221 cleanup_unknown_header_ext(bs);
2222
2223 g_free(s->image_backing_file);
2224 g_free(s->image_backing_format);
2225
2226 g_free(s->cluster_cache);
2227 qemu_vfree(s->cluster_data);
2228 qcow2_refcount_close(bs);
2229 qcow2_free_snapshots(bs);
2230}
2231
2232static void coroutine_fn qcow2_co_invalidate_cache(BlockDriverState *bs,
2233 Error **errp)
2234{
2235 BDRVQcow2State *s = bs->opaque;
2236 int flags = s->flags;
2237 QCryptoBlock *crypto = NULL;
2238 QDict *options;
2239 Error *local_err = NULL;
2240 int ret;
2241
2242
2243
2244
2245
2246
2247 crypto = s->crypto;
2248 s->crypto = NULL;
2249
2250 qcow2_close(bs);
2251
2252 memset(s, 0, sizeof(BDRVQcow2State));
2253 options = qdict_clone_shallow(bs->options);
2254
2255 flags &= ~BDRV_O_INACTIVE;
2256 qemu_co_mutex_lock(&s->lock);
2257 ret = qcow2_do_open(bs, options, flags, &local_err);
2258 qemu_co_mutex_unlock(&s->lock);
2259 qobject_unref(options);
2260 if (local_err) {
2261 error_propagate_prepend(errp, local_err,
2262 "Could not reopen qcow2 layer: ");
2263 bs->drv = NULL;
2264 return;
2265 } else if (ret < 0) {
2266 error_setg_errno(errp, -ret, "Could not reopen qcow2 layer");
2267 bs->drv = NULL;
2268 return;
2269 }
2270
2271 s->crypto = crypto;
2272}
2273
2274static size_t header_ext_add(char *buf, uint32_t magic, const void *s,
2275 size_t len, size_t buflen)
2276{
2277 QCowExtension *ext_backing_fmt = (QCowExtension*) buf;
2278 size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7);
2279
2280 if (buflen < ext_len) {
2281 return -ENOSPC;
2282 }
2283
2284 *ext_backing_fmt = (QCowExtension) {
2285 .magic = cpu_to_be32(magic),
2286 .len = cpu_to_be32(len),
2287 };
2288
2289 if (len) {
2290 memcpy(buf + sizeof(QCowExtension), s, len);
2291 }
2292
2293 return ext_len;
2294}
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304int qcow2_update_header(BlockDriverState *bs)
2305{
2306 BDRVQcow2State *s = bs->opaque;
2307 QCowHeader *header;
2308 char *buf;
2309 size_t buflen = s->cluster_size;
2310 int ret;
2311 uint64_t total_size;
2312 uint32_t refcount_table_clusters;
2313 size_t header_length;
2314 Qcow2UnknownHeaderExtension *uext;
2315
2316 buf = qemu_blockalign(bs, buflen);
2317
2318
2319 header = (QCowHeader*) buf;
2320
2321 if (buflen < sizeof(*header)) {
2322 ret = -ENOSPC;
2323 goto fail;
2324 }
2325
2326 header_length = sizeof(*header) + s->unknown_header_fields_size;
2327 total_size = bs->total_sectors * BDRV_SECTOR_SIZE;
2328 refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
2329
2330 *header = (QCowHeader) {
2331
2332 .magic = cpu_to_be32(QCOW_MAGIC),
2333 .version = cpu_to_be32(s->qcow_version),
2334 .backing_file_offset = 0,
2335 .backing_file_size = 0,
2336 .cluster_bits = cpu_to_be32(s->cluster_bits),
2337 .size = cpu_to_be64(total_size),
2338 .crypt_method = cpu_to_be32(s->crypt_method_header),
2339 .l1_size = cpu_to_be32(s->l1_size),
2340 .l1_table_offset = cpu_to_be64(s->l1_table_offset),
2341 .refcount_table_offset = cpu_to_be64(s->refcount_table_offset),
2342 .refcount_table_clusters = cpu_to_be32(refcount_table_clusters),
2343 .nb_snapshots = cpu_to_be32(s->nb_snapshots),
2344 .snapshots_offset = cpu_to_be64(s->snapshots_offset),
2345
2346
2347 .incompatible_features = cpu_to_be64(s->incompatible_features),
2348 .compatible_features = cpu_to_be64(s->compatible_features),
2349 .autoclear_features = cpu_to_be64(s->autoclear_features),
2350 .refcount_order = cpu_to_be32(s->refcount_order),
2351 .header_length = cpu_to_be32(header_length),
2352 };
2353
2354
2355 switch (s->qcow_version) {
2356 case 2:
2357 ret = offsetof(QCowHeader, incompatible_features);
2358 break;
2359 case 3:
2360 ret = sizeof(*header);
2361 break;
2362 default:
2363 ret = -EINVAL;
2364 goto fail;
2365 }
2366
2367 buf += ret;
2368 buflen -= ret;
2369 memset(buf, 0, buflen);
2370
2371
2372 if (s->unknown_header_fields_size) {
2373 if (buflen < s->unknown_header_fields_size) {
2374 ret = -ENOSPC;
2375 goto fail;
2376 }
2377
2378 memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size);
2379 buf += s->unknown_header_fields_size;
2380 buflen -= s->unknown_header_fields_size;
2381 }
2382
2383
2384 if (s->image_backing_format) {
2385 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
2386 s->image_backing_format,
2387 strlen(s->image_backing_format),
2388 buflen);
2389 if (ret < 0) {
2390 goto fail;
2391 }
2392
2393 buf += ret;
2394 buflen -= ret;
2395 }
2396
2397
2398 if (s->crypto_header.offset != 0) {
2399 s->crypto_header.offset = cpu_to_be64(s->crypto_header.offset);
2400 s->crypto_header.length = cpu_to_be64(s->crypto_header.length);
2401 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_CRYPTO_HEADER,
2402 &s->crypto_header, sizeof(s->crypto_header),
2403 buflen);
2404 s->crypto_header.offset = be64_to_cpu(s->crypto_header.offset);
2405 s->crypto_header.length = be64_to_cpu(s->crypto_header.length);
2406 if (ret < 0) {
2407 goto fail;
2408 }
2409 buf += ret;
2410 buflen -= ret;
2411 }
2412
2413
2414 if (s->qcow_version >= 3) {
2415 Qcow2Feature features[] = {
2416 {
2417 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
2418 .bit = QCOW2_INCOMPAT_DIRTY_BITNR,
2419 .name = "dirty bit",
2420 },
2421 {
2422 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
2423 .bit = QCOW2_INCOMPAT_CORRUPT_BITNR,
2424 .name = "corrupt bit",
2425 },
2426 {
2427 .type = QCOW2_FEAT_TYPE_COMPATIBLE,
2428 .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
2429 .name = "lazy refcounts",
2430 },
2431 };
2432
2433 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
2434 features, sizeof(features), buflen);
2435 if (ret < 0) {
2436 goto fail;
2437 }
2438 buf += ret;
2439 buflen -= ret;
2440 }
2441
2442
2443 if (s->nb_bitmaps > 0) {
2444 Qcow2BitmapHeaderExt bitmaps_header = {
2445 .nb_bitmaps = cpu_to_be32(s->nb_bitmaps),
2446 .bitmap_directory_size =
2447 cpu_to_be64(s->bitmap_directory_size),
2448 .bitmap_directory_offset =
2449 cpu_to_be64(s->bitmap_directory_offset)
2450 };
2451 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BITMAPS,
2452 &bitmaps_header, sizeof(bitmaps_header),
2453 buflen);
2454 if (ret < 0) {
2455 goto fail;
2456 }
2457 buf += ret;
2458 buflen -= ret;
2459 }
2460
2461
2462 QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
2463 ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
2464 if (ret < 0) {
2465 goto fail;
2466 }
2467
2468 buf += ret;
2469 buflen -= ret;
2470 }
2471
2472
2473 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen);
2474 if (ret < 0) {
2475 goto fail;
2476 }
2477
2478 buf += ret;
2479 buflen -= ret;
2480
2481
2482 if (s->image_backing_file) {
2483 size_t backing_file_len = strlen(s->image_backing_file);
2484
2485 if (buflen < backing_file_len) {
2486 ret = -ENOSPC;
2487 goto fail;
2488 }
2489
2490
2491 strncpy(buf, s->image_backing_file, buflen);
2492
2493 header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
2494 header->backing_file_size = cpu_to_be32(backing_file_len);
2495 }
2496
2497
2498 ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
2499 if (ret < 0) {
2500 goto fail;
2501 }
2502
2503 ret = 0;
2504fail:
2505 qemu_vfree(header);
2506 return ret;
2507}
2508
2509static int qcow2_change_backing_file(BlockDriverState *bs,
2510 const char *backing_file, const char *backing_fmt)
2511{
2512 BDRVQcow2State *s = bs->opaque;
2513
2514 if (backing_file && strlen(backing_file) > 1023) {
2515 return -EINVAL;
2516 }
2517
2518 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2519 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2520
2521 g_free(s->image_backing_file);
2522 g_free(s->image_backing_format);
2523
2524 s->image_backing_file = backing_file ? g_strdup(bs->backing_file) : NULL;
2525 s->image_backing_format = backing_fmt ? g_strdup(bs->backing_format) : NULL;
2526
2527 return qcow2_update_header(bs);
2528}
2529
2530static int qcow2_crypt_method_from_format(const char *encryptfmt)
2531{
2532 if (g_str_equal(encryptfmt, "luks")) {
2533 return QCOW_CRYPT_LUKS;
2534 } else if (g_str_equal(encryptfmt, "aes")) {
2535 return QCOW_CRYPT_AES;
2536 } else {
2537 return -EINVAL;
2538 }
2539}
2540
2541static int qcow2_set_up_encryption(BlockDriverState *bs,
2542 QCryptoBlockCreateOptions *cryptoopts,
2543 Error **errp)
2544{
2545 BDRVQcow2State *s = bs->opaque;
2546 QCryptoBlock *crypto = NULL;
2547 int fmt, ret;
2548
2549 switch (cryptoopts->format) {
2550 case Q_CRYPTO_BLOCK_FORMAT_LUKS:
2551 fmt = QCOW_CRYPT_LUKS;
2552 break;
2553 case Q_CRYPTO_BLOCK_FORMAT_QCOW:
2554 fmt = QCOW_CRYPT_AES;
2555 break;
2556 default:
2557 error_setg(errp, "Crypto format not supported in qcow2");
2558 return -EINVAL;
2559 }
2560
2561 s->crypt_method_header = fmt;
2562
2563 crypto = qcrypto_block_create(cryptoopts, "encrypt.",
2564 qcow2_crypto_hdr_init_func,
2565 qcow2_crypto_hdr_write_func,
2566 bs, errp);
2567 if (!crypto) {
2568 return -EINVAL;
2569 }
2570
2571 ret = qcow2_update_header(bs);
2572 if (ret < 0) {
2573 error_setg_errno(errp, -ret, "Could not write encryption header");
2574 goto out;
2575 }
2576
2577 ret = 0;
2578 out:
2579 qcrypto_block_free(crypto);
2580 return ret;
2581}
2582
2583
2584
2585
2586
2587
2588
2589
2590static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset,
2591 uint64_t new_length)
2592{
2593 uint64_t bytes;
2594 uint64_t host_offset = 0;
2595 unsigned int cur_bytes;
2596 int ret;
2597 QCowL2Meta *meta;
2598
2599 assert(offset <= new_length);
2600 bytes = new_length - offset;
2601
2602 while (bytes) {
2603 cur_bytes = MIN(bytes, INT_MAX);
2604 ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
2605 &host_offset, &meta);
2606 if (ret < 0) {
2607 return ret;
2608 }
2609
2610 while (meta) {
2611 QCowL2Meta *next = meta->next;
2612
2613 ret = qcow2_alloc_cluster_link_l2(bs, meta);
2614 if (ret < 0) {
2615 qcow2_free_any_clusters(bs, meta->alloc_offset,
2616 meta->nb_clusters, QCOW2_DISCARD_NEVER);
2617 return ret;
2618 }
2619
2620
2621
2622 QLIST_REMOVE(meta, next_in_flight);
2623
2624 g_free(meta);
2625 meta = next;
2626 }
2627
2628
2629
2630 bytes -= cur_bytes;
2631 offset += cur_bytes;
2632 }
2633
2634
2635
2636
2637
2638
2639 if (host_offset != 0) {
2640 uint8_t data = 0;
2641 ret = bdrv_pwrite(bs->file, (host_offset + cur_bytes) - 1,
2642 &data, 1);
2643 if (ret < 0) {
2644 return ret;
2645 }
2646 }
2647
2648 return 0;
2649}
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
/*
 * Compute how many bytes of refcount metadata (refcount blocks plus refcount
 * table clusters) are needed to cover @clusters clusters, taking into
 * account that the refcount structures must also cover themselves.
 *
 * @clusters:          number of clusters to be refcounted
 * @cluster_size:      size of one cluster in bytes
 * @refcount_order:    log2 of the refcount width in bits
 * @generous_increase: if true, once the computation has converged, add half
 *                     the number of table clusters (rounded up) to @clusters
 *                     and converge once more
 * @refblock_count:    if non-NULL, receives the number of refcount blocks
 *
 * Returns the metadata size in bytes.
 */
int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size,
                                     int refcount_order, bool generous_increase,
                                     uint64_t *refblock_count)
{
    const int64_t entries_per_table_cluster = cluster_size / sizeof(uint64_t);
    const int64_t entries_per_refblock =
        cluster_size * 8 / (1 << refcount_order);
    int64_t nb_table_clusters = 0;
    int64_t nb_refblocks = 0;
    int64_t total = 0;

    /*
     * Fixed-point iteration: every refblock and table cluster needs a
     * refcount itself, so repeat until the total stops changing.
     */
    for (;;) {
        const int64_t prev_total = total;

        nb_refblocks = DIV_ROUND_UP(clusters + nb_table_clusters +
                                    nb_refblocks, entries_per_refblock);
        nb_table_clusters = DIV_ROUND_UP(nb_refblocks,
                                         entries_per_table_cluster);
        total = clusters + nb_refblocks + nb_table_clusters;

        if (total == prev_total && generous_increase) {
            /* Converged: add the extra clusters once and start over */
            clusters += DIV_ROUND_UP(nb_table_clusters, 2);
            total = 0;
            generous_increase = false;
        }

        if (total == prev_total) {
            break;
        }
    }

    if (refblock_count != NULL) {
        *refblock_count = nb_refblocks;
    }

    return (nb_refblocks + nb_table_clusters) * cluster_size;
}
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
/*
 * Estimate the size of a fully allocated image file holding @total_size
 * bytes of guest data.
 *
 * The result is the cluster-aligned data size plus: one cluster for the
 * header, the L2 entries (8 bytes each, rounded up to whole clusters), the
 * L1 entries (likewise), and the refcount metadata reported by
 * qcow2_refcount_metadata_size() for all of the above.
 */
static int64_t qcow2_calc_prealloc_size(int64_t total_size,
                                        size_t cluster_size,
                                        int refcount_order)
{
    const int64_t data_size = ROUND_UP(total_size, cluster_size);
    uint64_t l2_entries, l1_entries;
    int64_t meta_size = 0;

    /* header: one cluster */
    meta_size += cluster_size;

    /* L2 tables: one entry per data cluster, in whole clusters */
    l2_entries = data_size / cluster_size;
    l2_entries = ROUND_UP(l2_entries, cluster_size / sizeof(uint64_t));
    meta_size += l2_entries * sizeof(uint64_t);

    /* L1 table: one entry per L2 cluster, in whole clusters */
    l1_entries = l2_entries * sizeof(uint64_t) / cluster_size;
    l1_entries = ROUND_UP(l1_entries, cluster_size / sizeof(uint64_t));
    meta_size += l1_entries * sizeof(uint64_t);

    /* refcount table and blocks covering data and the metadata so far */
    meta_size += qcow2_refcount_metadata_size(
                     (meta_size + data_size) / cluster_size,
                     cluster_size, refcount_order, false, NULL);

    return meta_size + data_size;
}
2737
2738static bool validate_cluster_size(size_t cluster_size, Error **errp)
2739{
2740 int cluster_bits = ctz32(cluster_size);
2741 if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS ||
2742 (1 << cluster_bits) != cluster_size)
2743 {
2744 error_setg(errp, "Cluster size must be a power of two between %d and "
2745 "%dk", 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10));
2746 return false;
2747 }
2748 return true;
2749}
2750
2751static size_t qcow2_opt_get_cluster_size_del(QemuOpts *opts, Error **errp)
2752{
2753 size_t cluster_size;
2754
2755 cluster_size = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE,
2756 DEFAULT_CLUSTER_SIZE);
2757 if (!validate_cluster_size(cluster_size, errp)) {
2758 return 0;
2759 }
2760 return cluster_size;
2761}
2762
2763static int qcow2_opt_get_version_del(QemuOpts *opts, Error **errp)
2764{
2765 char *buf;
2766 int ret;
2767
2768 buf = qemu_opt_get_del(opts, BLOCK_OPT_COMPAT_LEVEL);
2769 if (!buf) {
2770 ret = 3;
2771 } else if (!strcmp(buf, "0.10")) {
2772 ret = 2;
2773 } else if (!strcmp(buf, "1.1")) {
2774 ret = 3;
2775 } else {
2776 error_setg(errp, "Invalid compatibility level: '%s'", buf);
2777 ret = -EINVAL;
2778 }
2779 g_free(buf);
2780 return ret;
2781}
2782
2783static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version,
2784 Error **errp)
2785{
2786 uint64_t refcount_bits;
2787
2788 refcount_bits = qemu_opt_get_number_del(opts, BLOCK_OPT_REFCOUNT_BITS, 16);
2789 if (refcount_bits > 64 || !is_power_of_2(refcount_bits)) {
2790 error_setg(errp, "Refcount width must be a power of two and may not "
2791 "exceed 64 bits");
2792 return 0;
2793 }
2794
2795 if (version < 3 && refcount_bits != 16) {
2796 error_setg(errp, "Different refcount widths than 16 bits require "
2797 "compatibility level 1.1 or above (use compat=1.1 or "
2798 "greater)");
2799 return 0;
2800 }
2801
2802 return refcount_bits;
2803}
2804
/*
 * Create a qcow2 image on the node referenced by create_options->u.qcow2.file.
 *
 * The function validates the creation options (filling in defaults), writes
 * a minimal header and refcount structures directly to the protocol node,
 * then opens the result with the qcow2 driver to allocate the initial
 * clusters, update the header, set the virtual size and optionally set the
 * backing file, encryption and preallocation. Finally the image is opened
 * once more with a different set of flags.
 *
 * Returns 0 on success or a negative errno with @errp set.
 */
static int coroutine_fn
qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
{
    BlockdevCreateOptionsQcow2 *qcow2_opts;
    QDict *options;

    BlockBackend *blk = NULL;
    BlockDriverState *bs = NULL;
    QCowHeader *header;
    size_t cluster_size;
    int version;
    int refcount_order;
    uint64_t* refcount_table;
    Error *local_err = NULL;
    int ret;

    assert(create_options->driver == BLOCKDEV_DRIVER_QCOW2);
    qcow2_opts = &create_options->u.qcow2;

    /* Open the node the image will be written to */
    bs = bdrv_open_blockdev_ref(qcow2_opts->file, errp);
    if (bs == NULL) {
        return -EIO;
    }

    /* Validate options and set default values */
    if (!QEMU_IS_ALIGNED(qcow2_opts->size, BDRV_SECTOR_SIZE)) {
        error_setg(errp, "Image size must be a multiple of 512 bytes");
        ret = -EINVAL;
        goto out;
    }

    if (qcow2_opts->has_version) {
        switch (qcow2_opts->version) {
        case BLOCKDEV_QCOW2_VERSION_V2:
            version = 2;
            break;
        case BLOCKDEV_QCOW2_VERSION_V3:
            version = 3;
            break;
        default:
            g_assert_not_reached();
        }
    } else {
        /* No version given: default to 3 */
        version = 3;
    }

    if (qcow2_opts->has_cluster_size) {
        cluster_size = qcow2_opts->cluster_size;
    } else {
        cluster_size = DEFAULT_CLUSTER_SIZE;
    }

    if (!validate_cluster_size(cluster_size, errp)) {
        ret = -EINVAL;
        goto out;
    }

    if (!qcow2_opts->has_preallocation) {
        qcow2_opts->preallocation = PREALLOC_MODE_OFF;
    }
    if (qcow2_opts->has_backing_file &&
        qcow2_opts->preallocation != PREALLOC_MODE_OFF)
    {
        error_setg(errp, "Backing file and preallocation cannot be used at "
                   "the same time");
        ret = -EINVAL;
        goto out;
    }
    if (qcow2_opts->has_backing_fmt && !qcow2_opts->has_backing_file) {
        error_setg(errp, "Backing format cannot be used without backing file");
        ret = -EINVAL;
        goto out;
    }

    if (!qcow2_opts->has_lazy_refcounts) {
        qcow2_opts->lazy_refcounts = false;
    }
    if (version < 3 && qcow2_opts->lazy_refcounts) {
        error_setg(errp, "Lazy refcounts only supported with compatibility "
                   "level 1.1 and above (use version=v3 or greater)");
        ret = -EINVAL;
        goto out;
    }

    if (!qcow2_opts->has_refcount_bits) {
        qcow2_opts->refcount_bits = 16;
    }
    if (qcow2_opts->refcount_bits > 64 ||
        !is_power_of_2(qcow2_opts->refcount_bits))
    {
        error_setg(errp, "Refcount width must be a power of two and may not "
                   "exceed 64 bits");
        ret = -EINVAL;
        goto out;
    }
    if (version < 3 && qcow2_opts->refcount_bits != 16) {
        error_setg(errp, "Different refcount widths than 16 bits require "
                   "compatibility level 1.1 or above (use version=v3 or "
                   "greater)");
        ret = -EINVAL;
        goto out;
    }
    /* refcount_bits is a power of two here, so this is its log2 */
    refcount_order = ctz32(qcow2_opts->refcount_bits);


    /* Create a BlockBackend on top of bs to write the initial structures */
    blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
    ret = blk_insert_bs(blk, bs, errp);
    if (ret < 0) {
        goto out;
    }
    blk_set_allow_write_beyond_eof(blk, true);

    /* Truncate the node to zero length, then preallocate it if requested */
    ret = blk_truncate(blk, 0, PREALLOC_MODE_OFF, errp);
    if (ret < 0) {
        goto out;
    }

    if (qcow2_opts->preallocation == PREALLOC_MODE_FULL ||
        qcow2_opts->preallocation == PREALLOC_MODE_FALLOC)
    {
        int64_t prealloc_size =
            qcow2_calc_prealloc_size(qcow2_opts->size, cluster_size,
                                     refcount_order);

        ret = blk_truncate(blk, prealloc_size, qcow2_opts->preallocation, errp);
        if (ret < 0) {
            goto out;
        }
    }

    /*
     * Write the header: a full zeroed cluster whose leading QCowHeader
     * describes an image of size 0 with no L1 table and a one-cluster
     * refcount table located at offset cluster_size.
     */
    QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header));
    header = g_malloc0(cluster_size);
    *header = (QCowHeader) {
        .magic                      = cpu_to_be32(QCOW_MAGIC),
        .version                    = cpu_to_be32(version),
        .cluster_bits               = cpu_to_be32(ctz32(cluster_size)),
        .size                       = cpu_to_be64(0),
        .l1_table_offset            = cpu_to_be64(0),
        .l1_size                    = cpu_to_be32(0),
        .refcount_table_offset      = cpu_to_be64(cluster_size),
        .refcount_table_clusters    = cpu_to_be32(1),
        .refcount_order             = cpu_to_be32(refcount_order),
        .header_length              = cpu_to_be32(sizeof(*header)),
    };

    /* Start out unencrypted; qcow2_set_up_encryption() below may change it */
    header->crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);

    if (qcow2_opts->lazy_refcounts) {
        header->compatible_features |=
            cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
    }

    ret = blk_pwrite(blk, 0, header, cluster_size, 0);
    g_free(header);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not write qcow2 header");
        goto out;
    }

    /*
     * Write two clusters after the header: the first holds the refcount
     * table, whose entry 0 points at offset 2 * cluster_size; the second
     * is written as all zeroes at that offset.
     */
    refcount_table = g_malloc0(2 * cluster_size);
    refcount_table[0] = cpu_to_be64(2 * cluster_size);
    ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size, 0);
    g_free(refcount_table);

    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not write refcount table");
        goto out;
    }

    blk_unref(blk);
    blk = NULL;

    /*
     * Open the image through the qcow2 driver on top of bs so that the
     * remaining setup can use the regular qcow2 functions.
     */
    options = qdict_new();
    qdict_put_str(options, "driver", "qcow2");
    qdict_put_str(options, "file", bs->node_name);
    blk = blk_new_open(NULL, NULL, options,
                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
                       &local_err);
    if (blk == NULL) {
        error_propagate(errp, local_err);
        ret = -EIO;
        goto out;
    }

    /*
     * Account for the three clusters written above (header, refcount table,
     * refcount block). In an empty image they must be allocated at offset 0.
     */
    ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
                         "header and refcount table");
        goto out;

    } else if (ret != 0) {
        error_report("Huh, first cluster in empty image is already in use?");
        abort();
    }

    /* Rewrite the header through the qcow2 driver */
    ret = qcow2_update_header(blk_bs(blk));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not update qcow2 header");
        goto out;
    }

    /* The image is valid now; give it the requested virtual size */
    ret = blk_truncate(blk, qcow2_opts->size, PREALLOC_MODE_OFF, errp);
    if (ret < 0) {
        error_prepend(errp, "Could not resize image: ");
        goto out;
    }

    /* Record the backing file, if one was requested */
    if (qcow2_opts->has_backing_file) {
        const char *backing_format = NULL;

        if (qcow2_opts->has_backing_fmt) {
            backing_format = BlockdevDriver_str(qcow2_opts->backing_fmt);
        }

        ret = bdrv_change_backing_file(blk_bs(blk), qcow2_opts->backing_file,
                                       backing_format);
        if (ret < 0) {
            /*
             * NOTE(review): backing_format is still NULL here when no
             * backing_fmt was given, and it is passed to a %s conversion.
             */
            error_setg_errno(errp, -ret, "Could not assign backing file '%s' "
                             "with format '%s'", qcow2_opts->backing_file,
                             backing_format);
            goto out;
        }
    }

    /* Set up encryption, if requested */
    if (qcow2_opts->has_encrypt) {
        ret = qcow2_set_up_encryption(blk_bs(blk), qcow2_opts->encrypt, errp);
        if (ret < 0) {
            goto out;
        }
    }

    /* Preallocate, if requested; preallocate_co() is called under s->lock */
    if (qcow2_opts->preallocation != PREALLOC_MODE_OFF) {
        BDRVQcow2State *s = blk_bs(blk)->opaque;
        qemu_co_mutex_lock(&s->lock);
        ret = preallocate_co(blk_bs(blk), 0, qcow2_opts->size);
        qemu_co_mutex_unlock(&s->lock);

        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not preallocate metadata");
            goto out;
        }
    }

    blk_unref(blk);
    blk = NULL;

    /*
     * Open the image once more, this time without BDRV_O_NO_FLUSH and with
     * BDRV_O_NO_BACKING | BDRV_O_NO_IO.
     * NOTE(review): presumably this is done so that the image gets flushed
     * before returning, and without needing a decryption context -- confirm.
     */
    options = qdict_new();
    qdict_put_str(options, "driver", "qcow2");
    qdict_put_str(options, "file", bs->node_name);
    blk = blk_new_open(NULL, NULL, options,
                       BDRV_O_RDWR | BDRV_O_NO_BACKING | BDRV_O_NO_IO,
                       &local_err);
    if (blk == NULL) {
        error_propagate(errp, local_err);
        ret = -EIO;
        goto out;
    }

    ret = 0;
out:
    /* blk may be NULL here; both references are dropped on every path */
    blk_unref(blk);
    bdrv_unref(bs);
    return ret;
}
3102
/*
 * Legacy (QemuOpts based) image creation entry point.
 *
 * Converts @opts into a QDict, rewrites legacy option names and values into
 * the form expected by BlockdevCreateOptions, creates and opens the file
 * @filename, and then delegates to qcow2_co_create().
 *
 * Returns 0 on success or a negative errno with @errp set.
 */
static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opts,
                                             Error **errp)
{
    BlockdevCreateOptions *create_options = NULL;
    QDict *qdict;
    Visitor *v;
    BlockDriverState *bs = NULL;
    Error *local_err = NULL;
    const char *val;
    int ret;

    /*
     * Go through a QDict before building the QAPI type.
     * NOTE(review): presumably only the options listed in
     * bdrv_qcow2.create_opts are kept, so that options meant for the
     * protocol layer do not reach the visitor -- confirm.
     */
    qdict = qemu_opts_to_qdict_filtered(opts, NULL, bdrv_qcow2.create_opts,
                                        true);

    /* Legacy encryption switch: "on" becomes "qcow", "off" removes the key */
    val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT);
    if (val && !strcmp(val, "on")) {
        qdict_put_str(qdict, BLOCK_OPT_ENCRYPT, "qcow");
    } else if (val && !strcmp(val, "off")) {
        qdict_del(qdict, BLOCK_OPT_ENCRYPT);
    }

    /* The encryption format "aes" is passed on as "qcow" */
    val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT);
    if (val && !strcmp(val, "aes")) {
        qdict_put_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT, "qcow");
    }

    /*
     * Convert compat level 0.10/1.1 into v2/v3; the key itself is renamed
     * to "version" below.
     */
    val = qdict_get_try_str(qdict, BLOCK_OPT_COMPAT_LEVEL);
    if (val && !strcmp(val, "0.10")) {
        qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v2");
    } else if (val && !strcmp(val, "1.1")) {
        qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v3");
    }

    /* Map legacy option names onto the names used by the QAPI type */
    static const QDictRenames opt_renames[] = {
        { BLOCK_OPT_BACKING_FILE,       "backing-file" },
        { BLOCK_OPT_BACKING_FMT,        "backing-fmt" },
        { BLOCK_OPT_CLUSTER_SIZE,       "cluster-size" },
        { BLOCK_OPT_LAZY_REFCOUNTS,     "lazy-refcounts" },
        { BLOCK_OPT_REFCOUNT_BITS,      "refcount-bits" },
        { BLOCK_OPT_ENCRYPT,            BLOCK_OPT_ENCRYPT_FORMAT },
        { BLOCK_OPT_COMPAT_LEVEL,       "version" },
        { NULL, NULL },
    };

    if (!qdict_rename_keys(qdict, opt_renames, errp)) {
        ret = -EINVAL;
        goto finish;
    }

    /* Create the file and open it as a protocol-level node */
    ret = bdrv_create_file(filename, opts, errp);
    if (ret < 0) {
        goto finish;
    }

    bs = bdrv_open(filename, NULL, NULL,
                   BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
    if (bs == NULL) {
        ret = -EIO;
        goto finish;
    }

    /* Set the 'driver' and 'file' options; 'file' names the node just opened */
    qdict_put_str(qdict, "driver", "qcow2");
    qdict_put_str(qdict, "file", bs->node_name);

    /* Build the BlockdevCreateOptions object from the QDict */
    v = qobject_input_visitor_new_flat_confused(qdict, errp);
    if (!v) {
        ret = -EINVAL;
        goto finish;
    }

    visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err);
    visit_free(v);

    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto finish;
    }

    /* Silently round the size up to a multiple of the sector size */
    create_options->u.qcow2.size = ROUND_UP(create_options->u.qcow2.size,
                                            BDRV_SECTOR_SIZE);

    /* Create the qcow2 image itself */
    ret = qcow2_co_create(create_options, errp);
    if (ret < 0) {
        goto finish;
    }

    ret = 0;
finish:
    /* bs and create_options may still be NULL on early failure paths */
    qobject_unref(qdict);
    bdrv_unref(bs);
    qapi_free_BlockdevCreateOptions(create_options);
    return ret;
}
3210
3211
3212static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes)
3213{
3214 int64_t nr;
3215 int res;
3216
3217
3218 if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) {
3219 bytes = bs->total_sectors * BDRV_SECTOR_SIZE - offset;
3220 }
3221
3222 if (!bytes) {
3223 return true;
3224 }
3225 res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL);
3226 return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == bytes;
3227}
3228
3229static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
3230 int64_t offset, int bytes, BdrvRequestFlags flags)
3231{
3232 int ret;
3233 BDRVQcow2State *s = bs->opaque;
3234
3235 uint32_t head = offset % s->cluster_size;
3236 uint32_t tail = (offset + bytes) % s->cluster_size;
3237
3238 trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, bytes);
3239 if (offset + bytes == bs->total_sectors * BDRV_SECTOR_SIZE) {
3240 tail = 0;
3241 }
3242
3243 if (head || tail) {
3244 uint64_t off;
3245 unsigned int nr;
3246
3247 assert(head + bytes <= s->cluster_size);
3248
3249
3250 if (!(is_zero(bs, offset - head, head) &&
3251 is_zero(bs, offset + bytes,
3252 tail ? s->cluster_size - tail : 0))) {
3253 return -ENOTSUP;
3254 }
3255
3256 qemu_co_mutex_lock(&s->lock);
3257
3258 offset = QEMU_ALIGN_DOWN(offset, s->cluster_size);
3259 bytes = s->cluster_size;
3260 nr = s->cluster_size;
3261 ret = qcow2_get_cluster_offset(bs, offset, &nr, &off);
3262 if (ret != QCOW2_CLUSTER_UNALLOCATED &&
3263 ret != QCOW2_CLUSTER_ZERO_PLAIN &&
3264 ret != QCOW2_CLUSTER_ZERO_ALLOC) {
3265 qemu_co_mutex_unlock(&s->lock);
3266 return -ENOTSUP;
3267 }
3268 } else {
3269 qemu_co_mutex_lock(&s->lock);
3270 }
3271
3272 trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, bytes);
3273
3274
3275 ret = qcow2_cluster_zeroize(bs, offset, bytes, flags);
3276 qemu_co_mutex_unlock(&s->lock);
3277
3278 return ret;
3279}
3280
3281static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
3282 int64_t offset, int bytes)
3283{
3284 int ret;
3285 BDRVQcow2State *s = bs->opaque;
3286
3287 if (!QEMU_IS_ALIGNED(offset | bytes, s->cluster_size)) {
3288 assert(bytes < s->cluster_size);
3289
3290
3291 if (!QEMU_IS_ALIGNED(offset, s->cluster_size) ||
3292 offset + bytes != bs->total_sectors * BDRV_SECTOR_SIZE) {
3293 return -ENOTSUP;
3294 }
3295 }
3296
3297 qemu_co_mutex_lock(&s->lock);
3298 ret = qcow2_cluster_discard(bs, offset, bytes, QCOW2_DISCARD_REQUEST,
3299 false);
3300 qemu_co_mutex_unlock(&s->lock);
3301 return ret;
3302}
3303
3304static int coroutine_fn
3305qcow2_co_copy_range_from(BlockDriverState *bs,
3306 BdrvChild *src, uint64_t src_offset,
3307 BdrvChild *dst, uint64_t dst_offset,
3308 uint64_t bytes, BdrvRequestFlags read_flags,
3309 BdrvRequestFlags write_flags)
3310{
3311 BDRVQcow2State *s = bs->opaque;
3312 int ret;
3313 unsigned int cur_bytes;
3314 BdrvChild *child = NULL;
3315 BdrvRequestFlags cur_write_flags;
3316
3317 assert(!bs->encrypted);
3318 qemu_co_mutex_lock(&s->lock);
3319
3320 while (bytes != 0) {
3321 uint64_t copy_offset = 0;
3322
3323 cur_bytes = MIN(bytes, INT_MAX);
3324 cur_write_flags = write_flags;
3325
3326 ret = qcow2_get_cluster_offset(bs, src_offset, &cur_bytes, ©_offset);
3327 if (ret < 0) {
3328 goto out;
3329 }
3330
3331 switch (ret) {
3332 case QCOW2_CLUSTER_UNALLOCATED:
3333 if (bs->backing && bs->backing->bs) {
3334 int64_t backing_length = bdrv_getlength(bs->backing->bs);
3335 if (src_offset >= backing_length) {
3336 cur_write_flags |= BDRV_REQ_ZERO_WRITE;
3337 } else {
3338 child = bs->backing;
3339 cur_bytes = MIN(cur_bytes, backing_length - src_offset);
3340 copy_offset = src_offset;
3341 }
3342 } else {
3343 cur_write_flags |= BDRV_REQ_ZERO_WRITE;
3344 }
3345 break;
3346
3347 case QCOW2_CLUSTER_ZERO_PLAIN:
3348 case QCOW2_CLUSTER_ZERO_ALLOC:
3349 cur_write_flags |= BDRV_REQ_ZERO_WRITE;
3350 break;
3351
3352 case QCOW2_CLUSTER_COMPRESSED:
3353 ret = -ENOTSUP;
3354 goto out;
3355
3356 case QCOW2_CLUSTER_NORMAL:
3357 child = bs->file;
3358 copy_offset += offset_into_cluster(s, src_offset);
3359 if ((copy_offset & 511) != 0) {
3360 ret = -EIO;
3361 goto out;
3362 }
3363 break;
3364
3365 default:
3366 abort();
3367 }
3368 qemu_co_mutex_unlock(&s->lock);
3369 ret = bdrv_co_copy_range_from(child,
3370 copy_offset,
3371 dst, dst_offset,
3372 cur_bytes, read_flags, cur_write_flags);
3373 qemu_co_mutex_lock(&s->lock);
3374 if (ret < 0) {
3375 goto out;
3376 }
3377
3378 bytes -= cur_bytes;
3379 src_offset += cur_bytes;
3380 dst_offset += cur_bytes;
3381 }
3382 ret = 0;
3383
3384out:
3385 qemu_co_mutex_unlock(&s->lock);
3386 return ret;
3387}
3388
/*
 * Destination side of a copy-range request: copy @bytes bytes from @src at
 * @src_offset into guest offset @dst_offset of @bs.
 *
 * For every piece, clusters are allocated with
 * qcow2_alloc_cluster_offset(), the target range is checked with
 * qcow2_pre_write_overlap_check(), the data is copied into bs->file with
 * s->lock dropped, and finally the L2 metadata is committed through
 * qcow2_handle_l2meta().
 *
 * Returns 0 on success or a negative errno.
 */
static int coroutine_fn
qcow2_co_copy_range_to(BlockDriverState *bs,
                       BdrvChild *src, uint64_t src_offset,
                       BdrvChild *dst, uint64_t dst_offset,
                       uint64_t bytes, BdrvRequestFlags read_flags,
                       BdrvRequestFlags write_flags)
{
    BDRVQcow2State *s = bs->opaque;
    int offset_in_cluster;
    int ret;
    unsigned int cur_bytes; /* number of bytes in the current iteration */
    uint64_t cluster_offset;
    QCowL2Meta *l2meta = NULL;

    assert(!bs->encrypted);
    /* NOTE(review): presumably invalidates the compressed-cluster cache */
    s->cluster_cache_offset = -1;

    qemu_co_mutex_lock(&s->lock);

    while (bytes != 0) {

        /* Start every piece without pending metadata */
        l2meta = NULL;

        offset_in_cluster = offset_into_cluster(s, dst_offset);
        cur_bytes = MIN(bytes, INT_MAX);

        /*
         * Allocate host clusters for this piece; cur_bytes is passed by
         * pointer, so the call may adjust how much is handled now.
         */
        ret = qcow2_alloc_cluster_offset(bs, dst_offset, &cur_bytes,
                                         &cluster_offset, &l2meta);
        if (ret < 0) {
            goto fail;
        }

        assert((cluster_offset & 511) == 0);

        ret = qcow2_pre_write_overlap_check(bs, 0,
                cluster_offset + offset_in_cluster, cur_bytes);
        if (ret < 0) {
            goto fail;
        }

        /* Do not hold the lock across the nested copy request */
        qemu_co_mutex_unlock(&s->lock);
        ret = bdrv_co_copy_range_to(src, src_offset,
                                    bs->file,
                                    cluster_offset + offset_in_cluster,
                                    cur_bytes, read_flags, write_flags);
        qemu_co_mutex_lock(&s->lock);
        if (ret < 0) {
            goto fail;
        }

        /* The data is in place: commit the L2 updates for this piece */
        ret = qcow2_handle_l2meta(bs, &l2meta, true);
        if (ret) {
            goto fail;
        }

        bytes -= cur_bytes;
        src_offset += cur_bytes;
        dst_offset += cur_bytes;
    }
    ret = 0;

fail:
    /* Also reached on success; called with 'false' as the last argument */
    qcow2_handle_l2meta(bs, &l2meta, false);

    qemu_co_mutex_unlock(&s->lock);

    trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);

    return ret;
}
3463
/*
 * Resize the image to @offset bytes.
 *
 * Shrinking discards the clusters beyond the new end, shrinks the L1 table
 * and the refcount table, and tries to truncate the image file. Growing
 * enlarges the L1 table and then, depending on @prealloc, preallocates
 * nothing, metadata only, or data clusters in the underlying file.
 * Afterwards the new size is written to the image header.
 *
 * Images with snapshots or bitmaps cannot be resized, and @offset must be
 * a multiple of 512. The whole operation runs under s->lock.
 *
 * Returns 0 on success or a negative errno with @errp set.
 */
static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
                                          PreallocMode prealloc, Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t old_length;
    int64_t new_l1_size;
    int ret;
    QDict *options;

    if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_METADATA &&
        prealloc != PREALLOC_MODE_FALLOC && prealloc != PREALLOC_MODE_FULL)
    {
        error_setg(errp, "Unsupported preallocation mode '%s'",
                   PreallocMode_str(prealloc));
        return -ENOTSUP;
    }

    if (offset & 511) {
        error_setg(errp, "The new size must be a multiple of 512");
        return -EINVAL;
    }

    qemu_co_mutex_lock(&s->lock);

    /* Resizing is refused while the image has snapshots */
    if (s->nb_snapshots) {
        error_setg(errp, "Can't resize an image which has snapshots");
        ret = -ENOTSUP;
        goto fail;
    }

    /* Resizing is refused while the image has bitmaps */
    if (s->nb_bitmaps) {

        error_setg(errp, "Can't resize an image which has bitmaps");
        ret = -ENOTSUP;
        goto fail;
    }

    old_length = bs->total_sectors * BDRV_SECTOR_SIZE;
    new_l1_size = size_to_l1(s, offset);

    if (offset < old_length) {
        /* Shrinking */
        int64_t last_cluster, old_file_size;
        if (prealloc != PREALLOC_MODE_OFF) {
            error_setg(errp,
                       "Preallocation can't be used for shrinking an image");
            ret = -EINVAL;
            goto fail;
        }

        /* Discard everything from the first cluster boundary past @offset */
        ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size),
                                    old_length - ROUND_UP(offset,
                                                          s->cluster_size),
                                    QCOW2_DISCARD_ALWAYS, true);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to discard cropped clusters");
            goto fail;
        }

        ret = qcow2_shrink_l1_table(bs, new_l1_size);
        if (ret < 0) {
            error_setg_errno(errp, -ret,
                             "Failed to reduce the number of L2 tables");
            goto fail;
        }

        ret = qcow2_shrink_reftable(bs);
        if (ret < 0) {
            error_setg_errno(errp, -ret,
                             "Failed to discard unused refblocks");
            goto fail;
        }

        old_file_size = bdrv_getlength(bs->file->bs);
        if (old_file_size < 0) {
            error_setg_errno(errp, -old_file_size,
                             "Failed to inquire current file length");
            ret = old_file_size;
            goto fail;
        }
        last_cluster = qcow2_get_last_cluster(bs, old_file_size);
        if (last_cluster < 0) {
            error_setg_errno(errp, -last_cluster,
                             "Failed to find the last cluster");
            ret = last_cluster;
            goto fail;
        }
        if ((last_cluster + 1) * s->cluster_size < old_file_size) {
            Error *local_err = NULL;

            /*
             * Best effort: a failure to cut off the tail of the image file
             * is only reported as a warning and does not fail the resize.
             */
            bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size,
                             PREALLOC_MODE_OFF, &local_err);
            if (local_err) {
                warn_reportf_err(local_err,
                                 "Failed to truncate the tail of the image: ");
            }
        }
    } else {
        /* Growing (or unchanged size) */
        ret = qcow2_grow_l1_table(bs, new_l1_size, true);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to grow the L1 table");
            goto fail;
        }
    }

    switch (prealloc) {
    case PREALLOC_MODE_OFF:
        break;

    case PREALLOC_MODE_METADATA:
        ret = preallocate_co(bs, old_length, offset);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Preallocation failed");
            goto fail;
        }
        break;

    case PREALLOC_MODE_FALLOC:
    case PREALLOC_MODE_FULL:
    {
        int64_t allocation_start, host_offset, guest_offset;
        int64_t clusters_allocated;
        int64_t old_file_size, new_file_size;
        uint64_t nb_new_data_clusters, nb_new_l2_tables;

        old_file_size = bdrv_getlength(bs->file->bs);
        if (old_file_size < 0) {
            error_setg_errno(errp, -old_file_size,
                             "Failed to inquire current file length");
            ret = old_file_size;
            goto fail;
        }
        old_file_size = ROUND_UP(old_file_size, s->cluster_size);

        nb_new_data_clusters = DIV_ROUND_UP(offset - old_length,
                                            s->cluster_size);

        /*
         * Number of L2 tables needed to map the new data clusters: one L2
         * entry (8 bytes) per data cluster, rounded up to whole clusters.
         */
        nb_new_l2_tables = DIV_ROUND_UP(nb_new_data_clusters,
                                        s->cluster_size / sizeof(uint64_t));

        /*
         * One more than computed above.
         * NOTE(review): presumably because the new range need not start at
         * an L2 table boundary and may touch one extra table -- confirm.
         */
        nb_new_l2_tables++;

        /* Make the refcount structures cover the new area */
        allocation_start = qcow2_refcount_area(bs, old_file_size,
                                               nb_new_data_clusters +
                                               nb_new_l2_tables,
                                               true, 0, 0);
        if (allocation_start < 0) {
            error_setg_errno(errp, -allocation_start,
                             "Failed to resize refcount structures");
            ret = allocation_start;
            goto fail;
        }

        clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start,
                                                     nb_new_data_clusters);
        if (clusters_allocated < 0) {
            error_setg_errno(errp, -clusters_allocated,
                             "Failed to allocate data clusters");
            ret = clusters_allocated;
            goto fail;
        }

        assert(clusters_allocated == nb_new_data_clusters);

        /* Grow the image file so that it holds the newly allocated area */
        new_file_size = allocation_start +
                        nb_new_data_clusters * s->cluster_size;
        ret = bdrv_co_truncate(bs->file, new_file_size, prealloc, errp);
        if (ret < 0) {
            error_prepend(errp, "Failed to resize underlying file: ");
            qcow2_free_clusters(bs, allocation_start,
                                nb_new_data_clusters * s->cluster_size,
                                QCOW2_DISCARD_OTHER);
            goto fail;
        }

        /*
         * Link the new clusters into the L2 tables, handling at most the
         * rest of one L2 slice per iteration.
         */
        host_offset = allocation_start;
        guest_offset = old_length;
        while (nb_new_data_clusters) {
            int64_t nb_clusters = MIN(
                nb_new_data_clusters,
                s->l2_slice_size - offset_to_l2_slice_index(s, guest_offset));
            QCowL2Meta allocation = {
                .offset       = guest_offset,
                .alloc_offset = host_offset,
                .nb_clusters  = nb_clusters,
            };
            qemu_co_queue_init(&allocation.dependent_requests);

            ret = qcow2_alloc_cluster_link_l2(bs, &allocation);
            if (ret < 0) {
                error_setg_errno(errp, -ret, "Failed to update L2 tables");
                /* Free the clusters that have not been linked yet */
                qcow2_free_clusters(bs, host_offset,
                                    nb_new_data_clusters * s->cluster_size,
                                    QCOW2_DISCARD_OTHER);
                goto fail;
            }

            guest_offset += nb_clusters * s->cluster_size;
            host_offset += nb_clusters * s->cluster_size;
            nb_new_data_clusters -= nb_clusters;
        }
        break;
    }

    default:
        g_assert_not_reached();
    }

    if (prealloc != PREALLOC_MODE_OFF) {
        /* Write out the metadata caches before changing the image size */
        ret = qcow2_write_caches(bs);
        if (ret < 0) {
            error_setg_errno(errp, -ret,
                             "Failed to flush the preallocated area to disk");
            goto fail;
        }
    }

    bs->total_sectors = offset / BDRV_SECTOR_SIZE;

    /* Write the new size to the header; offset is reused as a BE buffer */
    offset = cpu_to_be64(offset);
    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
                           &offset, sizeof(uint64_t));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Failed to update the image size");
        goto fail;
    }

    s->l1_vm_state_index = new_l1_size;

    /* Re-apply the current options on a shallow copy of bs->options */
    options = qdict_clone_shallow(bs->options);
    ret = qcow2_update_options(bs, options, s->flags, errp);
    qobject_unref(options);
    if (ret < 0) {
        goto fail;
    }
    ret = 0;
fail:
    /* Common exit for success and failure */
    qemu_co_mutex_unlock(&s->lock);
    return ret;
}
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732static ssize_t qcow2_compress(void *dest, const void *src, size_t size)
3733{
3734 ssize_t ret;
3735 z_stream strm;
3736
3737
3738 memset(&strm, 0, sizeof(strm));
3739 ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
3740 -12, 9, Z_DEFAULT_STRATEGY);
3741 if (ret != 0) {
3742 return -2;
3743 }
3744
3745
3746
3747 strm.avail_in = size;
3748 strm.next_in = (void *) src;
3749 strm.avail_out = size - 1;
3750 strm.next_out = dest;
3751
3752 ret = deflate(&strm, Z_FINISH);
3753 if (ret == Z_STREAM_END) {
3754 ret = size - 1 - strm.avail_out;
3755 } else {
3756 ret = (ret == Z_OK ? -1 : -2);
3757 }
3758
3759 deflateEnd(&strm);
3760
3761 return ret;
3762}
3763
/* Limit on concurrently running compression jobs per image:
 * qcow2_co_compress() waits while nb_compress_threads has reached it */
#define MAX_COMPRESS_THREADS 4

/* Argument and result bundle passed (as opaque pointer) to
 * qcow2_compress_pool_func() */
typedef struct Qcow2CompressData {
    void *dest;         /* output buffer handed to qcow2_compress() */
    const void *src;    /* input buffer handed to qcow2_compress() */
    size_t size;        /* number of input bytes */
    ssize_t ret;        /* return value of qcow2_compress(), set by the worker */
} Qcow2CompressData;
3772
3773static int qcow2_compress_pool_func(void *opaque)
3774{
3775 Qcow2CompressData *data = opaque;
3776
3777 data->ret = qcow2_compress(data->dest, data->src, data->size);
3778
3779 return 0;
3780}
3781
/*
 * Completion callback for the compression job: re-enters the coroutine
 * passed as @opaque (qcow2_co_compress() passes qemu_coroutine_self()).
 * @ret is not used; the compression result travels in Qcow2CompressData.
 */
static void qcow2_compress_complete(void *opaque, int ret)
{
    (void)ret;
    qemu_coroutine_enter(opaque);
}
3786
3787
3788static ssize_t qcow2_co_compress(BlockDriverState *bs,
3789 void *dest, const void *src, size_t size)
3790{
3791 BDRVQcow2State *s = bs->opaque;
3792 BlockAIOCB *acb;
3793 ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
3794 Qcow2CompressData arg = {
3795 .dest = dest,
3796 .src = src,
3797 .size = size,
3798 };
3799
3800 while (s->nb_compress_threads >= MAX_COMPRESS_THREADS) {
3801 qemu_co_queue_wait(&s->compress_wait_queue, NULL);
3802 }
3803
3804 s->nb_compress_threads++;
3805 acb = thread_pool_submit_aio(pool, qcow2_compress_pool_func, &arg,
3806 qcow2_compress_complete,
3807 qemu_coroutine_self());
3808
3809 if (!acb) {
3810 s->nb_compress_threads--;
3811 return -EINVAL;
3812 }
3813 qemu_coroutine_yield();
3814 s->nb_compress_threads--;
3815 qemu_co_queue_next(&s->compress_wait_queue);
3816
3817 return arg.ret;
3818}
3819
3820
3821
3822static coroutine_fn int
3823qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
3824 uint64_t bytes, QEMUIOVector *qiov)
3825{
3826 BDRVQcow2State *s = bs->opaque;
3827 QEMUIOVector hd_qiov;
3828 struct iovec iov;
3829 int ret;
3830 size_t out_len;
3831 uint8_t *buf, *out_buf;
3832 int64_t cluster_offset;
3833
3834 if (bytes == 0) {
3835
3836
3837 cluster_offset = bdrv_getlength(bs->file->bs);
3838 if (cluster_offset < 0) {
3839 return cluster_offset;
3840 }
3841 return bdrv_co_truncate(bs->file, cluster_offset, PREALLOC_MODE_OFF,
3842 NULL);
3843 }
3844
3845 if (offset_into_cluster(s, offset)) {
3846 return -EINVAL;
3847 }
3848
3849 buf = qemu_blockalign(bs, s->cluster_size);
3850 if (bytes != s->cluster_size) {
3851 if (bytes > s->cluster_size ||
3852 offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS)
3853 {
3854 qemu_vfree(buf);
3855 return -EINVAL;
3856 }
3857
3858 memset(buf + bytes, 0, s->cluster_size - bytes);
3859 }
3860 qemu_iovec_to_buf(qiov, 0, buf, bytes);
3861
3862 out_buf = g_malloc(s->cluster_size);
3863
3864 out_len = qcow2_co_compress(bs, out_buf, buf, s->cluster_size);
3865 if (out_len == -2) {
3866 ret = -EINVAL;
3867 goto fail;
3868 } else if (out_len == -1) {
3869
3870 ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0);
3871 if (ret < 0) {
3872 goto fail;
3873 }
3874 goto success;
3875 }
3876
3877 qemu_co_mutex_lock(&s->lock);
3878 cluster_offset =
3879 qcow2_alloc_compressed_cluster_offset(bs, offset, out_len);
3880 if (!cluster_offset) {
3881 qemu_co_mutex_unlock(&s->lock);
3882 ret = -EIO;
3883 goto fail;
3884 }
3885 cluster_offset &= s->cluster_offset_mask;
3886
3887 ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len);
3888 qemu_co_mutex_unlock(&s->lock);
3889 if (ret < 0) {
3890 goto fail;
3891 }
3892
3893 iov = (struct iovec) {
3894 .iov_base = out_buf,
3895 .iov_len = out_len,
3896 };
3897 qemu_iovec_init_external(&hd_qiov, &iov, 1);
3898
3899 BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
3900 ret = bdrv_co_pwritev(bs->file, cluster_offset, out_len, &hd_qiov, 0);
3901 if (ret < 0) {
3902 goto fail;
3903 }
3904success:
3905 ret = 0;
3906fail:
3907 qemu_vfree(buf);
3908 g_free(out_buf);
3909 return ret;
3910}
3911
/*
 * Bring the image into a minimal empty state by rewriting its metadata in
 * place rather than discarding cluster by cluster.
 *
 * Resulting layout, in clusters from the start of the file:
 *   cluster 0      - image header (only the L1/reftable fields are rewritten)
 *   cluster 1      - refcount table (one cluster)
 *   cluster 2      - first refcount block
 *   cluster 3...   - L1 table (l1_clusters clusters, all zero)
 * The file is finally truncated to 3 + l1_clusters clusters.
 *
 * The function asserts 3 + l1_clusters <= s->refcount_block_size; the visible
 * caller, qcow2_make_empty(), checks this before calling.
 *
 * Returns 0 on success and a negative errno on failure. When a failure leaves
 * the refcount structures inconsistent (label fail_broken_refcounts),
 * bs->drv is set to NULL.
 */
static int make_completely_empty(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;
    Error *local_err = NULL;
    int ret, l1_clusters;
    int64_t offset;
    uint64_t *new_reftable = NULL;
    uint64_t rt_entry, l1_size2;
    /* Big-endian image of three header fields, written with a single
     * bdrv_pwrite_sync() starting at offsetof(QCowHeader, l1_table_offset);
     * this relies on the three fields being adjacent in QCowHeader
     * (TODO confirm against the header definition) */
    struct {
        uint64_t l1_offset;
        uint64_t reftable_offset;
        uint32_t reftable_clusters;
    } QEMU_PACKED l1_ofs_rt_ofs_cls;

    /* Empty both metadata caches before the tables are overwritten */
    ret = qcow2_cache_empty(bs, s->l2_table_cache);
    if (ret < 0) {
        goto fail;
    }

    ret = qcow2_cache_empty(bs, s->refcount_block_cache);
    if (ret < 0) {
        goto fail;
    }

    /* The refcounts are about to become inconsistent: mark the image dirty */
    ret = qcow2_mark_dirty(bs);
    if (ret < 0) {
        goto fail;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);

    /* L1 table size in clusters and in bytes */
    l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
    l1_size2 = (uint64_t)s->l1_size * sizeof(uint64_t);

    /* Zero the current L1 table on disk, then in memory. From here on every
     * failure goes through fail_broken_refcounts */
    ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset,
                             l1_clusters * s->cluster_size, 0);
    if (ret < 0) {
        goto fail_broken_refcounts;
    }
    memset(s->l1_table, 0, l1_size2);

    BLKDBG_EVENT(bs->file, BLKDBG_EMPTY_IMAGE_PREPARE);

    /* Zero the clusters right after the header that will hold the new
     * refcount table, the first refcount block and the new L1 table
     * (2 + l1_clusters clusters starting at cluster 1) */
    ret = bdrv_pwrite_zeroes(bs->file, s->cluster_size,
                             (2 + l1_clusters) * s->cluster_size, 0);
    if (ret < 0) {
        goto fail_broken_refcounts;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
    BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE);

    /* Point the header at the new structures: refcount table (one cluster)
     * at cluster 1, L1 table at cluster 3 */
    l1_ofs_rt_ofs_cls.l1_offset = cpu_to_be64(3 * s->cluster_size);
    l1_ofs_rt_ofs_cls.reftable_offset = cpu_to_be64(s->cluster_size);
    l1_ofs_rt_ofs_cls.reftable_clusters = cpu_to_be32(1);
    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset),
                           &l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls));
    if (ret < 0) {
        goto fail_broken_refcounts;
    }

    s->l1_table_offset = 3 * s->cluster_size;

    /* Replace the in-memory refcount table by an empty one-cluster table */
    new_reftable = g_try_new0(uint64_t, s->cluster_size / sizeof(uint64_t));
    if (!new_reftable) {
        ret = -ENOMEM;
        goto fail_broken_refcounts;
    }

    s->refcount_table_offset = s->cluster_size;
    s->refcount_table_size = s->cluster_size / sizeof(uint64_t);
    s->max_refcount_table_index = 0;

    g_free(s->refcount_table);
    s->refcount_table = new_reftable;
    /* Ownership moved to s; keep the g_free() at 'fail' from freeing it */
    new_reftable = NULL;

    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);

    /* Enter the first refcount block (cluster 2) into the refcount table,
     * on disk (big endian) and in memory (host value) */
    rt_entry = cpu_to_be64(2 * s->cluster_size);
    ret = bdrv_pwrite_sync(bs->file, s->cluster_size,
                           &rt_entry, sizeof(rt_entry));
    if (ret < 0) {
        goto fail_broken_refcounts;
    }
    s->refcount_table[0] = 2 * s->cluster_size;

    /* Allocate header, reftable, refblock and L1 table through the regular
     * allocator, starting the search at cluster 0; the allocation is expected
     * to land at offset 0, anything else aborts */
    s->free_cluster_index = 0;
    assert(3 + l1_clusters <= s->refcount_block_size);
    offset = qcow2_alloc_clusters(bs, 3 * s->cluster_size + l1_size2);
    if (offset < 0) {
        ret = offset;
        goto fail_broken_refcounts;
    } else if (offset > 0) {
        error_report("First cluster in emptied image is in use");
        abort();
    }

    /* Metadata has been rebuilt: clear the dirty mark again */
    ret = qcow2_mark_clean(bs);
    if (ret < 0) {
        goto fail;
    }

    /* Cut the file down to the clusters that are in use now */
    ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size,
                        PREALLOC_MODE_OFF, &local_err);
    if (ret < 0) {
        error_report_err(local_err);
        goto fail;
    }

    return 0;

fail_broken_refcounts:
    /* Reached after the metadata rewrite has started and a step failed;
     * no repair is attempted here, the driver is detached from the node
     * instead */
    bs->drv = NULL;

fail:
    g_free(new_reftable);
    return ret;
}
4062
4063static int qcow2_make_empty(BlockDriverState *bs)
4064{
4065 BDRVQcow2State *s = bs->opaque;
4066 uint64_t offset, end_offset;
4067 int step = QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size);
4068 int l1_clusters, ret = 0;
4069
4070 l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
4071
4072 if (s->qcow_version >= 3 && !s->snapshots && !s->nb_bitmaps &&
4073 3 + l1_clusters <= s->refcount_block_size &&
4074 s->crypt_method_header != QCOW_CRYPT_LUKS) {
4075
4076
4077
4078
4079
4080
4081
4082 return make_completely_empty(bs);
4083 }
4084
4085
4086
4087 end_offset = bs->total_sectors * BDRV_SECTOR_SIZE;
4088 for (offset = 0; offset < end_offset; offset += step) {
4089
4090
4091
4092
4093
4094 ret = qcow2_cluster_discard(bs, offset, MIN(step, end_offset - offset),
4095 QCOW2_DISCARD_SNAPSHOT, true);
4096 if (ret < 0) {
4097 break;
4098 }
4099 }
4100
4101 return ret;
4102}
4103
4104static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
4105{
4106 BDRVQcow2State *s = bs->opaque;
4107 int ret;
4108
4109 qemu_co_mutex_lock(&s->lock);
4110 ret = qcow2_write_caches(bs);
4111 qemu_co_mutex_unlock(&s->lock);
4112
4113 return ret;
4114}
4115
/*
 * qcow2_measure()
 *
 * Estimate the size of a qcow2 image file (the .bdrv_measure callback).
 *
 * @opts:  creation options; cluster size, version, refcount width,
 *         preallocation mode, backing file and size are read through the
 *         *_del accessors (which presumably also remove them from @opts)
 * @in_bs: optional source node whose contents would be stored in the new
 *         image; may be NULL, in which case only the size option is used
 * @errp:  receives the error on failure
 *
 * Returns: a newly allocated BlockMeasureInfo with @required and
 *          @fully_allocated set, or NULL with @errp set.
 */
static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
                                       Error **errp)
{
    Error *local_err = NULL;
    BlockMeasureInfo *info;
    uint64_t required = 0; /* bytes of guest data that need host clusters */
    uint64_t virtual_size; /* disk size in bytes, rounded up to a cluster */
    uint64_t refcount_bits;
    uint64_t l2_tables;
    size_t cluster_size;
    int version;
    char *optstr;
    PreallocMode prealloc;
    bool has_backing_file;

    /* Parse image creation options */
    cluster_size = qcow2_opt_get_cluster_size_del(opts, &local_err);
    if (local_err) {
        goto err;
    }

    version = qcow2_opt_get_version_del(opts, &local_err);
    if (local_err) {
        goto err;
    }

    refcount_bits = qcow2_opt_get_refcount_bits_del(opts, version, &local_err);
    if (local_err) {
        goto err;
    }

    optstr = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
    prealloc = qapi_enum_parse(&PreallocMode_lookup, optstr,
                               PREALLOC_MODE_OFF, &local_err);
    g_free(optstr);
    if (local_err) {
        goto err;
    }

    /* Only the presence of a backing file matters here, not its name */
    optstr = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
    has_backing_file = !!optstr;
    g_free(optstr);

    virtual_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
    virtual_size = ROUND_UP(virtual_size, cluster_size);

    /* Check that the size from the options is valid: one 8-byte L1 entry is
     * needed per L2 table, and the L1 table may not exceed QCOW_MAX_L1_SIZE */
    l2_tables = DIV_ROUND_UP(virtual_size / cluster_size,
                             cluster_size / sizeof(uint64_t));
    if (l2_tables * sizeof(uint64_t) > QCOW_MAX_L1_SIZE) {
        error_setg(&local_err, "The image size is too large "
                               "(try using a larger cluster size)");
        goto err;
    }

    /* Account for the input image; its length replaces the size option */
    if (in_bs) {
        int64_t ssize = bdrv_getlength(in_bs);
        if (ssize < 0) {
            error_setg_errno(&local_err, -ssize,
                             "Unable to get image virtual_size");
            goto err;
        }

        virtual_size = ROUND_UP(ssize, cluster_size);

        if (has_backing_file) {
            /* With a backing file the block status of @in_bs is not
             * examined; the whole virtual size is counted as required */
            required = virtual_size;
        } else {
            int64_t offset;
            int64_t pnum = 0;

            /* Walk @in_bs extent by extent; pnum is the length of the
             * extent starting at offset and is also the loop increment */
            for (offset = 0; offset < ssize; offset += pnum) {
                int ret;

                ret = bdrv_block_status_above(in_bs, NULL, offset,
                                              ssize - offset, &pnum, NULL,
                                              NULL);
                if (ret < 0) {
                    error_setg_errno(&local_err, -ret,
                                     "Unable to get block status");
                    goto err;
                }

                if (ret & BDRV_BLOCK_ZERO) {
                    /* Zero extents are not counted */
                } else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) ==
                           (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) {
                    /* Extend pnum to the end of the cluster so that the next
                     * iteration starts on a cluster boundary */
                    pnum = ROUND_UP(offset + pnum, cluster_size) - offset;

                    /* Count the extent, including the part of its first
                     * cluster that lies before offset */
                    required += offset % cluster_size + pnum;
                }
            }
        }
    }

    /* With full or falloc preallocation all data clusters are counted,
     * whatever the input image contains */
    if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) {
        required = virtual_size;
    }

    info = g_new(BlockMeasureInfo, 1);
    info->fully_allocated =
        qcow2_calc_prealloc_size(virtual_size, cluster_size,
                                 ctz32(refcount_bits));

    /* Start from the fully allocated size, take out virtual_size bytes and
     * add back the data actually required. Presumably fully_allocated
     * includes virtual_size bytes of data clusters (confirm against
     * qcow2_calc_prealloc_size()); the metadata part stays that of the fully
     * allocated image */
    info->required = info->fully_allocated - virtual_size + required;
    return info;

err:
    error_propagate(errp, local_err);
    return NULL;
}
4242
4243static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4244{
4245 BDRVQcow2State *s = bs->opaque;
4246 bdi->unallocated_blocks_are_zero = true;
4247 bdi->cluster_size = s->cluster_size;
4248 bdi->vm_state_offset = qcow2_vm_state_offset(s);
4249 return 0;
4250}
4251
4252static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs)
4253{
4254 BDRVQcow2State *s = bs->opaque;
4255 ImageInfoSpecific *spec_info;
4256 QCryptoBlockInfo *encrypt_info = NULL;
4257
4258 if (s->crypto != NULL) {
4259 encrypt_info = qcrypto_block_get_info(s->crypto, &error_abort);
4260 }
4261
4262 spec_info = g_new(ImageInfoSpecific, 1);
4263 *spec_info = (ImageInfoSpecific){
4264 .type = IMAGE_INFO_SPECIFIC_KIND_QCOW2,
4265 .u.qcow2.data = g_new(ImageInfoSpecificQCow2, 1),
4266 };
4267 if (s->qcow_version == 2) {
4268 *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
4269 .compat = g_strdup("0.10"),
4270 .refcount_bits = s->refcount_bits,
4271 };
4272 } else if (s->qcow_version == 3) {
4273 *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
4274 .compat = g_strdup("1.1"),
4275 .lazy_refcounts = s->compatible_features &
4276 QCOW2_COMPAT_LAZY_REFCOUNTS,
4277 .has_lazy_refcounts = true,
4278 .corrupt = s->incompatible_features &
4279 QCOW2_INCOMPAT_CORRUPT,
4280 .has_corrupt = true,
4281 .refcount_bits = s->refcount_bits,
4282 };
4283 } else {
4284
4285
4286 assert(false);
4287 }
4288
4289 if (encrypt_info) {
4290 ImageInfoSpecificQCow2Encryption *qencrypt =
4291 g_new(ImageInfoSpecificQCow2Encryption, 1);
4292 switch (encrypt_info->format) {
4293 case Q_CRYPTO_BLOCK_FORMAT_QCOW:
4294 qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_AES;
4295 break;
4296 case Q_CRYPTO_BLOCK_FORMAT_LUKS:
4297 qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_LUKS;
4298 qencrypt->u.luks = encrypt_info->u.luks;
4299 break;
4300 default:
4301 abort();
4302 }
4303
4304
4305 memset(&encrypt_info->u, 0, sizeof(encrypt_info->u));
4306 qapi_free_QCryptoBlockInfo(encrypt_info);
4307
4308 spec_info->u.qcow2.data->has_encrypt = true;
4309 spec_info->u.qcow2.data->encrypt = qencrypt;
4310 }
4311
4312 return spec_info;
4313}
4314
4315static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
4316 int64_t pos)
4317{
4318 BDRVQcow2State *s = bs->opaque;
4319
4320 BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
4321 return bs->drv->bdrv_co_pwritev(bs, qcow2_vm_state_offset(s) + pos,
4322 qiov->size, qiov, 0);
4323}
4324
4325static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
4326 int64_t pos)
4327{
4328 BDRVQcow2State *s = bs->opaque;
4329
4330 BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
4331 return bs->drv->bdrv_co_preadv(bs, qcow2_vm_state_offset(s) + pos,
4332 qiov->size, qiov, 0);
4333}
4334
4335
4336
4337
4338
4339static int qcow2_downgrade(BlockDriverState *bs, int target_version,
4340 BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
4341 Error **errp)
4342{
4343 BDRVQcow2State *s = bs->opaque;
4344 int current_version = s->qcow_version;
4345 int ret;
4346
4347
4348 assert(target_version < current_version);
4349
4350
4351 assert(target_version == 2);
4352
4353 if (s->refcount_order != 4) {
4354 error_setg(errp, "compat=0.10 requires refcount_bits=16");
4355 return -ENOTSUP;
4356 }
4357
4358
4359 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
4360 ret = qcow2_mark_clean(bs);
4361 if (ret < 0) {
4362 error_setg_errno(errp, -ret, "Failed to make the image clean");
4363 return ret;
4364 }
4365 }
4366
4367
4368
4369
4370
4371 if (s->incompatible_features) {
4372 error_setg(errp, "Cannot downgrade an image with incompatible features "
4373 "%#" PRIx64 " set", s->incompatible_features);
4374 return -ENOTSUP;
4375 }
4376
4377
4378 s->compatible_features = 0;
4379
4380
4381
4382
4383 s->autoclear_features = 0;
4384
4385 ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque);
4386 if (ret < 0) {
4387 error_setg_errno(errp, -ret, "Failed to turn zero into data clusters");
4388 return ret;
4389 }
4390
4391 s->qcow_version = target_version;
4392 ret = qcow2_update_header(bs);
4393 if (ret < 0) {
4394 s->qcow_version = current_version;
4395 error_setg_errno(errp, -ret, "Failed to update the image header");
4396 return ret;
4397 }
4398 return 0;
4399}
4400
/* Identifies which amend step is currently reporting progress through
 * qcow2_amend_helper_cb() */
typedef enum Qcow2AmendOperation {
    /* Zero value, so a zero-initialized
     * Qcow2AmendHelperCBInfo::last_operation starts out as "no operation";
     * the helper callback uses this to tell its first invocation apart from
     * a change of operation */
    QCOW2_NO_OPERATION = 0,

    QCOW2_CHANGING_REFCOUNT_ORDER,
    QCOW2_DOWNGRADING,
} Qcow2AmendOperation;
4410
/* State shared between qcow2_amend_options() and qcow2_amend_helper_cb() to
 * turn per-operation progress into overall progress */
typedef struct Qcow2AmendHelperCBInfo {
    /* The first four fields are set by qcow2_amend_options(); the callback
     * only reads them */

    /* User-supplied status callback and its opaque argument */
    BlockDriverAmendStatusCB *original_status_cb;
    void *original_cb_opaque;

    /* Operation about to be run / currently running */
    Qcow2AmendOperation current_operation;

    /* Total number of operations that will report progress */
    int total_operations;

    /* The remaining fields are maintained by qcow2_amend_helper_cb() */

    /* Number of operations that have finished */
    int operations_completed;

    /* Sum of the work sizes of all finished operations */
    int64_t offset_completed;

    /* Operation and work size seen on the previous callback invocation */
    Qcow2AmendOperation last_operation;
    int64_t last_work_size;
} Qcow2AmendHelperCBInfo;
4433
4434static void qcow2_amend_helper_cb(BlockDriverState *bs,
4435 int64_t operation_offset,
4436 int64_t operation_work_size, void *opaque)
4437{
4438 Qcow2AmendHelperCBInfo *info = opaque;
4439 int64_t current_work_size;
4440 int64_t projected_work_size;
4441
4442 if (info->current_operation != info->last_operation) {
4443 if (info->last_operation != QCOW2_NO_OPERATION) {
4444 info->offset_completed += info->last_work_size;
4445 info->operations_completed++;
4446 }
4447
4448 info->last_operation = info->current_operation;
4449 }
4450
4451 assert(info->total_operations > 0);
4452 assert(info->operations_completed < info->total_operations);
4453
4454 info->last_work_size = operation_work_size;
4455
4456 current_work_size = info->offset_completed + operation_work_size;
4457
4458
4459
4460
4461
4462 projected_work_size = current_work_size * (info->total_operations -
4463 info->operations_completed - 1)
4464 / (info->operations_completed + 1);
4465
4466 info->original_status_cb(bs, info->offset_completed + operation_offset,
4467 current_work_size + projected_work_size,
4468 info->original_cb_opaque);
4469}
4470
/*
 * qcow2_amend_options()
 *
 * Change options of an existing image (the .bdrv_amend_options callback).
 *
 * Options that can be changed: compatibility level, virtual size, backing
 * file and backing format, lazy refcounts and refcount width. Preallocation
 * and any "encrypt." option are always rejected; the encryption flag,
 * encryption format and cluster size are accepted only if they match the
 * current image.
 *
 * Order of operations: version upgrade, refcount width change, backing file
 * change, lazy refcounts, resize, version downgrade.
 *
 * @status_cb / @cb_opaque receive progress of the refcount width change and
 * of the downgrade, through qcow2_amend_helper_cb().
 *
 * Returns: 0 on success, negative errno with @errp set on failure. Steps
 * completed before a failure are not rolled back.
 */
static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
                               BlockDriverAmendStatusCB *status_cb,
                               void *cb_opaque,
                               Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    int old_version = s->qcow_version, new_version = old_version;
    uint64_t new_size = 0;
    const char *backing_file = NULL, *backing_format = NULL;
    bool lazy_refcounts = s->use_lazy_refcounts;
    const char *compat = NULL;
    uint64_t cluster_size = s->cluster_size;
    bool encrypt;
    int encformat;
    int refcount_bits = s->refcount_bits;
    int ret;
    QemuOptDesc *desc = opts->list->desc;
    Qcow2AmendHelperCBInfo helper_cb_info;

    /* Pass 1: walk every option the list describes and collect or validate
     * the requested values; nothing is written to the image yet */
    while (desc && desc->name) {
        if (!qemu_opt_find(opts, desc->name)) {
            /* only change explicitly defined options */
            desc++;
            continue;
        }

        if (!strcmp(desc->name, BLOCK_OPT_COMPAT_LEVEL)) {
            compat = qemu_opt_get(opts, BLOCK_OPT_COMPAT_LEVEL);
            if (!compat) {
                /* preserve default */
            } else if (!strcmp(compat, "0.10")) {
                new_version = 2;
            } else if (!strcmp(compat, "1.1")) {
                new_version = 3;
            } else {
                error_setg(errp, "Unknown compatibility level %s", compat);
                return -EINVAL;
            }
        } else if (!strcmp(desc->name, BLOCK_OPT_PREALLOC)) {
            error_setg(errp, "Cannot change preallocation mode");
            return -ENOTSUP;
        } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) {
            new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
        } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FILE)) {
            backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
        } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FMT)) {
            backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
        } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT)) {
            encrypt = qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT,
                                        !!s->crypto);

            /* Accepted only if it restates the current state */
            if (encrypt != !!s->crypto) {
                error_setg(errp,
                           "Changing the encryption flag is not supported");
                return -ENOTSUP;
            }
        } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT_FORMAT)) {
            encformat = qcow2_crypt_method_from_format(
                qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT));

            /* Accepted only if it restates the current format */
            if (encformat != s->crypt_method_header) {
                error_setg(errp,
                           "Changing the encryption format is not supported");
                return -ENOTSUP;
            }
        } else if (g_str_has_prefix(desc->name, "encrypt.")) {
            error_setg(errp,
                       "Changing the encryption parameters is not supported");
            return -ENOTSUP;
        } else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) {
            cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE,
                                             cluster_size);
            if (cluster_size != s->cluster_size) {
                error_setg(errp, "Changing the cluster size is not supported");
                return -ENOTSUP;
            }
        } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
            lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS,
                                               lazy_refcounts);
        } else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) {
            refcount_bits = qemu_opt_get_number(opts, BLOCK_OPT_REFCOUNT_BITS,
                                                refcount_bits);

            if (refcount_bits <= 0 || refcount_bits > 64 ||
                !is_power_of_2(refcount_bits))
            {
                error_setg(errp, "Refcount width must be a power of two and "
                           "may not exceed 64 bits");
                return -EINVAL;
            }
        } else {
            /* An option in the list that this function does not know about:
             * treated as a programming error */
            abort();
        }

        desc++;
    }

    /* total_operations counts the steps that report progress through the
     * helper callback (downgrade and refcount width change); the fields not
     * named here are zero-initialized, so last_operation starts out as
     * QCOW2_NO_OPERATION */
    helper_cb_info = (Qcow2AmendHelperCBInfo){
        .original_status_cb = status_cb,
        .original_cb_opaque = cb_opaque,
        .total_operations = (new_version < old_version)
                          + (s->refcount_bits != refcount_bits)
    };

    /* Upgrade first (some features may require compat=1.1) */
    if (new_version > old_version) {
        s->qcow_version = new_version;
        ret = qcow2_update_header(bs);
        if (ret < 0) {
            s->qcow_version = old_version;
            error_setg_errno(errp, -ret, "Failed to update the image header");
            return ret;
        }
    }

    if (s->refcount_bits != refcount_bits) {
        /* refcount_bits is a power of two (checked above), so its number of
         * trailing zero bits is the refcount order */
        int refcount_order = ctz32(refcount_bits);

        if (new_version < 3 && refcount_bits != 16) {
            error_setg(errp, "Refcount widths other than 16 bits require "
                       "compatibility level 1.1 or above (use compat=1.1 or "
                       "greater)");
            return -EINVAL;
        }

        helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER;
        ret = qcow2_change_refcount_order(bs, refcount_order,
                                          &qcow2_amend_helper_cb,
                                          &helper_cb_info, errp);
        if (ret < 0) {
            return ret;
        }
    }

    if (backing_file || backing_format) {
        /* Whichever of the two was not given keeps its current value */
        ret = qcow2_change_backing_file(bs,
                    backing_file ?: s->image_backing_file,
                    backing_format ?: s->image_backing_format);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to change the backing file");
            return ret;
        }
    }

    if (s->use_lazy_refcounts != lazy_refcounts) {
        if (lazy_refcounts) {
            if (new_version < 3) {
                error_setg(errp, "Lazy refcounts only supported with "
                           "compatibility level 1.1 and above (use compat=1.1 "
                           "or greater)");
                return -EINVAL;
            }
            s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
            ret = qcow2_update_header(bs);
            if (ret < 0) {
                /* Undo the in-memory change if it could not be written */
                s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
                error_setg_errno(errp, -ret, "Failed to update the image header");
                return ret;
            }
            s->use_lazy_refcounts = true;
        } else {
            /* make image clean first */
            ret = qcow2_mark_clean(bs);
            if (ret < 0) {
                error_setg_errno(errp, -ret, "Failed to make the image clean");
                return ret;
            }
            /* now disallow lazy refcounts */
            s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
            ret = qcow2_update_header(bs);
            if (ret < 0) {
                /* Undo the in-memory change if it could not be written */
                s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
                error_setg_errno(errp, -ret, "Failed to update the image header");
                return ret;
            }
            s->use_lazy_refcounts = false;
        }
    }

    if (new_size) {
        /* Resize through a temporary BlockBackend that requests the resize
         * permission on this node */
        BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL);
        ret = blk_insert_bs(blk, bs, errp);
        if (ret < 0) {
            blk_unref(blk);
            return ret;
        }

        ret = blk_truncate(blk, new_size, PREALLOC_MODE_OFF, errp);
        blk_unref(blk);
        if (ret < 0) {
            return ret;
        }
    }

    /* Downgrade last (so unsupported features can be removed before) */
    if (new_version < old_version) {
        helper_cb_info.current_operation = QCOW2_DOWNGRADING;
        ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb,
                              &helper_cb_info, errp);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}
4679
4680
4681
4682
4683
4684
4685
4686void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
4687 int64_t size, const char *message_format, ...)
4688{
4689 BDRVQcow2State *s = bs->opaque;
4690 const char *node_name;
4691 char *message;
4692 va_list ap;
4693
4694 fatal = fatal && bdrv_is_writable(bs);
4695
4696 if (s->signaled_corruption &&
4697 (!fatal || (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT)))
4698 {
4699 return;
4700 }
4701
4702 va_start(ap, message_format);
4703 message = g_strdup_vprintf(message_format, ap);
4704 va_end(ap);
4705
4706 if (fatal) {
4707 fprintf(stderr, "qcow2: Marking image as corrupt: %s; further "
4708 "corruption events will be suppressed\n", message);
4709 } else {
4710 fprintf(stderr, "qcow2: Image is corrupt: %s; further non-fatal "
4711 "corruption events will be suppressed\n", message);
4712 }
4713
4714 node_name = bdrv_get_node_name(bs);
4715 qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs),
4716 *node_name != '\0', node_name,
4717 message, offset >= 0, offset,
4718 size >= 0, size,
4719 fatal);
4720 g_free(message);
4721
4722 if (fatal) {
4723 qcow2_mark_corrupt(bs);
4724 bs->drv = NULL;
4725 }
4726
4727 s->signaled_corruption = true;
4728}
4729
/*
 * Option list for qcow2 images, referenced by bdrv_qcow2.create_opts.
 * qcow2_amend_options() walks this kind of descriptor list and aborts on an
 * entry it does not handle, so new entries need a matching branch there.
 */
static QemuOptsList qcow2_create_opts = {
    .name = "qcow2-create-opts",
    .head = QTAILQ_HEAD_INITIALIZER(qcow2_create_opts.head),
    .desc = {
        {
            .name = BLOCK_OPT_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Virtual disk size"
        },
        {
            .name = BLOCK_OPT_COMPAT_LEVEL,
            .type = QEMU_OPT_STRING,
            .help = "Compatibility level (0.10 or 1.1)"
        },
        {
            .name = BLOCK_OPT_BACKING_FILE,
            .type = QEMU_OPT_STRING,
            .help = "File name of a base image"
        },
        {
            .name = BLOCK_OPT_BACKING_FMT,
            .type = QEMU_OPT_STRING,
            .help = "Image format of the base image"
        },
        {
            .name = BLOCK_OPT_ENCRYPT,
            .type = QEMU_OPT_BOOL,
            .help = "Encrypt the image with format 'aes'. (Deprecated "
                    "in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)",
        },
        {
            .name = BLOCK_OPT_ENCRYPT_FORMAT,
            .type = QEMU_OPT_STRING,
            .help = "Encrypt the image, format choices: 'aes', 'luks'",
        },
        /* Encryption sub-options, all under the "encrypt." prefix */
        BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.",
            "ID of secret providing qcow AES key or LUKS passphrase"),
        BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG("encrypt."),
        BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE("encrypt."),
        BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG("encrypt."),
        BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG("encrypt."),
        BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG("encrypt."),
        BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME("encrypt."),
        {
            .name = BLOCK_OPT_CLUSTER_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "qcow2 cluster size",
            .def_value_str = stringify(DEFAULT_CLUSTER_SIZE)
        },
        {
            .name = BLOCK_OPT_PREALLOC,
            .type = QEMU_OPT_STRING,
            .help = "Preallocation mode (allowed values: off, metadata, "
                    "falloc, full)"
        },
        {
            .name = BLOCK_OPT_LAZY_REFCOUNTS,
            .type = QEMU_OPT_BOOL,
            .help = "Postpone refcount updates",
            .def_value_str = "off"
        },
        {
            .name = BLOCK_OPT_REFCOUNT_BITS,
            .type = QEMU_OPT_NUMBER,
            .help = "Width of a reference count entry in bits",
            .def_value_str = "16"
        },
        { /* end of list */ }
    }
};
4800
/*
 * Block driver description for the "qcow2" format: binds the qcow2_*
 * implementations to the generic BlockDriver callbacks. Registered by
 * bdrv_qcow2_init().
 */
BlockDriver bdrv_qcow2 = {
    .format_name        = "qcow2",
    .instance_size      = sizeof(BDRVQcow2State),

    /* Probing, open/close/reopen, image creation */
    .bdrv_probe         = qcow2_probe,
    .bdrv_open          = qcow2_open,
    .bdrv_close         = qcow2_close,
    .bdrv_reopen_prepare  = qcow2_reopen_prepare,
    .bdrv_reopen_commit   = qcow2_reopen_commit,
    .bdrv_reopen_abort    = qcow2_reopen_abort,
    .bdrv_join_options    = qcow2_join_options,
    .bdrv_child_perm      = bdrv_format_default_perms,
    .bdrv_co_create_opts  = qcow2_co_create_opts,
    .bdrv_co_create       = qcow2_co_create,
    .bdrv_has_zero_init = bdrv_has_zero_init_1,
    .bdrv_co_block_status = qcow2_co_block_status,

    /* Guest data I/O */
    .bdrv_co_preadv         = qcow2_co_preadv,
    .bdrv_co_pwritev        = qcow2_co_pwritev,
    .bdrv_co_flush_to_os    = qcow2_co_flush_to_os,

    .bdrv_co_pwrite_zeroes  = qcow2_co_pwrite_zeroes,
    .bdrv_co_pdiscard       = qcow2_co_pdiscard,
    .bdrv_co_copy_range_from = qcow2_co_copy_range_from,
    .bdrv_co_copy_range_to  = qcow2_co_copy_range_to,
    .bdrv_co_truncate       = qcow2_co_truncate,
    .bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed,
    .bdrv_make_empty        = qcow2_make_empty,

    /* Internal snapshots and image information */
    .bdrv_snapshot_create   = qcow2_snapshot_create,
    .bdrv_snapshot_goto     = qcow2_snapshot_goto,
    .bdrv_snapshot_delete   = qcow2_snapshot_delete,
    .bdrv_snapshot_list     = qcow2_snapshot_list,
    .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp,
    .bdrv_measure           = qcow2_measure,
    .bdrv_get_info          = qcow2_get_info,
    .bdrv_get_specific_info = qcow2_get_specific_info,

    /* VM state storage */
    .bdrv_save_vmstate    = qcow2_save_vmstate,
    .bdrv_load_vmstate    = qcow2_load_vmstate,

    /* Backing file support */
    .supports_backing           = true,
    .bdrv_change_backing_file   = qcow2_change_backing_file,

    .bdrv_refresh_limits        = qcow2_refresh_limits,
    .bdrv_co_invalidate_cache   = qcow2_co_invalidate_cache,
    .bdrv_inactivate            = qcow2_inactivate,

    /* Option list, consistency check, amend */
    .create_opts         = &qcow2_create_opts,
    .bdrv_co_check       = qcow2_co_check,
    .bdrv_amend_options  = qcow2_amend_options,

    .bdrv_detach_aio_context  = qcow2_detach_aio_context,
    .bdrv_attach_aio_context  = qcow2_attach_aio_context,

    /* Persistent dirty bitmaps */
    .bdrv_reopen_bitmaps_rw = qcow2_reopen_bitmaps_rw,
    .bdrv_can_store_new_dirty_bitmap = qcow2_can_store_new_dirty_bitmap,
    .bdrv_remove_persistent_dirty_bitmap = qcow2_remove_persistent_dirty_bitmap,
};
4859
/* Module init hook: registers the qcow2 driver with the block layer */
static void bdrv_qcow2_init(void)
{
    bdrv_register(&bdrv_qcow2);
}

/* Hook bdrv_qcow2_init() into the block module initialization */
block_init(bdrv_qcow2_init);
4866