1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include "qemu/osdep.h"
26
27#define ZLIB_CONST
28#include <zlib.h>
29
30#include "block/block_int.h"
31#include "block/qdict.h"
32#include "sysemu/block-backend.h"
33#include "qemu/module.h"
34#include "qcow2.h"
35#include "qemu/error-report.h"
36#include "qapi/error.h"
37#include "qapi/qapi-events-block-core.h"
38#include "qapi/qmp/qdict.h"
39#include "qapi/qmp/qstring.h"
40#include "trace.h"
41#include "qemu/option_int.h"
42#include "qemu/cutils.h"
43#include "qemu/bswap.h"
44#include "qapi/qobject-input-visitor.h"
45#include "qapi/qapi-visit-block-core.h"
46#include "crypto.h"
47#include "block/thread-pool.h"
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
/*
 * Header that precedes the payload of every qcow2 header extension as it
 * is read from the image file.  qcow2_read_extensions() converts both
 * fields from big endian right after reading them.
 */
typedef struct {
    uint32_t magic; /* extension type, compared against QCOW2_EXT_MAGIC_* */
    uint32_t len;   /* payload length in bytes (this header not included) */
} QEMU_PACKED QCowExtension;
70
/*
 * Values of QCowExtension.magic recognised by qcow2_read_extensions().
 * Any other magic is kept as an "unknown" extension.
 */
#define QCOW2_EXT_MAGIC_END 0 /* stops extension parsing */
#define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA /* backing format name */
#define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857 /* table of Qcow2Feature */
#define QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77 /* LUKS header location */
#define QCOW2_EXT_MAGIC_BITMAPS 0x23852875 /* bitmap directory description */
#define QCOW2_EXT_MAGIC_DATA_FILE 0x44415441 /* external data file name */
77
/*
 * Forward declaration only; the definition is not part of this section.
 * NOTE(review): presumably defined further down in this file - confirm.
 */
static int coroutine_fn
qcow2_co_preadv_compressed(BlockDriverState *bs,
                           uint64_t file_cluster_offset,
                           uint64_t offset,
                           uint64_t bytes,
                           QEMUIOVector *qiov);
84
85static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
86{
87 const QCowHeader *cow_header = (const void *)buf;
88
89 if (buf_size >= sizeof(QCowHeader) &&
90 be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
91 be32_to_cpu(cow_header->version) >= 2)
92 return 100;
93 else
94 return 0;
95}
96
97
98static ssize_t qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset,
99 uint8_t *buf, size_t buflen,
100 void *opaque, Error **errp)
101{
102 BlockDriverState *bs = opaque;
103 BDRVQcow2State *s = bs->opaque;
104 ssize_t ret;
105
106 if ((offset + buflen) > s->crypto_header.length) {
107 error_setg(errp, "Request for data outside of extension header");
108 return -1;
109 }
110
111 ret = bdrv_pread(bs->file,
112 s->crypto_header.offset + offset, buf, buflen);
113 if (ret < 0) {
114 error_setg_errno(errp, -ret, "Could not read encryption header");
115 return -1;
116 }
117 return ret;
118}
119
120
121static ssize_t qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen,
122 void *opaque, Error **errp)
123{
124 BlockDriverState *bs = opaque;
125 BDRVQcow2State *s = bs->opaque;
126 int64_t ret;
127 int64_t clusterlen;
128
129 ret = qcow2_alloc_clusters(bs, headerlen);
130 if (ret < 0) {
131 error_setg_errno(errp, -ret,
132 "Cannot allocate cluster for LUKS header size %zu",
133 headerlen);
134 return -1;
135 }
136
137 s->crypto_header.length = headerlen;
138 s->crypto_header.offset = ret;
139
140
141
142 clusterlen = size_to_clusters(s, headerlen) * s->cluster_size;
143 assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen, false) == 0);
144 ret = bdrv_pwrite_zeroes(bs->file,
145 ret + headerlen,
146 clusterlen - headerlen, 0);
147 if (ret < 0) {
148 error_setg_errno(errp, -ret, "Could not zero fill encryption header");
149 return -1;
150 }
151
152 return ret;
153}
154
155
156static ssize_t qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset,
157 const uint8_t *buf, size_t buflen,
158 void *opaque, Error **errp)
159{
160 BlockDriverState *bs = opaque;
161 BDRVQcow2State *s = bs->opaque;
162 ssize_t ret;
163
164 if ((offset + buflen) > s->crypto_header.length) {
165 error_setg(errp, "Request for data outside of extension header");
166 return -1;
167 }
168
169 ret = bdrv_pwrite(bs->file,
170 s->crypto_header.offset + offset, buf, buflen);
171 if (ret < 0) {
172 error_setg_errno(errp, -ret, "Could not read encryption header");
173 return -1;
174 }
175 return ret;
176}
177
178
179
180
181
182
183
184
185
186static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
187 uint64_t end_offset, void **p_feature_table,
188 int flags, bool *need_update_header,
189 Error **errp)
190{
191 BDRVQcow2State *s = bs->opaque;
192 QCowExtension ext;
193 uint64_t offset;
194 int ret;
195 Qcow2BitmapHeaderExt bitmaps_ext;
196
197 if (need_update_header != NULL) {
198 *need_update_header = false;
199 }
200
201#ifdef DEBUG_EXT
202 printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
203#endif
204 offset = start_offset;
205 while (offset < end_offset) {
206
207#ifdef DEBUG_EXT
208
209 if (offset > s->cluster_size)
210 printf("qcow2_read_extension: suspicious offset %lu\n", offset);
211
212 printf("attempting to read extended header in offset %lu\n", offset);
213#endif
214
215 ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext));
216 if (ret < 0) {
217 error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: "
218 "pread fail from offset %" PRIu64, offset);
219 return 1;
220 }
221 ext.magic = be32_to_cpu(ext.magic);
222 ext.len = be32_to_cpu(ext.len);
223 offset += sizeof(ext);
224#ifdef DEBUG_EXT
225 printf("ext.magic = 0x%x\n", ext.magic);
226#endif
227 if (offset > end_offset || ext.len > end_offset - offset) {
228 error_setg(errp, "Header extension too large");
229 return -EINVAL;
230 }
231
232 switch (ext.magic) {
233 case QCOW2_EXT_MAGIC_END:
234 return 0;
235
236 case QCOW2_EXT_MAGIC_BACKING_FORMAT:
237 if (ext.len >= sizeof(bs->backing_format)) {
238 error_setg(errp, "ERROR: ext_backing_format: len=%" PRIu32
239 " too large (>=%zu)", ext.len,
240 sizeof(bs->backing_format));
241 return 2;
242 }
243 ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len);
244 if (ret < 0) {
245 error_setg_errno(errp, -ret, "ERROR: ext_backing_format: "
246 "Could not read format name");
247 return 3;
248 }
249 bs->backing_format[ext.len] = '\0';
250 s->image_backing_format = g_strdup(bs->backing_format);
251#ifdef DEBUG_EXT
252 printf("Qcow2: Got format extension %s\n", bs->backing_format);
253#endif
254 break;
255
256 case QCOW2_EXT_MAGIC_FEATURE_TABLE:
257 if (p_feature_table != NULL) {
258 void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
259 ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
260 if (ret < 0) {
261 error_setg_errno(errp, -ret, "ERROR: ext_feature_table: "
262 "Could not read table");
263 return ret;
264 }
265
266 *p_feature_table = feature_table;
267 }
268 break;
269
270 case QCOW2_EXT_MAGIC_CRYPTO_HEADER: {
271 unsigned int cflags = 0;
272 if (s->crypt_method_header != QCOW_CRYPT_LUKS) {
273 error_setg(errp, "CRYPTO header extension only "
274 "expected with LUKS encryption method");
275 return -EINVAL;
276 }
277 if (ext.len != sizeof(Qcow2CryptoHeaderExtension)) {
278 error_setg(errp, "CRYPTO header extension size %u, "
279 "but expected size %zu", ext.len,
280 sizeof(Qcow2CryptoHeaderExtension));
281 return -EINVAL;
282 }
283
284 ret = bdrv_pread(bs->file, offset, &s->crypto_header, ext.len);
285 if (ret < 0) {
286 error_setg_errno(errp, -ret,
287 "Unable to read CRYPTO header extension");
288 return ret;
289 }
290 s->crypto_header.offset = be64_to_cpu(s->crypto_header.offset);
291 s->crypto_header.length = be64_to_cpu(s->crypto_header.length);
292
293 if ((s->crypto_header.offset % s->cluster_size) != 0) {
294 error_setg(errp, "Encryption header offset '%" PRIu64 "' is "
295 "not a multiple of cluster size '%u'",
296 s->crypto_header.offset, s->cluster_size);
297 return -EINVAL;
298 }
299
300 if (flags & BDRV_O_NO_IO) {
301 cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
302 }
303 s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
304 qcow2_crypto_hdr_read_func,
305 bs, cflags, 1, errp);
306 if (!s->crypto) {
307 return -EINVAL;
308 }
309 } break;
310
311 case QCOW2_EXT_MAGIC_BITMAPS:
312 if (ext.len != sizeof(bitmaps_ext)) {
313 error_setg_errno(errp, -ret, "bitmaps_ext: "
314 "Invalid extension length");
315 return -EINVAL;
316 }
317
318 if (!(s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS)) {
319 if (s->qcow_version < 3) {
320
321 warn_report("This qcow2 v2 image contains bitmaps, but "
322 "they may have been modified by a program "
323 "without persistent bitmap support; so now "
324 "they must all be considered inconsistent");
325 } else {
326 warn_report("a program lacking bitmap support "
327 "modified this file, so all bitmaps are now "
328 "considered inconsistent");
329 }
330 error_printf("Some clusters may be leaked, "
331 "run 'qemu-img check -r' on the image "
332 "file to fix.");
333 if (need_update_header != NULL) {
334
335 *need_update_header = true;
336 }
337 break;
338 }
339
340 ret = bdrv_pread(bs->file, offset, &bitmaps_ext, ext.len);
341 if (ret < 0) {
342 error_setg_errno(errp, -ret, "bitmaps_ext: "
343 "Could not read ext header");
344 return ret;
345 }
346
347 if (bitmaps_ext.reserved32 != 0) {
348 error_setg_errno(errp, -ret, "bitmaps_ext: "
349 "Reserved field is not zero");
350 return -EINVAL;
351 }
352
353 bitmaps_ext.nb_bitmaps = be32_to_cpu(bitmaps_ext.nb_bitmaps);
354 bitmaps_ext.bitmap_directory_size =
355 be64_to_cpu(bitmaps_ext.bitmap_directory_size);
356 bitmaps_ext.bitmap_directory_offset =
357 be64_to_cpu(bitmaps_ext.bitmap_directory_offset);
358
359 if (bitmaps_ext.nb_bitmaps > QCOW2_MAX_BITMAPS) {
360 error_setg(errp,
361 "bitmaps_ext: Image has %" PRIu32 " bitmaps, "
362 "exceeding the QEMU supported maximum of %d",
363 bitmaps_ext.nb_bitmaps, QCOW2_MAX_BITMAPS);
364 return -EINVAL;
365 }
366
367 if (bitmaps_ext.nb_bitmaps == 0) {
368 error_setg(errp, "found bitmaps extension with zero bitmaps");
369 return -EINVAL;
370 }
371
372 if (bitmaps_ext.bitmap_directory_offset & (s->cluster_size - 1)) {
373 error_setg(errp, "bitmaps_ext: "
374 "invalid bitmap directory offset");
375 return -EINVAL;
376 }
377
378 if (bitmaps_ext.bitmap_directory_size >
379 QCOW2_MAX_BITMAP_DIRECTORY_SIZE) {
380 error_setg(errp, "bitmaps_ext: "
381 "bitmap directory size (%" PRIu64 ") exceeds "
382 "the maximum supported size (%d)",
383 bitmaps_ext.bitmap_directory_size,
384 QCOW2_MAX_BITMAP_DIRECTORY_SIZE);
385 return -EINVAL;
386 }
387
388 s->nb_bitmaps = bitmaps_ext.nb_bitmaps;
389 s->bitmap_directory_offset =
390 bitmaps_ext.bitmap_directory_offset;
391 s->bitmap_directory_size =
392 bitmaps_ext.bitmap_directory_size;
393
394#ifdef DEBUG_EXT
395 printf("Qcow2: Got bitmaps extension: "
396 "offset=%" PRIu64 " nb_bitmaps=%" PRIu32 "\n",
397 s->bitmap_directory_offset, s->nb_bitmaps);
398#endif
399 break;
400
401 case QCOW2_EXT_MAGIC_DATA_FILE:
402 {
403 s->image_data_file = g_malloc0(ext.len + 1);
404 ret = bdrv_pread(bs->file, offset, s->image_data_file, ext.len);
405 if (ret < 0) {
406 error_setg_errno(errp, -ret,
407 "ERROR: Could not read data file name");
408 return ret;
409 }
410#ifdef DEBUG_EXT
411 printf("Qcow2: Got external data file %s\n", s->image_data_file);
412#endif
413 break;
414 }
415
416 default:
417
418
419
420 {
421 Qcow2UnknownHeaderExtension *uext;
422
423 uext = g_malloc0(sizeof(*uext) + ext.len);
424 uext->magic = ext.magic;
425 uext->len = ext.len;
426 QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
427
428 ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
429 if (ret < 0) {
430 error_setg_errno(errp, -ret, "ERROR: unknown extension: "
431 "Could not read data");
432 return ret;
433 }
434 }
435 break;
436 }
437
438 offset += ((ext.len + 7) & ~7);
439 }
440
441 return 0;
442}
443
444static void cleanup_unknown_header_ext(BlockDriverState *bs)
445{
446 BDRVQcow2State *s = bs->opaque;
447 Qcow2UnknownHeaderExtension *uext, *next;
448
449 QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) {
450 QLIST_REMOVE(uext, next);
451 g_free(uext);
452 }
453}
454
455static void report_unsupported_feature(Error **errp, Qcow2Feature *table,
456 uint64_t mask)
457{
458 char *features = g_strdup("");
459 char *old;
460
461 while (table && table->name[0] != '\0') {
462 if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) {
463 if (mask & (1ULL << table->bit)) {
464 old = features;
465 features = g_strdup_printf("%s%s%.46s", old, *old ? ", " : "",
466 table->name);
467 g_free(old);
468 mask &= ~(1ULL << table->bit);
469 }
470 }
471 table++;
472 }
473
474 if (mask) {
475 old = features;
476 features = g_strdup_printf("%s%sUnknown incompatible feature: %" PRIx64,
477 old, *old ? ", " : "", mask);
478 g_free(old);
479 }
480
481 error_setg(errp, "Unsupported qcow2 feature(s): %s", features);
482 g_free(features);
483}
484
485
486
487
488
489
490
491
492int qcow2_mark_dirty(BlockDriverState *bs)
493{
494 BDRVQcow2State *s = bs->opaque;
495 uint64_t val;
496 int ret;
497
498 assert(s->qcow_version >= 3);
499
500 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
501 return 0;
502 }
503
504 val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
505 ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
506 &val, sizeof(val));
507 if (ret < 0) {
508 return ret;
509 }
510 ret = bdrv_flush(bs->file->bs);
511 if (ret < 0) {
512 return ret;
513 }
514
515
516 s->incompatible_features |= QCOW2_INCOMPAT_DIRTY;
517 return 0;
518}
519
520
521
522
523
524
525static int qcow2_mark_clean(BlockDriverState *bs)
526{
527 BDRVQcow2State *s = bs->opaque;
528
529 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
530 int ret;
531
532 s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;
533
534 ret = qcow2_flush_caches(bs);
535 if (ret < 0) {
536 return ret;
537 }
538
539 return qcow2_update_header(bs);
540 }
541 return 0;
542}
543
544
545
546
547int qcow2_mark_corrupt(BlockDriverState *bs)
548{
549 BDRVQcow2State *s = bs->opaque;
550
551 s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT;
552 return qcow2_update_header(bs);
553}
554
555
556
557
558
559int qcow2_mark_consistent(BlockDriverState *bs)
560{
561 BDRVQcow2State *s = bs->opaque;
562
563 if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
564 int ret = qcow2_flush_caches(bs);
565 if (ret < 0) {
566 return ret;
567 }
568
569 s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT;
570 return qcow2_update_header(bs);
571 }
572 return 0;
573}
574
575static int coroutine_fn qcow2_co_check_locked(BlockDriverState *bs,
576 BdrvCheckResult *result,
577 BdrvCheckMode fix)
578{
579 int ret = qcow2_check_refcounts(bs, result, fix);
580 if (ret < 0) {
581 return ret;
582 }
583
584 if (fix && result->check_errors == 0 && result->corruptions == 0) {
585 ret = qcow2_mark_clean(bs);
586 if (ret < 0) {
587 return ret;
588 }
589 return qcow2_mark_consistent(bs);
590 }
591 return ret;
592}
593
594static int coroutine_fn qcow2_co_check(BlockDriverState *bs,
595 BdrvCheckResult *result,
596 BdrvCheckMode fix)
597{
598 BDRVQcow2State *s = bs->opaque;
599 int ret;
600
601 qemu_co_mutex_lock(&s->lock);
602 ret = qcow2_co_check_locked(bs, result, fix);
603 qemu_co_mutex_unlock(&s->lock);
604 return ret;
605}
606
607int qcow2_validate_table(BlockDriverState *bs, uint64_t offset,
608 uint64_t entries, size_t entry_len,
609 int64_t max_size_bytes, const char *table_name,
610 Error **errp)
611{
612 BDRVQcow2State *s = bs->opaque;
613
614 if (entries > max_size_bytes / entry_len) {
615 error_setg(errp, "%s too large", table_name);
616 return -EFBIG;
617 }
618
619
620
621 if ((INT64_MAX - entries * entry_len < offset) ||
622 (offset_into_cluster(s, offset) != 0)) {
623 error_setg(errp, "%s offset invalid", table_name);
624 return -EINVAL;
625 }
626
627 return 0;
628}
629
/*
 * NULL-terminated list of runtime option names.
 * NOTE(review): judging by the name these are presumably the options that
 * may be changed after the image has been opened (e.g. on reopen) - the
 * user of this array is not visible here, confirm there.
 */
static const char *const mutable_opts[] = {
    QCOW2_OPT_LAZY_REFCOUNTS,
    QCOW2_OPT_DISCARD_REQUEST,
    QCOW2_OPT_DISCARD_SNAPSHOT,
    QCOW2_OPT_DISCARD_OTHER,
    QCOW2_OPT_OVERLAP,
    QCOW2_OPT_OVERLAP_TEMPLATE,
    QCOW2_OPT_OVERLAP_MAIN_HEADER,
    QCOW2_OPT_OVERLAP_ACTIVE_L1,
    QCOW2_OPT_OVERLAP_ACTIVE_L2,
    QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
    QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
    QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
    QCOW2_OPT_OVERLAP_INACTIVE_L1,
    QCOW2_OPT_OVERLAP_INACTIVE_L2,
    QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
    QCOW2_OPT_CACHE_SIZE,
    QCOW2_OPT_L2_CACHE_SIZE,
    QCOW2_OPT_L2_CACHE_ENTRY_SIZE,
    QCOW2_OPT_REFCOUNT_CACHE_SIZE,
    QCOW2_OPT_CACHE_CLEAN_INTERVAL,
    NULL
};
653
/*
 * Descriptions (name, value type, help text) of the qcow2 runtime
 * options.  qcow2_update_options_prepare() creates a QemuOpts from this
 * list and fills it from the options QDict.
 */
static QemuOptsList qcow2_runtime_opts = {
    .name = "qcow2",
    .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
    .desc = {
        {
            .name = QCOW2_OPT_LAZY_REFCOUNTS,
            .type = QEMU_OPT_BOOL,
            .help = "Postpone refcount updates",
        },
        {
            .name = QCOW2_OPT_DISCARD_REQUEST,
            .type = QEMU_OPT_BOOL,
            .help = "Pass guest discard requests to the layer below",
        },
        {
            .name = QCOW2_OPT_DISCARD_SNAPSHOT,
            .type = QEMU_OPT_BOOL,
            .help = "Generate discard requests when snapshot related space "
                    "is freed",
        },
        {
            .name = QCOW2_OPT_DISCARD_OTHER,
            .type = QEMU_OPT_BOOL,
            .help = "Generate discard requests when other clusters are freed",
        },
        /*
         * The next two options select the same kind of template; giving
         * both with different values is rejected in
         * qcow2_update_options_prepare().
         */
        {
            .name = QCOW2_OPT_OVERLAP,
            .type = QEMU_OPT_STRING,
            .help = "Selects which overlap checks to perform from a range of "
                    "templates (none, constant, cached, all)",
        },
        {
            .name = QCOW2_OPT_OVERLAP_TEMPLATE,
            .type = QEMU_OPT_STRING,
            .help = "Selects which overlap checks to perform from a range of "
                    "templates (none, constant, cached, all)",
        },
        /* Boolean switches for the individual overlap checks */
        {
            .name = QCOW2_OPT_OVERLAP_MAIN_HEADER,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the main qcow2 header",
        },
        {
            .name = QCOW2_OPT_OVERLAP_ACTIVE_L1,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the active L1 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_ACTIVE_L2,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into an active L2 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the refcount table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into a refcount block",
        },
        {
            .name = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the snapshot table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_INACTIVE_L1,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into an inactive L1 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_INACTIVE_L2,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into an inactive L2 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the bitmap directory",
        },
        /* Metadata cache sizing, evaluated by read_cache_sizes() */
        {
            .name = QCOW2_OPT_CACHE_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Maximum combined metadata (L2 tables and refcount blocks) "
                    "cache size",
        },
        {
            .name = QCOW2_OPT_L2_CACHE_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Maximum L2 table cache size",
        },
        {
            .name = QCOW2_OPT_L2_CACHE_ENTRY_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Size of each entry in the L2 cache",
        },
        {
            .name = QCOW2_OPT_REFCOUNT_CACHE_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Maximum refcount block cache size",
        },
        {
            .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL,
            .type = QEMU_OPT_NUMBER,
            .help = "Clean unused cache entries after this time (in seconds)",
        },
        BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.",
            "ID of secret providing qcow2 AES key or LUKS passphrase"),
        { /* end of list */ }
    },
};
767
/*
 * Maps each overlap check bit number (QCOW2_OL_*_BITNR) to the name of the
 * boolean runtime option that switches that check on or off.  Used by
 * qcow2_update_options_prepare() when it builds the overlap check bitmask.
 */
static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = {
    [QCOW2_OL_MAIN_HEADER_BITNR] = QCOW2_OPT_OVERLAP_MAIN_HEADER,
    [QCOW2_OL_ACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L1,
    [QCOW2_OL_ACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L2,
    [QCOW2_OL_REFCOUNT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
    [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
    [QCOW2_OL_SNAPSHOT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
    [QCOW2_OL_INACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L1,
    [QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2,
    [QCOW2_OL_BITMAP_DIRECTORY_BITNR] = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
};
779
780static void cache_clean_timer_cb(void *opaque)
781{
782 BlockDriverState *bs = opaque;
783 BDRVQcow2State *s = bs->opaque;
784 qcow2_cache_clean_unused(s->l2_table_cache);
785 qcow2_cache_clean_unused(s->refcount_block_cache);
786 timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
787 (int64_t) s->cache_clean_interval * 1000);
788}
789
790static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context)
791{
792 BDRVQcow2State *s = bs->opaque;
793 if (s->cache_clean_interval > 0) {
794 s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL,
795 SCALE_MS, cache_clean_timer_cb,
796 bs);
797 timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
798 (int64_t) s->cache_clean_interval * 1000);
799 }
800}
801
802static void cache_clean_timer_del(BlockDriverState *bs)
803{
804 BDRVQcow2State *s = bs->opaque;
805 if (s->cache_clean_timer) {
806 timer_del(s->cache_clean_timer);
807 timer_free(s->cache_clean_timer);
808 s->cache_clean_timer = NULL;
809 }
810}
811
/*
 * Remove the cache cleaning timer (see cache_clean_timer_del()).
 * NOTE(review): by its name this is the driver callback run when @bs
 * leaves its AioContext - the registration is not visible here.
 */
static void qcow2_detach_aio_context(BlockDriverState *bs)
{
    cache_clean_timer_del(bs);
}
816
/*
 * Create the cache cleaning timer in @new_context (see
 * cache_clean_timer_init()).
 * NOTE(review): by its name this is the driver callback run when @bs
 * enters a new AioContext - the registration is not visible here.
 */
static void qcow2_attach_aio_context(BlockDriverState *bs,
                                     AioContext *new_context)
{
    cache_clean_timer_init(bs, new_context);
}
822
/*
 * Derive the metadata cache parameters from the runtime options in @opts.
 *
 * @l2_cache_size:       receives the size of the L2 table cache
 * @l2_cache_entry_size: receives the size of one L2 cache entry
 * @refcount_cache_size: receives the size of the refcount block cache
 *
 * All three are sizes as returned by qemu_opt_get_size().  On invalid
 * option combinations @errp is set; the output values must not be used
 * then.
 */
static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
                             uint64_t *l2_cache_size,
                             uint64_t *l2_cache_entry_size,
                             uint64_t *refcount_cache_size, Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t combined_cache_size, l2_cache_max_setting;
    bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set;
    bool l2_cache_entry_size_set;
    int min_refcount_cache = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size;
    uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
    /*
     * Equals (virtual_disk_size / cluster_size) * 8, i.e. 8 bytes per
     * cluster of the virtual disk - presumably the L2 cache size needed to
     * map the whole disk (8-byte L2 entries; TODO confirm).
     */
    uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8);

    /* Remember which options were given explicitly */
    combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE);
    l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE);
    refcount_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
    l2_cache_entry_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE);

    combined_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_CACHE_SIZE, 0);
    l2_cache_max_setting = qemu_opt_get_size(opts, QCOW2_OPT_L2_CACHE_SIZE,
                                             DEFAULT_L2_CACHE_MAX_SIZE);
    *refcount_cache_size = qemu_opt_get_size(opts,
                                             QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0);

    *l2_cache_entry_size = qemu_opt_get_size(
        opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE, s->cluster_size);

    /* Never use more L2 cache than max_l2_cache, whatever was requested */
    *l2_cache_size = MIN(max_l2_cache, l2_cache_max_setting);

    if (combined_cache_size_set) {
        if (l2_cache_size_set && refcount_cache_size_set) {
            error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE
                       " and " QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not be set "
                       "at the same time");
            return;
        } else if (l2_cache_size_set &&
                   (l2_cache_max_setting > combined_cache_size)) {
            error_setg(errp, QCOW2_OPT_L2_CACHE_SIZE " may not exceed "
                       QCOW2_OPT_CACHE_SIZE);
            return;
        } else if (*refcount_cache_size > combined_cache_size) {
            error_setg(errp, QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not exceed "
                       QCOW2_OPT_CACHE_SIZE);
            return;
        }

        /*
         * The checks above guarantee that the subtractions below cannot
         * underflow: *l2_cache_size <= l2_cache_max_setting and
         * *refcount_cache_size are both <= combined_cache_size.
         */
        if (l2_cache_size_set) {
            *refcount_cache_size = combined_cache_size - *l2_cache_size;
        } else if (refcount_cache_size_set) {
            *l2_cache_size = combined_cache_size - *refcount_cache_size;
        } else {
            /* Assign as much memory as possible to the L2 cache, and
             * use the remainder for the refcount cache */
            if (combined_cache_size >= max_l2_cache + min_refcount_cache) {
                *l2_cache_size = max_l2_cache;
                *refcount_cache_size = combined_cache_size - *l2_cache_size;
            } else {
                *refcount_cache_size =
                    MIN(combined_cache_size, min_refcount_cache);
                *l2_cache_size = combined_cache_size - *refcount_cache_size;
            }
        }
    }

    /*
     * If the L2 cache is smaller than max_l2_cache and no entry size was
     * requested explicitly, use entries of at most 4096 bytes instead of
     * whole clusters.
     */
    if (*l2_cache_size < max_l2_cache && !l2_cache_entry_size_set) {
        *l2_cache_entry_size = MIN(s->cluster_size, 4096);
    }

    /*
     * No minimum is enforced for l2_cache_size and refcount_cache_size
     * here; qcow2_update_options_prepare() raises them to
     * MIN_L2_CACHE_SIZE and MIN_REFCOUNT_CACHE_SIZE entries.
     */
    if (*l2_cache_entry_size < (1 << MIN_CLUSTER_BITS) ||
        *l2_cache_entry_size > s->cluster_size ||
        !is_power_of_2(*l2_cache_entry_size)) {
        error_setg(errp, "L2 cache entry size must be a power of two "
                   "between %d and the cluster size (%d)",
                   1 << MIN_CLUSTER_BITS, s->cluster_size);
        return;
    }
}
908
/*
 * New option values computed by qcow2_update_options_prepare().  They are
 * either installed into BDRVQcow2State by qcow2_update_options_commit() or
 * released again by qcow2_update_options_abort().
 */
typedef struct Qcow2ReopenState {
    Qcow2Cache *l2_table_cache;       /* newly created L2 table cache */
    Qcow2Cache *refcount_block_cache; /* newly created refcount block cache */
    int l2_slice_size; /* L2 cache entry size divided by sizeof(uint64_t) */
    bool use_lazy_refcounts;
    int overlap_check; /* bitmask, bit i corresponds to QCOW2_OL_*_BITNR i */
    bool discard_passthrough[QCOW2_DISCARD_MAX];
    uint64_t cache_clean_interval; /* value of QCOW2_OPT_CACHE_CLEAN_INTERVAL */
    QCryptoBlockOpenOptions *crypto_opts; /* NULL for unencrypted images */
} Qcow2ReopenState;
919
/*
 * Parse the runtime options in @options and store the resulting new
 * settings in @r without touching the settings currently in use.
 *
 * Entries with the "encrypt." prefix and the options described by
 * qcow2_runtime_opts are removed from @options.  The existing metadata
 * caches are flushed (but kept), and lazy refcounts being switched off
 * causes the image to be marked clean.
 *
 * @flags: open flags; BDRV_O_UNMAP provides the default for the
 *         QCOW2_OPT_DISCARD_REQUEST option.
 *
 * Returns 0 on success or a negative errno with @errp set.  In the error
 * case @r may already hold newly allocated objects; the caller releases
 * them with qcow2_update_options_abort().
 */
static int qcow2_update_options_prepare(BlockDriverState *bs,
                                        Qcow2ReopenState *r,
                                        QDict *options, int flags,
                                        Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    QemuOpts *opts = NULL;
    const char *opt_overlap_check, *opt_overlap_check_template;
    int overlap_check_template = 0;
    uint64_t l2_cache_size, l2_cache_entry_size, refcount_cache_size;
    int i;
    const char *encryptfmt;
    QDict *encryptopts = NULL;
    Error *local_err = NULL;
    int ret;

    /* Split off the "encrypt."-prefixed options; they are handled last */
    qdict_extract_subqdict(options, &encryptopts, "encrypt.");
    encryptfmt = qdict_get_try_str(encryptopts, "format");

    opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

    /* get L2 table/refcount block cache size from the options */
    read_cache_sizes(bs, opts, &l2_cache_size, &l2_cache_entry_size,
                     &refcount_cache_size, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

    /* From here on both cache sizes are numbers of cache entries */
    l2_cache_size /= l2_cache_entry_size;
    if (l2_cache_size < MIN_L2_CACHE_SIZE) {
        l2_cache_size = MIN_L2_CACHE_SIZE;
    }
    if (l2_cache_size > INT_MAX) {
        error_setg(errp, "L2 cache size too big");
        ret = -EINVAL;
        goto fail;
    }

    refcount_cache_size /= s->cluster_size;
    if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) {
        refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE;
    }
    if (refcount_cache_size > INT_MAX) {
        error_setg(errp, "Refcount cache size too big");
        ret = -EINVAL;
        goto fail;
    }

    /* alloc new L2 table/refcount block cache, flush old one */
    if (s->l2_table_cache) {
        ret = qcow2_cache_flush(bs, s->l2_table_cache);
        if (ret) {
            error_setg_errno(errp, -ret, "Failed to flush the L2 table cache");
            goto fail;
        }
    }

    if (s->refcount_block_cache) {
        ret = qcow2_cache_flush(bs, s->refcount_block_cache);
        if (ret) {
            error_setg_errno(errp, -ret,
                             "Failed to flush the refcount block cache");
            goto fail;
        }
    }

    r->l2_slice_size = l2_cache_entry_size / sizeof(uint64_t);
    r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size,
                                           l2_cache_entry_size);
    r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size,
                                                 s->cluster_size);
    if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) {
        error_setg(errp, "Could not allocate metadata caches");
        ret = -ENOMEM;
        goto fail;
    }

    /* New interval for cache cleanup timer */
    r->cache_clean_interval =
        qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL,
                            DEFAULT_CACHE_CLEAN_INTERVAL);
#ifndef CONFIG_LINUX
    /* Any non-zero interval is refused when CONFIG_LINUX is not defined */
    if (r->cache_clean_interval != 0) {
        error_setg(errp, QCOW2_OPT_CACHE_CLEAN_INTERVAL
                   " not supported on this host");
        ret = -EINVAL;
        goto fail;
    }
#endif
    if (r->cache_clean_interval > UINT_MAX) {
        error_setg(errp, "Cache clean interval too big");
        ret = -EINVAL;
        goto fail;
    }

    /* lazy-refcounts; the default follows the image's compatible feature */
    r->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
        (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
    if (r->use_lazy_refcounts && s->qcow_version < 3) {
        error_setg(errp, "Lazy refcounts require a qcow2 image with at least "
                   "qemu 1.1 compatibility level");
        ret = -EINVAL;
        goto fail;
    }

    /* Lazy refcounts are being switched off: mark the image clean */
    if (s->use_lazy_refcounts && !r->use_lazy_refcounts) {
        ret = qcow2_mark_clean(bs);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to disable lazy refcounts");
            goto fail;
        }
    }

    /* Overlap check options */
    opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP);
    opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE);
    if (opt_overlap_check_template && opt_overlap_check &&
        strcmp(opt_overlap_check_template, opt_overlap_check))
    {
        error_setg(errp, "Conflicting values for qcow2 options '"
                   QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE
                   "' ('%s')", opt_overlap_check, opt_overlap_check_template);
        ret = -EINVAL;
        goto fail;
    }
    /* Fall back to the template option and then to "cached" */
    if (!opt_overlap_check) {
        opt_overlap_check = opt_overlap_check_template ?: "cached";
    }

    if (!strcmp(opt_overlap_check, "none")) {
        overlap_check_template = 0;
    } else if (!strcmp(opt_overlap_check, "constant")) {
        overlap_check_template = QCOW2_OL_CONSTANT;
    } else if (!strcmp(opt_overlap_check, "cached")) {
        overlap_check_template = QCOW2_OL_CACHED;
    } else if (!strcmp(opt_overlap_check, "all")) {
        overlap_check_template = QCOW2_OL_ALL;
    } else {
        error_setg(errp, "Unsupported value '%s' for qcow2 option "
                   "'overlap-check'. Allowed are any of the following: "
                   "none, constant, cached, all", opt_overlap_check);
        ret = -EINVAL;
        goto fail;
    }

    r->overlap_check = 0;
    for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) {
        /* The template only supplies the default for each bit; the
         * per-check boolean option, if given, takes precedence */
        r->overlap_check |=
            qemu_opt_get_bool(opts, overlap_bool_option_names[i],
                              overlap_check_template & (1 << i)) << i;
    }

    r->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
    r->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
    r->discard_passthrough[QCOW2_DISCARD_REQUEST] =
        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
                          flags & BDRV_O_UNMAP);
    r->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
    r->discard_passthrough[QCOW2_DISCARD_OTHER] =
        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);

    /* The requested encryption format must agree with the image header */
    switch (s->crypt_method_header) {
    case QCOW_CRYPT_NONE:
        if (encryptfmt) {
            error_setg(errp, "No encryption in image header, but options "
                       "specified format '%s'", encryptfmt);
            ret = -EINVAL;
            goto fail;
        }
        break;

    case QCOW_CRYPT_AES:
        if (encryptfmt && !g_str_equal(encryptfmt, "aes")) {
            error_setg(errp,
                       "Header reported 'aes' encryption format but "
                       "options specify '%s'", encryptfmt);
            ret = -EINVAL;
            goto fail;
        }
        /* The options are handed on with format "qcow" in this case */
        qdict_put_str(encryptopts, "format", "qcow");
        r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp);
        break;

    case QCOW_CRYPT_LUKS:
        if (encryptfmt && !g_str_equal(encryptfmt, "luks")) {
            error_setg(errp,
                       "Header reported 'luks' encryption format but "
                       "options specify '%s'", encryptfmt);
            ret = -EINVAL;
            goto fail;
        }
        qdict_put_str(encryptopts, "format", "luks");
        r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp);
        break;

    default:
        /* r->crypto_opts is not assigned on this path; the check below
         * then fails (assumes the caller passed in a zeroed @r) */
        error_setg(errp, "Unsupported encryption method %d",
                   s->crypt_method_header);
        break;
    }
    if (s->crypt_method_header != QCOW_CRYPT_NONE && !r->crypto_opts) {
        ret = -EINVAL;
        goto fail;
    }

    ret = 0;
    /* Success also passes through the cleanup below */
fail:
    qobject_unref(encryptopts);
    qemu_opts_del(opts);
    opts = NULL;
    return ret;
}
1143
1144static void qcow2_update_options_commit(BlockDriverState *bs,
1145 Qcow2ReopenState *r)
1146{
1147 BDRVQcow2State *s = bs->opaque;
1148 int i;
1149
1150 if (s->l2_table_cache) {
1151 qcow2_cache_destroy(s->l2_table_cache);
1152 }
1153 if (s->refcount_block_cache) {
1154 qcow2_cache_destroy(s->refcount_block_cache);
1155 }
1156 s->l2_table_cache = r->l2_table_cache;
1157 s->refcount_block_cache = r->refcount_block_cache;
1158 s->l2_slice_size = r->l2_slice_size;
1159
1160 s->overlap_check = r->overlap_check;
1161 s->use_lazy_refcounts = r->use_lazy_refcounts;
1162
1163 for (i = 0; i < QCOW2_DISCARD_MAX; i++) {
1164 s->discard_passthrough[i] = r->discard_passthrough[i];
1165 }
1166
1167 if (s->cache_clean_interval != r->cache_clean_interval) {
1168 cache_clean_timer_del(bs);
1169 s->cache_clean_interval = r->cache_clean_interval;
1170 cache_clean_timer_init(bs, bdrv_get_aio_context(bs));
1171 }
1172
1173 qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
1174 s->crypto_opts = r->crypto_opts;
1175}
1176
1177static void qcow2_update_options_abort(BlockDriverState *bs,
1178 Qcow2ReopenState *r)
1179{
1180 if (r->l2_table_cache) {
1181 qcow2_cache_destroy(r->l2_table_cache);
1182 }
1183 if (r->refcount_block_cache) {
1184 qcow2_cache_destroy(r->refcount_block_cache);
1185 }
1186 qapi_free_QCryptoBlockOpenOptions(r->crypto_opts);
1187}
1188
1189static int qcow2_update_options(BlockDriverState *bs, QDict *options,
1190 int flags, Error **errp)
1191{
1192 Qcow2ReopenState r = {};
1193 int ret;
1194
1195 ret = qcow2_update_options_prepare(bs, &r, options, flags, errp);
1196 if (ret >= 0) {
1197 qcow2_update_options_commit(bs, &r);
1198 } else {
1199 qcow2_update_options_abort(bs, &r);
1200 }
1201
1202 return ret;
1203}
1204
1205
1206static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
1207 int flags, Error **errp)
1208{
1209 BDRVQcow2State *s = bs->opaque;
1210 unsigned int len, i;
1211 int ret = 0;
1212 QCowHeader header;
1213 Error *local_err = NULL;
1214 uint64_t ext_end;
1215 uint64_t l1_vm_state_index;
1216 bool update_header = false;
1217
1218 ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
1219 if (ret < 0) {
1220 error_setg_errno(errp, -ret, "Could not read qcow2 header");
1221 goto fail;
1222 }
1223 header.magic = be32_to_cpu(header.magic);
1224 header.version = be32_to_cpu(header.version);
1225 header.backing_file_offset = be64_to_cpu(header.backing_file_offset);
1226 header.backing_file_size = be32_to_cpu(header.backing_file_size);
1227 header.size = be64_to_cpu(header.size);
1228 header.cluster_bits = be32_to_cpu(header.cluster_bits);
1229 header.crypt_method = be32_to_cpu(header.crypt_method);
1230 header.l1_table_offset = be64_to_cpu(header.l1_table_offset);
1231 header.l1_size = be32_to_cpu(header.l1_size);
1232 header.refcount_table_offset = be64_to_cpu(header.refcount_table_offset);
1233 header.refcount_table_clusters =
1234 be32_to_cpu(header.refcount_table_clusters);
1235 header.snapshots_offset = be64_to_cpu(header.snapshots_offset);
1236 header.nb_snapshots = be32_to_cpu(header.nb_snapshots);
1237
1238 if (header.magic != QCOW_MAGIC) {
1239 error_setg(errp, "Image is not in qcow2 format");
1240 ret = -EINVAL;
1241 goto fail;
1242 }
1243 if (header.version < 2 || header.version > 3) {
1244 error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version);
1245 ret = -ENOTSUP;
1246 goto fail;
1247 }
1248
1249 s->qcow_version = header.version;
1250
1251
1252 if (header.cluster_bits < MIN_CLUSTER_BITS ||
1253 header.cluster_bits > MAX_CLUSTER_BITS) {
1254 error_setg(errp, "Unsupported cluster size: 2^%" PRIu32,
1255 header.cluster_bits);
1256 ret = -EINVAL;
1257 goto fail;
1258 }
1259
1260 s->cluster_bits = header.cluster_bits;
1261 s->cluster_size = 1 << s->cluster_bits;
1262 s->cluster_sectors = 1 << (s->cluster_bits - BDRV_SECTOR_BITS);
1263
1264
1265 if (header.version == 2) {
1266 header.incompatible_features = 0;
1267 header.compatible_features = 0;
1268 header.autoclear_features = 0;
1269 header.refcount_order = 4;
1270 header.header_length = 72;
1271 } else {
1272 header.incompatible_features =
1273 be64_to_cpu(header.incompatible_features);
1274 header.compatible_features = be64_to_cpu(header.compatible_features);
1275 header.autoclear_features = be64_to_cpu(header.autoclear_features);
1276 header.refcount_order = be32_to_cpu(header.refcount_order);
1277 header.header_length = be32_to_cpu(header.header_length);
1278
1279 if (header.header_length < 104) {
1280 error_setg(errp, "qcow2 header too short");
1281 ret = -EINVAL;
1282 goto fail;
1283 }
1284 }
1285
1286 if (header.header_length > s->cluster_size) {
1287 error_setg(errp, "qcow2 header exceeds cluster size");
1288 ret = -EINVAL;
1289 goto fail;
1290 }
1291
1292 if (header.header_length > sizeof(header)) {
1293 s->unknown_header_fields_size = header.header_length - sizeof(header);
1294 s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
1295 ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
1296 s->unknown_header_fields_size);
1297 if (ret < 0) {
1298 error_setg_errno(errp, -ret, "Could not read unknown qcow2 header "
1299 "fields");
1300 goto fail;
1301 }
1302 }
1303
1304 if (header.backing_file_offset > s->cluster_size) {
1305 error_setg(errp, "Invalid backing file offset");
1306 ret = -EINVAL;
1307 goto fail;
1308 }
1309
1310 if (header.backing_file_offset) {
1311 ext_end = header.backing_file_offset;
1312 } else {
1313 ext_end = 1 << header.cluster_bits;
1314 }
1315
1316
1317 s->incompatible_features = header.incompatible_features;
1318 s->compatible_features = header.compatible_features;
1319 s->autoclear_features = header.autoclear_features;
1320
1321 if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) {
1322 void *feature_table = NULL;
1323 qcow2_read_extensions(bs, header.header_length, ext_end,
1324 &feature_table, flags, NULL, NULL);
1325 report_unsupported_feature(errp, feature_table,
1326 s->incompatible_features &
1327 ~QCOW2_INCOMPAT_MASK);
1328 ret = -ENOTSUP;
1329 g_free(feature_table);
1330 goto fail;
1331 }
1332
1333 if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
1334
1335
1336 if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
1337 error_setg(errp, "qcow2: Image is corrupt; cannot be opened "
1338 "read/write");
1339 ret = -EACCES;
1340 goto fail;
1341 }
1342 }
1343
1344
1345 if (header.refcount_order > 6) {
1346 error_setg(errp, "Reference count entry width too large; may not "
1347 "exceed 64 bits");
1348 ret = -EINVAL;
1349 goto fail;
1350 }
1351 s->refcount_order = header.refcount_order;
1352 s->refcount_bits = 1 << s->refcount_order;
1353 s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
1354 s->refcount_max += s->refcount_max - 1;
1355
1356 s->crypt_method_header = header.crypt_method;
1357 if (s->crypt_method_header) {
1358 if (bdrv_uses_whitelist() &&
1359 s->crypt_method_header == QCOW_CRYPT_AES) {
1360 error_setg(errp,
1361 "Use of AES-CBC encrypted qcow2 images is no longer "
1362 "supported in system emulators");
1363 error_append_hint(errp,
1364 "You can use 'qemu-img convert' to convert your "
1365 "image to an alternative supported format, such "
1366 "as unencrypted qcow2, or raw with the LUKS "
1367 "format instead.\n");
1368 ret = -ENOSYS;
1369 goto fail;
1370 }
1371
1372 if (s->crypt_method_header == QCOW_CRYPT_AES) {
1373 s->crypt_physical_offset = false;
1374 } else {
1375
1376
1377
1378 s->crypt_physical_offset = true;
1379 }
1380
1381 bs->encrypted = true;
1382 }
1383
1384 s->l2_bits = s->cluster_bits - 3;
1385 s->l2_size = 1 << s->l2_bits;
1386
1387 s->refcount_block_bits = s->cluster_bits - (s->refcount_order - 3);
1388 s->refcount_block_size = 1 << s->refcount_block_bits;
1389 bs->total_sectors = header.size / BDRV_SECTOR_SIZE;
1390 s->csize_shift = (62 - (s->cluster_bits - 8));
1391 s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
1392 s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
1393
1394 s->refcount_table_offset = header.refcount_table_offset;
1395 s->refcount_table_size =
1396 header.refcount_table_clusters << (s->cluster_bits - 3);
1397
1398 if (header.refcount_table_clusters == 0 && !(flags & BDRV_O_CHECK)) {
1399 error_setg(errp, "Image does not contain a reference count table");
1400 ret = -EINVAL;
1401 goto fail;
1402 }
1403
1404 ret = qcow2_validate_table(bs, s->refcount_table_offset,
1405 header.refcount_table_clusters,
1406 s->cluster_size, QCOW_MAX_REFTABLE_SIZE,
1407 "Reference count table", errp);
1408 if (ret < 0) {
1409 goto fail;
1410 }
1411
1412
1413
1414
1415
1416 ret = qcow2_validate_table(bs, header.snapshots_offset,
1417 header.nb_snapshots,
1418 sizeof(QCowSnapshotHeader),
1419 sizeof(QCowSnapshotHeader) * QCOW_MAX_SNAPSHOTS,
1420 "Snapshot table", errp);
1421 if (ret < 0) {
1422 goto fail;
1423 }
1424
1425
1426 ret = qcow2_validate_table(bs, header.l1_table_offset,
1427 header.l1_size, sizeof(uint64_t),
1428 QCOW_MAX_L1_SIZE, "Active L1 table", errp);
1429 if (ret < 0) {
1430 goto fail;
1431 }
1432 s->l1_size = header.l1_size;
1433 s->l1_table_offset = header.l1_table_offset;
1434
1435 l1_vm_state_index = size_to_l1(s, header.size);
1436 if (l1_vm_state_index > INT_MAX) {
1437 error_setg(errp, "Image is too big");
1438 ret = -EFBIG;
1439 goto fail;
1440 }
1441 s->l1_vm_state_index = l1_vm_state_index;
1442
1443
1444
1445 if (s->l1_size < s->l1_vm_state_index) {
1446 error_setg(errp, "L1 table is too small");
1447 ret = -EINVAL;
1448 goto fail;
1449 }
1450
1451 if (s->l1_size > 0) {
1452 s->l1_table = qemu_try_blockalign(bs->file->bs,
1453 ROUND_UP(s->l1_size * sizeof(uint64_t), 512));
1454 if (s->l1_table == NULL) {
1455 error_setg(errp, "Could not allocate L1 table");
1456 ret = -ENOMEM;
1457 goto fail;
1458 }
1459 ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
1460 s->l1_size * sizeof(uint64_t));
1461 if (ret < 0) {
1462 error_setg_errno(errp, -ret, "Could not read L1 table");
1463 goto fail;
1464 }
1465 for(i = 0;i < s->l1_size; i++) {
1466 s->l1_table[i] = be64_to_cpu(s->l1_table[i]);
1467 }
1468 }
1469
1470
1471 ret = qcow2_update_options(bs, options, flags, errp);
1472 if (ret < 0) {
1473 goto fail;
1474 }
1475
1476 s->flags = flags;
1477
1478 ret = qcow2_refcount_init(bs);
1479 if (ret != 0) {
1480 error_setg_errno(errp, -ret, "Could not initialize refcount handling");
1481 goto fail;
1482 }
1483
1484 QLIST_INIT(&s->cluster_allocs);
1485 QTAILQ_INIT(&s->discards);
1486
1487
1488 if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL,
1489 flags, &update_header, &local_err)) {
1490 error_propagate(errp, local_err);
1491 ret = -EINVAL;
1492 goto fail;
1493 }
1494
1495
1496 s->data_file = bdrv_open_child(NULL, options, "data-file", bs, &child_file,
1497 true, &local_err);
1498 if (local_err) {
1499 error_propagate(errp, local_err);
1500 ret = -EINVAL;
1501 goto fail;
1502 }
1503
1504 if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) {
1505 if (!s->data_file && s->image_data_file) {
1506 s->data_file = bdrv_open_child(s->image_data_file, options,
1507 "data-file", bs, &child_file,
1508 false, errp);
1509 if (!s->data_file) {
1510 ret = -EINVAL;
1511 goto fail;
1512 }
1513 }
1514 if (!s->data_file) {
1515 error_setg(errp, "'data-file' is required for this image");
1516 ret = -EINVAL;
1517 goto fail;
1518 }
1519 } else {
1520 if (s->data_file) {
1521 error_setg(errp, "'data-file' can only be set for images with an "
1522 "external data file");
1523 ret = -EINVAL;
1524 goto fail;
1525 }
1526
1527 s->data_file = bs->file;
1528
1529 if (data_file_is_raw(bs)) {
1530 error_setg(errp, "data-file-raw requires a data file");
1531 ret = -EINVAL;
1532 goto fail;
1533 }
1534 }
1535
1536
1537
1538
1539
1540 if (s->crypt_method_header && !s->crypto) {
1541 if (s->crypt_method_header == QCOW_CRYPT_AES) {
1542 unsigned int cflags = 0;
1543 if (flags & BDRV_O_NO_IO) {
1544 cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
1545 }
1546 s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
1547 NULL, NULL, cflags, 1, errp);
1548 if (!s->crypto) {
1549 ret = -EINVAL;
1550 goto fail;
1551 }
1552 } else if (!(flags & BDRV_O_NO_IO)) {
1553 error_setg(errp, "Missing CRYPTO header for crypt method %d",
1554 s->crypt_method_header);
1555 ret = -EINVAL;
1556 goto fail;
1557 }
1558 }
1559
1560
1561 if (header.backing_file_offset != 0) {
1562 len = header.backing_file_size;
1563 if (len > MIN(1023, s->cluster_size - header.backing_file_offset) ||
1564 len >= sizeof(bs->backing_file)) {
1565 error_setg(errp, "Backing file name too long");
1566 ret = -EINVAL;
1567 goto fail;
1568 }
1569 ret = bdrv_pread(bs->file, header.backing_file_offset,
1570 bs->auto_backing_file, len);
1571 if (ret < 0) {
1572 error_setg_errno(errp, -ret, "Could not read backing file name");
1573 goto fail;
1574 }
1575 bs->auto_backing_file[len] = '\0';
1576 pstrcpy(bs->backing_file, sizeof(bs->backing_file),
1577 bs->auto_backing_file);
1578 s->image_backing_file = g_strdup(bs->auto_backing_file);
1579 }
1580
1581
1582 s->snapshots_offset = header.snapshots_offset;
1583 s->nb_snapshots = header.nb_snapshots;
1584
1585 ret = qcow2_read_snapshots(bs);
1586 if (ret < 0) {
1587 error_setg_errno(errp, -ret, "Could not read snapshots");
1588 goto fail;
1589 }
1590
1591
1592 update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK;
1593 update_header =
1594 update_header && !bs->read_only && !(flags & BDRV_O_INACTIVE);
1595 if (update_header) {
1596 s->autoclear_features &= QCOW2_AUTOCLEAR_MASK;
1597 }
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657 if (!(bdrv_get_flags(bs) & BDRV_O_INACTIVE)) {
1658
1659 bool header_updated = qcow2_load_dirty_bitmaps(bs, &local_err);
1660
1661 update_header = update_header && !header_updated;
1662 }
1663 if (local_err != NULL) {
1664 error_propagate(errp, local_err);
1665 ret = -EINVAL;
1666 goto fail;
1667 }
1668
1669 if (update_header) {
1670 ret = qcow2_update_header(bs);
1671 if (ret < 0) {
1672 error_setg_errno(errp, -ret, "Could not update qcow2 header");
1673 goto fail;
1674 }
1675 }
1676
1677 bs->supported_zero_flags = header.version >= 3 ? BDRV_REQ_MAY_UNMAP : 0;
1678
1679
1680 if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only &&
1681 (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
1682 BdrvCheckResult result = {0};
1683
1684 ret = qcow2_co_check_locked(bs, &result,
1685 BDRV_FIX_ERRORS | BDRV_FIX_LEAKS);
1686 if (ret < 0 || result.check_errors) {
1687 if (ret >= 0) {
1688 ret = -EIO;
1689 }
1690 error_setg_errno(errp, -ret, "Could not repair dirty image");
1691 goto fail;
1692 }
1693 }
1694
1695#ifdef DEBUG_ALLOC
1696 {
1697 BdrvCheckResult result = {0};
1698 qcow2_check_refcounts(bs, &result, 0);
1699 }
1700#endif
1701
1702 qemu_co_queue_init(&s->compress_wait_queue);
1703
1704 return ret;
1705
1706 fail:
1707 g_free(s->image_data_file);
1708 if (has_data_file(bs)) {
1709 bdrv_unref_child(bs, s->data_file);
1710 }
1711 g_free(s->unknown_header_fields);
1712 cleanup_unknown_header_ext(bs);
1713 qcow2_free_snapshots(bs);
1714 qcow2_refcount_close(bs);
1715 qemu_vfree(s->l1_table);
1716
1717 s->l1_table = NULL;
1718 cache_clean_timer_del(bs);
1719 if (s->l2_table_cache) {
1720 qcow2_cache_destroy(s->l2_table_cache);
1721 }
1722 if (s->refcount_block_cache) {
1723 qcow2_cache_destroy(s->refcount_block_cache);
1724 }
1725 qcrypto_block_free(s->crypto);
1726 qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
1727 return ret;
1728}
1729
1730typedef struct QCow2OpenCo {
1731 BlockDriverState *bs;
1732 QDict *options;
1733 int flags;
1734 Error **errp;
1735 int ret;
1736} QCow2OpenCo;
1737
1738static void coroutine_fn qcow2_open_entry(void *opaque)
1739{
1740 QCow2OpenCo *qoc = opaque;
1741 BDRVQcow2State *s = qoc->bs->opaque;
1742
1743 qemu_co_mutex_lock(&s->lock);
1744 qoc->ret = qcow2_do_open(qoc->bs, qoc->options, qoc->flags, qoc->errp);
1745 qemu_co_mutex_unlock(&s->lock);
1746}
1747
1748static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
1749 Error **errp)
1750{
1751 BDRVQcow2State *s = bs->opaque;
1752 QCow2OpenCo qoc = {
1753 .bs = bs,
1754 .options = options,
1755 .flags = flags,
1756 .errp = errp,
1757 .ret = -EINPROGRESS
1758 };
1759
1760 bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
1761 false, errp);
1762 if (!bs->file) {
1763 return -EINVAL;
1764 }
1765
1766
1767 qemu_co_mutex_init(&s->lock);
1768
1769 if (qemu_in_coroutine()) {
1770
1771 qcow2_open_entry(&qoc);
1772 } else {
1773 assert(qemu_get_current_aio_context() == qemu_get_aio_context());
1774 qemu_coroutine_enter(qemu_coroutine_create(qcow2_open_entry, &qoc));
1775 BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS);
1776 }
1777 return qoc.ret;
1778}
1779
1780static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
1781{
1782 BDRVQcow2State *s = bs->opaque;
1783
1784 if (bs->encrypted) {
1785
1786 bs->bl.request_alignment = qcrypto_block_get_sector_size(s->crypto);
1787 }
1788 bs->bl.pwrite_zeroes_alignment = s->cluster_size;
1789 bs->bl.pdiscard_alignment = s->cluster_size;
1790}
1791
1792static int qcow2_reopen_prepare(BDRVReopenState *state,
1793 BlockReopenQueue *queue, Error **errp)
1794{
1795 Qcow2ReopenState *r;
1796 int ret;
1797
1798 r = g_new0(Qcow2ReopenState, 1);
1799 state->opaque = r;
1800
1801 ret = qcow2_update_options_prepare(state->bs, r, state->options,
1802 state->flags, errp);
1803 if (ret < 0) {
1804 goto fail;
1805 }
1806
1807
1808 if ((state->flags & BDRV_O_RDWR) == 0) {
1809 ret = qcow2_reopen_bitmaps_ro(state->bs, errp);
1810 if (ret < 0) {
1811 goto fail;
1812 }
1813
1814 ret = bdrv_flush(state->bs);
1815 if (ret < 0) {
1816 goto fail;
1817 }
1818
1819 ret = qcow2_mark_clean(state->bs);
1820 if (ret < 0) {
1821 goto fail;
1822 }
1823 }
1824
1825 return 0;
1826
1827fail:
1828 qcow2_update_options_abort(state->bs, r);
1829 g_free(r);
1830 return ret;
1831}
1832
1833static void qcow2_reopen_commit(BDRVReopenState *state)
1834{
1835 qcow2_update_options_commit(state->bs, state->opaque);
1836 g_free(state->opaque);
1837}
1838
1839static void qcow2_reopen_abort(BDRVReopenState *state)
1840{
1841 qcow2_update_options_abort(state->bs, state->opaque);
1842 g_free(state->opaque);
1843}
1844
1845static void qcow2_join_options(QDict *options, QDict *old_options)
1846{
1847 bool has_new_overlap_template =
1848 qdict_haskey(options, QCOW2_OPT_OVERLAP) ||
1849 qdict_haskey(options, QCOW2_OPT_OVERLAP_TEMPLATE);
1850 bool has_new_total_cache_size =
1851 qdict_haskey(options, QCOW2_OPT_CACHE_SIZE);
1852 bool has_all_cache_options;
1853
1854
1855 if (has_new_overlap_template) {
1856 qdict_del(old_options, QCOW2_OPT_OVERLAP);
1857 qdict_del(old_options, QCOW2_OPT_OVERLAP_TEMPLATE);
1858 qdict_del(old_options, QCOW2_OPT_OVERLAP_MAIN_HEADER);
1859 qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L1);
1860 qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L2);
1861 qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_TABLE);
1862 qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK);
1863 qdict_del(old_options, QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE);
1864 qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L1);
1865 qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L2);
1866 }
1867
1868
1869 if (qdict_haskey(options, QCOW2_OPT_CACHE_SIZE)) {
1870 qdict_del(old_options, QCOW2_OPT_L2_CACHE_SIZE);
1871 qdict_del(old_options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
1872 }
1873
1874 qdict_join(options, old_options, false);
1875
1876
1877
1878
1879
1880
1881 has_all_cache_options =
1882 qdict_haskey(options, QCOW2_OPT_CACHE_SIZE) ||
1883 qdict_haskey(options, QCOW2_OPT_L2_CACHE_SIZE) ||
1884 qdict_haskey(options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
1885
1886 if (has_all_cache_options && !has_new_total_cache_size) {
1887 qdict_del(options, QCOW2_OPT_CACHE_SIZE);
1888 }
1889}
1890
1891static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs,
1892 bool want_zero,
1893 int64_t offset, int64_t count,
1894 int64_t *pnum, int64_t *map,
1895 BlockDriverState **file)
1896{
1897 BDRVQcow2State *s = bs->opaque;
1898 uint64_t cluster_offset;
1899 int index_in_cluster, ret;
1900 unsigned int bytes;
1901 int status = 0;
1902
1903 bytes = MIN(INT_MAX, count);
1904 qemu_co_mutex_lock(&s->lock);
1905 ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset);
1906 qemu_co_mutex_unlock(&s->lock);
1907 if (ret < 0) {
1908 return ret;
1909 }
1910
1911 *pnum = bytes;
1912
1913 if ((ret == QCOW2_CLUSTER_NORMAL || ret == QCOW2_CLUSTER_ZERO_ALLOC) &&
1914 !s->crypto) {
1915 index_in_cluster = offset & (s->cluster_size - 1);
1916 *map = cluster_offset | index_in_cluster;
1917 *file = s->data_file->bs;
1918 status |= BDRV_BLOCK_OFFSET_VALID;
1919 }
1920 if (ret == QCOW2_CLUSTER_ZERO_PLAIN || ret == QCOW2_CLUSTER_ZERO_ALLOC) {
1921 status |= BDRV_BLOCK_ZERO;
1922 } else if (ret != QCOW2_CLUSTER_UNALLOCATED) {
1923 status |= BDRV_BLOCK_DATA;
1924 }
1925 return status;
1926}
1927
1928static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs,
1929 QCowL2Meta **pl2meta,
1930 bool link_l2)
1931{
1932 int ret = 0;
1933 QCowL2Meta *l2meta = *pl2meta;
1934
1935 while (l2meta != NULL) {
1936 QCowL2Meta *next;
1937
1938 if (link_l2) {
1939 ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
1940 if (ret) {
1941 goto out;
1942 }
1943 } else {
1944 qcow2_alloc_cluster_abort(bs, l2meta);
1945 }
1946
1947
1948 if (l2meta->nb_clusters != 0) {
1949 QLIST_REMOVE(l2meta, next_in_flight);
1950 }
1951
1952 qemu_co_queue_restart_all(&l2meta->dependent_requests);
1953
1954 next = l2meta->next;
1955 g_free(l2meta);
1956 l2meta = next;
1957 }
1958out:
1959 *pl2meta = l2meta;
1960 return ret;
1961}
1962
1963static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
1964 uint64_t bytes, QEMUIOVector *qiov,
1965 int flags)
1966{
1967 BDRVQcow2State *s = bs->opaque;
1968 int offset_in_cluster;
1969 int ret;
1970 unsigned int cur_bytes;
1971 uint64_t cluster_offset = 0;
1972 uint64_t bytes_done = 0;
1973 QEMUIOVector hd_qiov;
1974 uint8_t *cluster_data = NULL;
1975
1976 qemu_iovec_init(&hd_qiov, qiov->niov);
1977
1978 qemu_co_mutex_lock(&s->lock);
1979
1980 while (bytes != 0) {
1981
1982
1983 cur_bytes = MIN(bytes, INT_MAX);
1984 if (s->crypto) {
1985 cur_bytes = MIN(cur_bytes,
1986 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
1987 }
1988
1989 ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset);
1990 if (ret < 0) {
1991 goto fail;
1992 }
1993
1994 offset_in_cluster = offset_into_cluster(s, offset);
1995
1996 qemu_iovec_reset(&hd_qiov);
1997 qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
1998
1999 switch (ret) {
2000 case QCOW2_CLUSTER_UNALLOCATED:
2001
2002 if (bs->backing) {
2003 BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
2004 qemu_co_mutex_unlock(&s->lock);
2005 ret = bdrv_co_preadv(bs->backing, offset, cur_bytes,
2006 &hd_qiov, 0);
2007 qemu_co_mutex_lock(&s->lock);
2008 if (ret < 0) {
2009 goto fail;
2010 }
2011 } else {
2012
2013 qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
2014 }
2015 break;
2016
2017 case QCOW2_CLUSTER_ZERO_PLAIN:
2018 case QCOW2_CLUSTER_ZERO_ALLOC:
2019 qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
2020 break;
2021
2022 case QCOW2_CLUSTER_COMPRESSED:
2023 qemu_co_mutex_unlock(&s->lock);
2024 ret = qcow2_co_preadv_compressed(bs, cluster_offset,
2025 offset, cur_bytes,
2026 &hd_qiov);
2027 qemu_co_mutex_lock(&s->lock);
2028 if (ret < 0) {
2029 goto fail;
2030 }
2031
2032 break;
2033
2034 case QCOW2_CLUSTER_NORMAL:
2035 if ((cluster_offset & 511) != 0) {
2036 ret = -EIO;
2037 goto fail;
2038 }
2039
2040 if (bs->encrypted) {
2041 assert(s->crypto);
2042
2043
2044
2045
2046
2047 if (!cluster_data) {
2048 cluster_data =
2049 qemu_try_blockalign(s->data_file->bs,
2050 QCOW_MAX_CRYPT_CLUSTERS
2051 * s->cluster_size);
2052 if (cluster_data == NULL) {
2053 ret = -ENOMEM;
2054 goto fail;
2055 }
2056 }
2057
2058 assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
2059 qemu_iovec_reset(&hd_qiov);
2060 qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
2061 }
2062
2063 BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
2064 qemu_co_mutex_unlock(&s->lock);
2065 ret = bdrv_co_preadv(s->data_file,
2066 cluster_offset + offset_in_cluster,
2067 cur_bytes, &hd_qiov, 0);
2068 qemu_co_mutex_lock(&s->lock);
2069 if (ret < 0) {
2070 goto fail;
2071 }
2072 if (bs->encrypted) {
2073 assert(s->crypto);
2074 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
2075 assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
2076 if (qcrypto_block_decrypt(s->crypto,
2077 (s->crypt_physical_offset ?
2078 cluster_offset + offset_in_cluster :
2079 offset),
2080 cluster_data,
2081 cur_bytes,
2082 NULL) < 0) {
2083 ret = -EIO;
2084 goto fail;
2085 }
2086 qemu_iovec_from_buf(qiov, bytes_done, cluster_data, cur_bytes);
2087 }
2088 break;
2089
2090 default:
2091 g_assert_not_reached();
2092 ret = -EIO;
2093 goto fail;
2094 }
2095
2096 bytes -= cur_bytes;
2097 offset += cur_bytes;
2098 bytes_done += cur_bytes;
2099 }
2100 ret = 0;
2101
2102fail:
2103 qemu_co_mutex_unlock(&s->lock);
2104
2105 qemu_iovec_destroy(&hd_qiov);
2106 qemu_vfree(cluster_data);
2107
2108 return ret;
2109}
2110
2111
2112
2113static bool merge_cow(uint64_t offset, unsigned bytes,
2114 QEMUIOVector *hd_qiov, QCowL2Meta *l2meta)
2115{
2116 QCowL2Meta *m;
2117
2118 for (m = l2meta; m != NULL; m = m->next) {
2119
2120 if (m->cow_start.nb_bytes == 0 && m->cow_end.nb_bytes == 0) {
2121 continue;
2122 }
2123
2124
2125
2126 if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) {
2127 continue;
2128 }
2129
2130
2131
2132 if (m->offset + m->cow_end.offset != offset + bytes) {
2133 continue;
2134 }
2135
2136
2137
2138 if (hd_qiov->niov > IOV_MAX - 2) {
2139 continue;
2140 }
2141
2142 m->data_qiov = hd_qiov;
2143 return true;
2144 }
2145
2146 return false;
2147}
2148
2149static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
2150 uint64_t bytes, QEMUIOVector *qiov,
2151 int flags)
2152{
2153 BDRVQcow2State *s = bs->opaque;
2154 int offset_in_cluster;
2155 int ret;
2156 unsigned int cur_bytes;
2157 uint64_t cluster_offset;
2158 QEMUIOVector hd_qiov;
2159 uint64_t bytes_done = 0;
2160 uint8_t *cluster_data = NULL;
2161 QCowL2Meta *l2meta = NULL;
2162
2163 trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);
2164
2165 qemu_iovec_init(&hd_qiov, qiov->niov);
2166
2167 qemu_co_mutex_lock(&s->lock);
2168
2169 while (bytes != 0) {
2170
2171 l2meta = NULL;
2172
2173 trace_qcow2_writev_start_part(qemu_coroutine_self());
2174 offset_in_cluster = offset_into_cluster(s, offset);
2175 cur_bytes = MIN(bytes, INT_MAX);
2176 if (bs->encrypted) {
2177 cur_bytes = MIN(cur_bytes,
2178 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
2179 - offset_in_cluster);
2180 }
2181
2182 ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
2183 &cluster_offset, &l2meta);
2184 if (ret < 0) {
2185 goto fail;
2186 }
2187
2188 assert((cluster_offset & 511) == 0);
2189
2190 qemu_iovec_reset(&hd_qiov);
2191 qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
2192
2193 if (bs->encrypted) {
2194 assert(s->crypto);
2195 if (!cluster_data) {
2196 cluster_data = qemu_try_blockalign(bs->file->bs,
2197 QCOW_MAX_CRYPT_CLUSTERS
2198 * s->cluster_size);
2199 if (cluster_data == NULL) {
2200 ret = -ENOMEM;
2201 goto fail;
2202 }
2203 }
2204
2205 assert(hd_qiov.size <=
2206 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
2207 qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);
2208
2209 if (qcrypto_block_encrypt(s->crypto,
2210 (s->crypt_physical_offset ?
2211 cluster_offset + offset_in_cluster :
2212 offset),
2213 cluster_data,
2214 cur_bytes, NULL) < 0) {
2215 ret = -EIO;
2216 goto fail;
2217 }
2218
2219 qemu_iovec_reset(&hd_qiov);
2220 qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
2221 }
2222
2223 ret = qcow2_pre_write_overlap_check(bs, 0,
2224 cluster_offset + offset_in_cluster, cur_bytes, true);
2225 if (ret < 0) {
2226 goto fail;
2227 }
2228
2229
2230
2231
2232
2233 if (!merge_cow(offset, cur_bytes, &hd_qiov, l2meta)) {
2234 qemu_co_mutex_unlock(&s->lock);
2235 BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
2236 trace_qcow2_writev_data(qemu_coroutine_self(),
2237 cluster_offset + offset_in_cluster);
2238 ret = bdrv_co_pwritev(s->data_file,
2239 cluster_offset + offset_in_cluster,
2240 cur_bytes, &hd_qiov, 0);
2241 qemu_co_mutex_lock(&s->lock);
2242 if (ret < 0) {
2243 goto fail;
2244 }
2245 }
2246
2247 ret = qcow2_handle_l2meta(bs, &l2meta, true);
2248 if (ret) {
2249 goto fail;
2250 }
2251
2252 bytes -= cur_bytes;
2253 offset += cur_bytes;
2254 bytes_done += cur_bytes;
2255 trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes);
2256 }
2257 ret = 0;
2258
2259fail:
2260 qcow2_handle_l2meta(bs, &l2meta, false);
2261
2262 qemu_co_mutex_unlock(&s->lock);
2263
2264 qemu_iovec_destroy(&hd_qiov);
2265 qemu_vfree(cluster_data);
2266 trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
2267
2268 return ret;
2269}
2270
2271static int qcow2_inactivate(BlockDriverState *bs)
2272{
2273 BDRVQcow2State *s = bs->opaque;
2274 int ret, result = 0;
2275 Error *local_err = NULL;
2276
2277 qcow2_store_persistent_dirty_bitmaps(bs, &local_err);
2278 if (local_err != NULL) {
2279 result = -EINVAL;
2280 error_reportf_err(local_err, "Lost persistent bitmaps during "
2281 "inactivation of node '%s': ",
2282 bdrv_get_device_or_node_name(bs));
2283 }
2284
2285 ret = qcow2_cache_flush(bs, s->l2_table_cache);
2286 if (ret) {
2287 result = ret;
2288 error_report("Failed to flush the L2 table cache: %s",
2289 strerror(-ret));
2290 }
2291
2292 ret = qcow2_cache_flush(bs, s->refcount_block_cache);
2293 if (ret) {
2294 result = ret;
2295 error_report("Failed to flush the refcount block cache: %s",
2296 strerror(-ret));
2297 }
2298
2299 if (result == 0) {
2300 qcow2_mark_clean(bs);
2301 }
2302
2303 return result;
2304}
2305
2306static void qcow2_close(BlockDriverState *bs)
2307{
2308 BDRVQcow2State *s = bs->opaque;
2309 qemu_vfree(s->l1_table);
2310
2311 s->l1_table = NULL;
2312
2313 if (!(s->flags & BDRV_O_INACTIVE)) {
2314 qcow2_inactivate(bs);
2315 }
2316
2317 cache_clean_timer_del(bs);
2318 qcow2_cache_destroy(s->l2_table_cache);
2319 qcow2_cache_destroy(s->refcount_block_cache);
2320
2321 qcrypto_block_free(s->crypto);
2322 s->crypto = NULL;
2323
2324 g_free(s->unknown_header_fields);
2325 cleanup_unknown_header_ext(bs);
2326
2327 g_free(s->image_data_file);
2328 g_free(s->image_backing_file);
2329 g_free(s->image_backing_format);
2330
2331 if (has_data_file(bs)) {
2332 bdrv_unref_child(bs, s->data_file);
2333 }
2334
2335 qcow2_refcount_close(bs);
2336 qcow2_free_snapshots(bs);
2337}
2338
2339static void coroutine_fn qcow2_co_invalidate_cache(BlockDriverState *bs,
2340 Error **errp)
2341{
2342 BDRVQcow2State *s = bs->opaque;
2343 int flags = s->flags;
2344 QCryptoBlock *crypto = NULL;
2345 QDict *options;
2346 Error *local_err = NULL;
2347 int ret;
2348
2349
2350
2351
2352
2353
2354 crypto = s->crypto;
2355 s->crypto = NULL;
2356
2357 qcow2_close(bs);
2358
2359 memset(s, 0, sizeof(BDRVQcow2State));
2360 options = qdict_clone_shallow(bs->options);
2361
2362 flags &= ~BDRV_O_INACTIVE;
2363 qemu_co_mutex_lock(&s->lock);
2364 ret = qcow2_do_open(bs, options, flags, &local_err);
2365 qemu_co_mutex_unlock(&s->lock);
2366 qobject_unref(options);
2367 if (local_err) {
2368 error_propagate_prepend(errp, local_err,
2369 "Could not reopen qcow2 layer: ");
2370 bs->drv = NULL;
2371 return;
2372 } else if (ret < 0) {
2373 error_setg_errno(errp, -ret, "Could not reopen qcow2 layer");
2374 bs->drv = NULL;
2375 return;
2376 }
2377
2378 s->crypto = crypto;
2379}
2380
2381static size_t header_ext_add(char *buf, uint32_t magic, const void *s,
2382 size_t len, size_t buflen)
2383{
2384 QCowExtension *ext_backing_fmt = (QCowExtension*) buf;
2385 size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7);
2386
2387 if (buflen < ext_len) {
2388 return -ENOSPC;
2389 }
2390
2391 *ext_backing_fmt = (QCowExtension) {
2392 .magic = cpu_to_be32(magic),
2393 .len = cpu_to_be32(len),
2394 };
2395
2396 if (len) {
2397 memcpy(buf + sizeof(QCowExtension), s, len);
2398 }
2399
2400 return ext_len;
2401}
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411int qcow2_update_header(BlockDriverState *bs)
2412{
2413 BDRVQcow2State *s = bs->opaque;
2414 QCowHeader *header;
2415 char *buf;
2416 size_t buflen = s->cluster_size;
2417 int ret;
2418 uint64_t total_size;
2419 uint32_t refcount_table_clusters;
2420 size_t header_length;
2421 Qcow2UnknownHeaderExtension *uext;
2422
2423 buf = qemu_blockalign(bs, buflen);
2424
2425
2426 header = (QCowHeader*) buf;
2427
2428 if (buflen < sizeof(*header)) {
2429 ret = -ENOSPC;
2430 goto fail;
2431 }
2432
2433 header_length = sizeof(*header) + s->unknown_header_fields_size;
2434 total_size = bs->total_sectors * BDRV_SECTOR_SIZE;
2435 refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
2436
2437 *header = (QCowHeader) {
2438
2439 .magic = cpu_to_be32(QCOW_MAGIC),
2440 .version = cpu_to_be32(s->qcow_version),
2441 .backing_file_offset = 0,
2442 .backing_file_size = 0,
2443 .cluster_bits = cpu_to_be32(s->cluster_bits),
2444 .size = cpu_to_be64(total_size),
2445 .crypt_method = cpu_to_be32(s->crypt_method_header),
2446 .l1_size = cpu_to_be32(s->l1_size),
2447 .l1_table_offset = cpu_to_be64(s->l1_table_offset),
2448 .refcount_table_offset = cpu_to_be64(s->refcount_table_offset),
2449 .refcount_table_clusters = cpu_to_be32(refcount_table_clusters),
2450 .nb_snapshots = cpu_to_be32(s->nb_snapshots),
2451 .snapshots_offset = cpu_to_be64(s->snapshots_offset),
2452
2453
2454 .incompatible_features = cpu_to_be64(s->incompatible_features),
2455 .compatible_features = cpu_to_be64(s->compatible_features),
2456 .autoclear_features = cpu_to_be64(s->autoclear_features),
2457 .refcount_order = cpu_to_be32(s->refcount_order),
2458 .header_length = cpu_to_be32(header_length),
2459 };
2460
2461
2462 switch (s->qcow_version) {
2463 case 2:
2464 ret = offsetof(QCowHeader, incompatible_features);
2465 break;
2466 case 3:
2467 ret = sizeof(*header);
2468 break;
2469 default:
2470 ret = -EINVAL;
2471 goto fail;
2472 }
2473
2474 buf += ret;
2475 buflen -= ret;
2476 memset(buf, 0, buflen);
2477
2478
2479 if (s->unknown_header_fields_size) {
2480 if (buflen < s->unknown_header_fields_size) {
2481 ret = -ENOSPC;
2482 goto fail;
2483 }
2484
2485 memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size);
2486 buf += s->unknown_header_fields_size;
2487 buflen -= s->unknown_header_fields_size;
2488 }
2489
2490
2491 if (s->image_backing_format) {
2492 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
2493 s->image_backing_format,
2494 strlen(s->image_backing_format),
2495 buflen);
2496 if (ret < 0) {
2497 goto fail;
2498 }
2499
2500 buf += ret;
2501 buflen -= ret;
2502 }
2503
2504
2505 if (has_data_file(bs) && s->image_data_file) {
2506 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_DATA_FILE,
2507 s->image_data_file, strlen(s->image_data_file),
2508 buflen);
2509 if (ret < 0) {
2510 goto fail;
2511 }
2512
2513 buf += ret;
2514 buflen -= ret;
2515 }
2516
2517
2518 if (s->crypto_header.offset != 0) {
2519 s->crypto_header.offset = cpu_to_be64(s->crypto_header.offset);
2520 s->crypto_header.length = cpu_to_be64(s->crypto_header.length);
2521 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_CRYPTO_HEADER,
2522 &s->crypto_header, sizeof(s->crypto_header),
2523 buflen);
2524 s->crypto_header.offset = be64_to_cpu(s->crypto_header.offset);
2525 s->crypto_header.length = be64_to_cpu(s->crypto_header.length);
2526 if (ret < 0) {
2527 goto fail;
2528 }
2529 buf += ret;
2530 buflen -= ret;
2531 }
2532
2533
2534 if (s->qcow_version >= 3) {
2535 Qcow2Feature features[] = {
2536 {
2537 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
2538 .bit = QCOW2_INCOMPAT_DIRTY_BITNR,
2539 .name = "dirty bit",
2540 },
2541 {
2542 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
2543 .bit = QCOW2_INCOMPAT_CORRUPT_BITNR,
2544 .name = "corrupt bit",
2545 },
2546 {
2547 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
2548 .bit = QCOW2_INCOMPAT_DATA_FILE_BITNR,
2549 .name = "external data file",
2550 },
2551 {
2552 .type = QCOW2_FEAT_TYPE_COMPATIBLE,
2553 .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
2554 .name = "lazy refcounts",
2555 },
2556 };
2557
2558 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
2559 features, sizeof(features), buflen);
2560 if (ret < 0) {
2561 goto fail;
2562 }
2563 buf += ret;
2564 buflen -= ret;
2565 }
2566
2567
2568 if (s->nb_bitmaps > 0) {
2569 Qcow2BitmapHeaderExt bitmaps_header = {
2570 .nb_bitmaps = cpu_to_be32(s->nb_bitmaps),
2571 .bitmap_directory_size =
2572 cpu_to_be64(s->bitmap_directory_size),
2573 .bitmap_directory_offset =
2574 cpu_to_be64(s->bitmap_directory_offset)
2575 };
2576 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BITMAPS,
2577 &bitmaps_header, sizeof(bitmaps_header),
2578 buflen);
2579 if (ret < 0) {
2580 goto fail;
2581 }
2582 buf += ret;
2583 buflen -= ret;
2584 }
2585
2586
2587 QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
2588 ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
2589 if (ret < 0) {
2590 goto fail;
2591 }
2592
2593 buf += ret;
2594 buflen -= ret;
2595 }
2596
2597
2598 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen);
2599 if (ret < 0) {
2600 goto fail;
2601 }
2602
2603 buf += ret;
2604 buflen -= ret;
2605
2606
2607 if (s->image_backing_file) {
2608 size_t backing_file_len = strlen(s->image_backing_file);
2609
2610 if (buflen < backing_file_len) {
2611 ret = -ENOSPC;
2612 goto fail;
2613 }
2614
2615
2616 strncpy(buf, s->image_backing_file, buflen);
2617
2618 header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
2619 header->backing_file_size = cpu_to_be32(backing_file_len);
2620 }
2621
2622
2623 ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
2624 if (ret < 0) {
2625 goto fail;
2626 }
2627
2628 ret = 0;
2629fail:
2630 qemu_vfree(header);
2631 return ret;
2632}
2633
2634static int qcow2_change_backing_file(BlockDriverState *bs,
2635 const char *backing_file, const char *backing_fmt)
2636{
2637 BDRVQcow2State *s = bs->opaque;
2638
2639
2640
2641 if (backing_file && data_file_is_raw(bs)) {
2642 return -EINVAL;
2643 }
2644
2645 if (backing_file && strlen(backing_file) > 1023) {
2646 return -EINVAL;
2647 }
2648
2649 pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
2650 backing_file ?: "");
2651 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2652 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2653
2654 g_free(s->image_backing_file);
2655 g_free(s->image_backing_format);
2656
2657 s->image_backing_file = backing_file ? g_strdup(bs->backing_file) : NULL;
2658 s->image_backing_format = backing_fmt ? g_strdup(bs->backing_format) : NULL;
2659
2660 return qcow2_update_header(bs);
2661}
2662
2663static int qcow2_crypt_method_from_format(const char *encryptfmt)
2664{
2665 if (g_str_equal(encryptfmt, "luks")) {
2666 return QCOW_CRYPT_LUKS;
2667 } else if (g_str_equal(encryptfmt, "aes")) {
2668 return QCOW_CRYPT_AES;
2669 } else {
2670 return -EINVAL;
2671 }
2672}
2673
2674static int qcow2_set_up_encryption(BlockDriverState *bs,
2675 QCryptoBlockCreateOptions *cryptoopts,
2676 Error **errp)
2677{
2678 BDRVQcow2State *s = bs->opaque;
2679 QCryptoBlock *crypto = NULL;
2680 int fmt, ret;
2681
2682 switch (cryptoopts->format) {
2683 case Q_CRYPTO_BLOCK_FORMAT_LUKS:
2684 fmt = QCOW_CRYPT_LUKS;
2685 break;
2686 case Q_CRYPTO_BLOCK_FORMAT_QCOW:
2687 fmt = QCOW_CRYPT_AES;
2688 break;
2689 default:
2690 error_setg(errp, "Crypto format not supported in qcow2");
2691 return -EINVAL;
2692 }
2693
2694 s->crypt_method_header = fmt;
2695
2696 crypto = qcrypto_block_create(cryptoopts, "encrypt.",
2697 qcow2_crypto_hdr_init_func,
2698 qcow2_crypto_hdr_write_func,
2699 bs, errp);
2700 if (!crypto) {
2701 return -EINVAL;
2702 }
2703
2704 ret = qcow2_update_header(bs);
2705 if (ret < 0) {
2706 error_setg_errno(errp, -ret, "Could not write encryption header");
2707 goto out;
2708 }
2709
2710 ret = 0;
2711 out:
2712 qcrypto_block_free(crypto);
2713 return ret;
2714}
2715
2716
2717
2718
2719
2720
2721
2722
2723static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset,
2724 uint64_t new_length)
2725{
2726 BDRVQcow2State *s = bs->opaque;
2727 uint64_t bytes;
2728 uint64_t host_offset = 0;
2729 unsigned int cur_bytes;
2730 int ret;
2731 QCowL2Meta *meta;
2732
2733 assert(offset <= new_length);
2734 bytes = new_length - offset;
2735
2736 while (bytes) {
2737 cur_bytes = MIN(bytes, INT_MAX);
2738 ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
2739 &host_offset, &meta);
2740 if (ret < 0) {
2741 return ret;
2742 }
2743
2744 while (meta) {
2745 QCowL2Meta *next = meta->next;
2746
2747 ret = qcow2_alloc_cluster_link_l2(bs, meta);
2748 if (ret < 0) {
2749 qcow2_free_any_clusters(bs, meta->alloc_offset,
2750 meta->nb_clusters, QCOW2_DISCARD_NEVER);
2751 return ret;
2752 }
2753
2754
2755
2756 QLIST_REMOVE(meta, next_in_flight);
2757
2758 g_free(meta);
2759 meta = next;
2760 }
2761
2762
2763
2764 bytes -= cur_bytes;
2765 offset += cur_bytes;
2766 }
2767
2768
2769
2770
2771
2772
2773 if (host_offset != 0) {
2774 uint8_t data = 0;
2775 ret = bdrv_pwrite(s->data_file, (host_offset + cur_bytes) - 1,
2776 &data, 1);
2777 if (ret < 0) {
2778 return ret;
2779 }
2780 }
2781
2782 return 0;
2783}
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
/*
 * Computes how many bytes of refcount metadata (refcount blocks plus
 * refcount table clusters) are needed to cover @clusters clusters, where
 * the metadata clusters themselves must be covered as well.
 *
 * @clusters:          number of clusters to refcount, metadata excluded
 * @cluster_size:      cluster size in bytes
 * @refcount_order:    refcount width is (1 << @refcount_order) bits
 * @generous_increase: once the result has converged, add half the number of
 *                     refcount table clusters (rounded up) to @clusters and
 *                     let the computation converge a second time
 * @refblock_count:    if not NULL, receives the number of refcount blocks
 *
 * The count is found by fixed-point iteration: the number of refcount
 * blocks and table clusters is recomputed from the current total until the
 * total stops changing.
 *
 * Returns (refcount blocks + table clusters) * @cluster_size.
 */
int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size,
                                     int refcount_order, bool generous_increase,
                                     uint64_t *refblock_count)
{
    /* Refcount table entries are 64 bits wide */
    int64_t entries_per_table_cluster = cluster_size / sizeof(uint64_t);
    int64_t refcounts_per_refblock = cluster_size * 8 / (1 << refcount_order);
    int64_t table_clusters = 0;
    int64_t refblocks = 0;
    int64_t total = 0;

    for (;;) {
        int64_t previous = total;

        /* Both divisions round up */
        refblocks = (clusters + table_clusters + refblocks +
                     refcounts_per_refblock - 1) / refcounts_per_refblock;
        table_clusters = (refblocks + entries_per_table_cluster - 1) /
                         entries_per_table_cluster;
        total = clusters + refblocks + table_clusters;

        if (total != previous) {
            continue;
        }
        if (!generous_increase) {
            break;
        }

        /* Converged once: apply the one-off increase and start over */
        clusters += (table_clusters + 2 - 1) / 2;
        generous_increase = false;
        total = 0;
        if (previous == 0) {
            /* Restarting from zero would compare equal immediately */
            break;
        }
    }

    if (refblock_count) {
        *refblock_count = refblocks;
    }

    return (refblocks + table_clusters) * cluster_size;
}
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
/*
 * Computes the size of an image of @total_size guest bytes: the guest size
 * rounded up to a cluster boundary plus the space taken by the header
 * cluster, the L2 tables, the L1 table and the refcount metadata.
 *
 * @total_size:     guest-visible size in bytes
 * @cluster_size:   cluster size in bytes
 * @refcount_order: refcount width is (1 << @refcount_order) bits
 *
 * Returns the sum of the metadata size and the aligned guest size.
 */
static int64_t qcow2_calc_prealloc_size(int64_t total_size,
                                        size_t cluster_size,
                                        int refcount_order)
{
    int64_t aligned_total_size = ROUND_UP(total_size, cluster_size);
    int64_t metadata_bytes;
    uint64_t l2_entries;
    uint64_t l1_entries;

    /* One cluster for the header */
    metadata_bytes = cluster_size;

    /* One L2 entry per data cluster, in whole L2 tables */
    l2_entries = aligned_total_size / cluster_size;
    l2_entries = ROUND_UP(l2_entries, cluster_size / sizeof(uint64_t));
    metadata_bytes += l2_entries * sizeof(uint64_t);

    /* One L1 entry per L2 table, rounded up to whole clusters */
    l1_entries = l2_entries * sizeof(uint64_t) / cluster_size;
    l1_entries = ROUND_UP(l1_entries, cluster_size / sizeof(uint64_t));
    metadata_bytes += l1_entries * sizeof(uint64_t);

    /* Refcounts must cover the data and all metadata counted so far */
    metadata_bytes += qcow2_refcount_metadata_size(
            (metadata_bytes + aligned_total_size) / cluster_size,
            cluster_size, refcount_order, false, NULL);

    return metadata_bytes + aligned_total_size;
}
2871
2872static bool validate_cluster_size(size_t cluster_size, Error **errp)
2873{
2874 int cluster_bits = ctz32(cluster_size);
2875 if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS ||
2876 (1 << cluster_bits) != cluster_size)
2877 {
2878 error_setg(errp, "Cluster size must be a power of two between %d and "
2879 "%dk", 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10));
2880 return false;
2881 }
2882 return true;
2883}
2884
2885static size_t qcow2_opt_get_cluster_size_del(QemuOpts *opts, Error **errp)
2886{
2887 size_t cluster_size;
2888
2889 cluster_size = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE,
2890 DEFAULT_CLUSTER_SIZE);
2891 if (!validate_cluster_size(cluster_size, errp)) {
2892 return 0;
2893 }
2894 return cluster_size;
2895}
2896
2897static int qcow2_opt_get_version_del(QemuOpts *opts, Error **errp)
2898{
2899 char *buf;
2900 int ret;
2901
2902 buf = qemu_opt_get_del(opts, BLOCK_OPT_COMPAT_LEVEL);
2903 if (!buf) {
2904 ret = 3;
2905 } else if (!strcmp(buf, "0.10")) {
2906 ret = 2;
2907 } else if (!strcmp(buf, "1.1")) {
2908 ret = 3;
2909 } else {
2910 error_setg(errp, "Invalid compatibility level: '%s'", buf);
2911 ret = -EINVAL;
2912 }
2913 g_free(buf);
2914 return ret;
2915}
2916
2917static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version,
2918 Error **errp)
2919{
2920 uint64_t refcount_bits;
2921
2922 refcount_bits = qemu_opt_get_number_del(opts, BLOCK_OPT_REFCOUNT_BITS, 16);
2923 if (refcount_bits > 64 || !is_power_of_2(refcount_bits)) {
2924 error_setg(errp, "Refcount width must be a power of two and may not "
2925 "exceed 64 bits");
2926 return 0;
2927 }
2928
2929 if (version < 3 && refcount_bits != 16) {
2930 error_setg(errp, "Different refcount widths than 16 bits require "
2931 "compatibility level 1.1 or above (use compat=1.1 or "
2932 "greater)");
2933 return 0;
2934 }
2935
2936 return refcount_bits;
2937}
2938
2939static int coroutine_fn
2940qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
2941{
2942 BlockdevCreateOptionsQcow2 *qcow2_opts;
2943 QDict *options;
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957 BlockBackend *blk = NULL;
2958 BlockDriverState *bs = NULL;
2959 BlockDriverState *data_bs = NULL;
2960 QCowHeader *header;
2961 size_t cluster_size;
2962 int version;
2963 int refcount_order;
2964 uint64_t* refcount_table;
2965 Error *local_err = NULL;
2966 int ret;
2967
2968 assert(create_options->driver == BLOCKDEV_DRIVER_QCOW2);
2969 qcow2_opts = &create_options->u.qcow2;
2970
2971 bs = bdrv_open_blockdev_ref(qcow2_opts->file, errp);
2972 if (bs == NULL) {
2973 return -EIO;
2974 }
2975
2976
2977 if (!QEMU_IS_ALIGNED(qcow2_opts->size, BDRV_SECTOR_SIZE)) {
2978 error_setg(errp, "Image size must be a multiple of 512 bytes");
2979 ret = -EINVAL;
2980 goto out;
2981 }
2982
2983 if (qcow2_opts->has_version) {
2984 switch (qcow2_opts->version) {
2985 case BLOCKDEV_QCOW2_VERSION_V2:
2986 version = 2;
2987 break;
2988 case BLOCKDEV_QCOW2_VERSION_V3:
2989 version = 3;
2990 break;
2991 default:
2992 g_assert_not_reached();
2993 }
2994 } else {
2995 version = 3;
2996 }
2997
2998 if (qcow2_opts->has_cluster_size) {
2999 cluster_size = qcow2_opts->cluster_size;
3000 } else {
3001 cluster_size = DEFAULT_CLUSTER_SIZE;
3002 }
3003
3004 if (!validate_cluster_size(cluster_size, errp)) {
3005 ret = -EINVAL;
3006 goto out;
3007 }
3008
3009 if (!qcow2_opts->has_preallocation) {
3010 qcow2_opts->preallocation = PREALLOC_MODE_OFF;
3011 }
3012 if (qcow2_opts->has_backing_file &&
3013 qcow2_opts->preallocation != PREALLOC_MODE_OFF)
3014 {
3015 error_setg(errp, "Backing file and preallocation cannot be used at "
3016 "the same time");
3017 ret = -EINVAL;
3018 goto out;
3019 }
3020 if (qcow2_opts->has_backing_fmt && !qcow2_opts->has_backing_file) {
3021 error_setg(errp, "Backing format cannot be used without backing file");
3022 ret = -EINVAL;
3023 goto out;
3024 }
3025
3026 if (!qcow2_opts->has_lazy_refcounts) {
3027 qcow2_opts->lazy_refcounts = false;
3028 }
3029 if (version < 3 && qcow2_opts->lazy_refcounts) {
3030 error_setg(errp, "Lazy refcounts only supported with compatibility "
3031 "level 1.1 and above (use version=v3 or greater)");
3032 ret = -EINVAL;
3033 goto out;
3034 }
3035
3036 if (!qcow2_opts->has_refcount_bits) {
3037 qcow2_opts->refcount_bits = 16;
3038 }
3039 if (qcow2_opts->refcount_bits > 64 ||
3040 !is_power_of_2(qcow2_opts->refcount_bits))
3041 {
3042 error_setg(errp, "Refcount width must be a power of two and may not "
3043 "exceed 64 bits");
3044 ret = -EINVAL;
3045 goto out;
3046 }
3047 if (version < 3 && qcow2_opts->refcount_bits != 16) {
3048 error_setg(errp, "Different refcount widths than 16 bits require "
3049 "compatibility level 1.1 or above (use version=v3 or "
3050 "greater)");
3051 ret = -EINVAL;
3052 goto out;
3053 }
3054 refcount_order = ctz32(qcow2_opts->refcount_bits);
3055
3056 if (qcow2_opts->data_file_raw && !qcow2_opts->data_file) {
3057 error_setg(errp, "data-file-raw requires data-file");
3058 ret = -EINVAL;
3059 goto out;
3060 }
3061 if (qcow2_opts->data_file_raw && qcow2_opts->has_backing_file) {
3062 error_setg(errp, "Backing file and data-file-raw cannot be used at "
3063 "the same time");
3064 ret = -EINVAL;
3065 goto out;
3066 }
3067
3068 if (qcow2_opts->data_file) {
3069 if (version < 3) {
3070 error_setg(errp, "External data files are only supported with "
3071 "compatibility level 1.1 and above (use version=v3 or "
3072 "greater)");
3073 ret = -EINVAL;
3074 goto out;
3075 }
3076 data_bs = bdrv_open_blockdev_ref(qcow2_opts->data_file, errp);
3077 if (data_bs == NULL) {
3078 ret = -EIO;
3079 goto out;
3080 }
3081 }
3082
3083
3084 blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
3085 ret = blk_insert_bs(blk, bs, errp);
3086 if (ret < 0) {
3087 goto out;
3088 }
3089 blk_set_allow_write_beyond_eof(blk, true);
3090
3091
3092 ret = blk_truncate(blk, 0, PREALLOC_MODE_OFF, errp);
3093 if (ret < 0) {
3094 goto out;
3095 }
3096
3097
3098 QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header));
3099 header = g_malloc0(cluster_size);
3100 *header = (QCowHeader) {
3101 .magic = cpu_to_be32(QCOW_MAGIC),
3102 .version = cpu_to_be32(version),
3103 .cluster_bits = cpu_to_be32(ctz32(cluster_size)),
3104 .size = cpu_to_be64(0),
3105 .l1_table_offset = cpu_to_be64(0),
3106 .l1_size = cpu_to_be32(0),
3107 .refcount_table_offset = cpu_to_be64(cluster_size),
3108 .refcount_table_clusters = cpu_to_be32(1),
3109 .refcount_order = cpu_to_be32(refcount_order),
3110 .header_length = cpu_to_be32(sizeof(*header)),
3111 };
3112
3113
3114 header->crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
3115
3116 if (qcow2_opts->lazy_refcounts) {
3117 header->compatible_features |=
3118 cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
3119 }
3120 if (data_bs) {
3121 header->incompatible_features |=
3122 cpu_to_be64(QCOW2_INCOMPAT_DATA_FILE);
3123 }
3124 if (qcow2_opts->data_file_raw) {
3125 header->autoclear_features |=
3126 cpu_to_be64(QCOW2_AUTOCLEAR_DATA_FILE_RAW);
3127 }
3128
3129 ret = blk_pwrite(blk, 0, header, cluster_size, 0);
3130 g_free(header);
3131 if (ret < 0) {
3132 error_setg_errno(errp, -ret, "Could not write qcow2 header");
3133 goto out;
3134 }
3135
3136
3137 refcount_table = g_malloc0(2 * cluster_size);
3138 refcount_table[0] = cpu_to_be64(2 * cluster_size);
3139 ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size, 0);
3140 g_free(refcount_table);
3141
3142 if (ret < 0) {
3143 error_setg_errno(errp, -ret, "Could not write refcount table");
3144 goto out;
3145 }
3146
3147 blk_unref(blk);
3148 blk = NULL;
3149
3150
3151
3152
3153
3154
3155 options = qdict_new();
3156 qdict_put_str(options, "driver", "qcow2");
3157 qdict_put_str(options, "file", bs->node_name);
3158 if (data_bs) {
3159 qdict_put_str(options, "data-file", data_bs->node_name);
3160 }
3161 blk = blk_new_open(NULL, NULL, options,
3162 BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
3163 &local_err);
3164 if (blk == NULL) {
3165 error_propagate(errp, local_err);
3166 ret = -EIO;
3167 goto out;
3168 }
3169
3170 ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size);
3171 if (ret < 0) {
3172 error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
3173 "header and refcount table");
3174 goto out;
3175
3176 } else if (ret != 0) {
3177 error_report("Huh, first cluster in empty image is already in use?");
3178 abort();
3179 }
3180
3181
3182 if (data_bs) {
3183 BDRVQcow2State *s = blk_bs(blk)->opaque;
3184 s->image_data_file = g_strdup(data_bs->filename);
3185 }
3186
3187
3188 ret = qcow2_update_header(blk_bs(blk));
3189 if (ret < 0) {
3190 error_setg_errno(errp, -ret, "Could not update qcow2 header");
3191 goto out;
3192 }
3193
3194
3195 ret = blk_truncate(blk, qcow2_opts->size, qcow2_opts->preallocation, errp);
3196 if (ret < 0) {
3197 error_prepend(errp, "Could not resize image: ");
3198 goto out;
3199 }
3200
3201
3202 if (qcow2_opts->has_backing_file) {
3203 const char *backing_format = NULL;
3204
3205 if (qcow2_opts->has_backing_fmt) {
3206 backing_format = BlockdevDriver_str(qcow2_opts->backing_fmt);
3207 }
3208
3209 ret = bdrv_change_backing_file(blk_bs(blk), qcow2_opts->backing_file,
3210 backing_format);
3211 if (ret < 0) {
3212 error_setg_errno(errp, -ret, "Could not assign backing file '%s' "
3213 "with format '%s'", qcow2_opts->backing_file,
3214 backing_format);
3215 goto out;
3216 }
3217 }
3218
3219
3220 if (qcow2_opts->has_encrypt) {
3221 ret = qcow2_set_up_encryption(blk_bs(blk), qcow2_opts->encrypt, errp);
3222 if (ret < 0) {
3223 goto out;
3224 }
3225 }
3226
3227 blk_unref(blk);
3228 blk = NULL;
3229
3230
3231
3232
3233
3234
3235
3236 options = qdict_new();
3237 qdict_put_str(options, "driver", "qcow2");
3238 qdict_put_str(options, "file", bs->node_name);
3239 if (data_bs) {
3240 qdict_put_str(options, "data-file", data_bs->node_name);
3241 }
3242 blk = blk_new_open(NULL, NULL, options,
3243 BDRV_O_RDWR | BDRV_O_NO_BACKING | BDRV_O_NO_IO,
3244 &local_err);
3245 if (blk == NULL) {
3246 error_propagate(errp, local_err);
3247 ret = -EIO;
3248 goto out;
3249 }
3250
3251 ret = 0;
3252out:
3253 blk_unref(blk);
3254 bdrv_unref(bs);
3255 bdrv_unref(data_bs);
3256 return ret;
3257}
3258
3259static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opts,
3260 Error **errp)
3261{
3262 BlockdevCreateOptions *create_options = NULL;
3263 QDict *qdict;
3264 Visitor *v;
3265 BlockDriverState *bs = NULL;
3266 BlockDriverState *data_bs = NULL;
3267 Error *local_err = NULL;
3268 const char *val;
3269 int ret;
3270
3271
3272
3273
3274
3275 qdict = qemu_opts_to_qdict_filtered(opts, NULL, bdrv_qcow2.create_opts,
3276 true);
3277
3278
3279 val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT);
3280 if (val && !strcmp(val, "on")) {
3281 qdict_put_str(qdict, BLOCK_OPT_ENCRYPT, "qcow");
3282 } else if (val && !strcmp(val, "off")) {
3283 qdict_del(qdict, BLOCK_OPT_ENCRYPT);
3284 }
3285
3286 val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT);
3287 if (val && !strcmp(val, "aes")) {
3288 qdict_put_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT, "qcow");
3289 }
3290
3291
3292
3293 val = qdict_get_try_str(qdict, BLOCK_OPT_COMPAT_LEVEL);
3294 if (val && !strcmp(val, "0.10")) {
3295 qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v2");
3296 } else if (val && !strcmp(val, "1.1")) {
3297 qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v3");
3298 }
3299
3300
3301 static const QDictRenames opt_renames[] = {
3302 { BLOCK_OPT_BACKING_FILE, "backing-file" },
3303 { BLOCK_OPT_BACKING_FMT, "backing-fmt" },
3304 { BLOCK_OPT_CLUSTER_SIZE, "cluster-size" },
3305 { BLOCK_OPT_LAZY_REFCOUNTS, "lazy-refcounts" },
3306 { BLOCK_OPT_REFCOUNT_BITS, "refcount-bits" },
3307 { BLOCK_OPT_ENCRYPT, BLOCK_OPT_ENCRYPT_FORMAT },
3308 { BLOCK_OPT_COMPAT_LEVEL, "version" },
3309 { BLOCK_OPT_DATA_FILE_RAW, "data-file-raw" },
3310 { NULL, NULL },
3311 };
3312
3313 if (!qdict_rename_keys(qdict, opt_renames, errp)) {
3314 ret = -EINVAL;
3315 goto finish;
3316 }
3317
3318
3319 ret = bdrv_create_file(filename, opts, errp);
3320 if (ret < 0) {
3321 goto finish;
3322 }
3323
3324 bs = bdrv_open(filename, NULL, NULL,
3325 BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
3326 if (bs == NULL) {
3327 ret = -EIO;
3328 goto finish;
3329 }
3330
3331
3332 val = qdict_get_try_str(qdict, BLOCK_OPT_DATA_FILE);
3333 if (val) {
3334 ret = bdrv_create_file(val, opts, errp);
3335 if (ret < 0) {
3336 goto finish;
3337 }
3338
3339 data_bs = bdrv_open(val, NULL, NULL,
3340 BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
3341 errp);
3342 if (data_bs == NULL) {
3343 ret = -EIO;
3344 goto finish;
3345 }
3346
3347 qdict_del(qdict, BLOCK_OPT_DATA_FILE);
3348 qdict_put_str(qdict, "data-file", data_bs->node_name);
3349 }
3350
3351
3352 qdict_put_str(qdict, "driver", "qcow2");
3353 qdict_put_str(qdict, "file", bs->node_name);
3354
3355
3356 v = qobject_input_visitor_new_flat_confused(qdict, errp);
3357 if (!v) {
3358 ret = -EINVAL;
3359 goto finish;
3360 }
3361
3362 visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err);
3363 visit_free(v);
3364
3365 if (local_err) {
3366 error_propagate(errp, local_err);
3367 ret = -EINVAL;
3368 goto finish;
3369 }
3370
3371
3372 create_options->u.qcow2.size = ROUND_UP(create_options->u.qcow2.size,
3373 BDRV_SECTOR_SIZE);
3374
3375
3376 ret = qcow2_co_create(create_options, errp);
3377 if (ret < 0) {
3378 goto finish;
3379 }
3380
3381 ret = 0;
3382finish:
3383 qobject_unref(qdict);
3384 bdrv_unref(bs);
3385 bdrv_unref(data_bs);
3386 qapi_free_BlockdevCreateOptions(create_options);
3387 return ret;
3388}
3389
3390
3391static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes)
3392{
3393 int64_t nr;
3394 int res;
3395
3396
3397 if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) {
3398 bytes = bs->total_sectors * BDRV_SECTOR_SIZE - offset;
3399 }
3400
3401 if (!bytes) {
3402 return true;
3403 }
3404 res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL);
3405 return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == bytes;
3406}
3407
3408static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
3409 int64_t offset, int bytes, BdrvRequestFlags flags)
3410{
3411 int ret;
3412 BDRVQcow2State *s = bs->opaque;
3413
3414 uint32_t head = offset % s->cluster_size;
3415 uint32_t tail = (offset + bytes) % s->cluster_size;
3416
3417 trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, bytes);
3418 if (offset + bytes == bs->total_sectors * BDRV_SECTOR_SIZE) {
3419 tail = 0;
3420 }
3421
3422 if (head || tail) {
3423 uint64_t off;
3424 unsigned int nr;
3425
3426 assert(head + bytes <= s->cluster_size);
3427
3428
3429 if (!(is_zero(bs, offset - head, head) &&
3430 is_zero(bs, offset + bytes,
3431 tail ? s->cluster_size - tail : 0))) {
3432 return -ENOTSUP;
3433 }
3434
3435 qemu_co_mutex_lock(&s->lock);
3436
3437 offset = QEMU_ALIGN_DOWN(offset, s->cluster_size);
3438 bytes = s->cluster_size;
3439 nr = s->cluster_size;
3440 ret = qcow2_get_cluster_offset(bs, offset, &nr, &off);
3441 if (ret != QCOW2_CLUSTER_UNALLOCATED &&
3442 ret != QCOW2_CLUSTER_ZERO_PLAIN &&
3443 ret != QCOW2_CLUSTER_ZERO_ALLOC) {
3444 qemu_co_mutex_unlock(&s->lock);
3445 return -ENOTSUP;
3446 }
3447 } else {
3448 qemu_co_mutex_lock(&s->lock);
3449 }
3450
3451 trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, bytes);
3452
3453
3454 ret = qcow2_cluster_zeroize(bs, offset, bytes, flags);
3455 qemu_co_mutex_unlock(&s->lock);
3456
3457 return ret;
3458}
3459
3460static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
3461 int64_t offset, int bytes)
3462{
3463 int ret;
3464 BDRVQcow2State *s = bs->opaque;
3465
3466 if (!QEMU_IS_ALIGNED(offset | bytes, s->cluster_size)) {
3467 assert(bytes < s->cluster_size);
3468
3469
3470 if (!QEMU_IS_ALIGNED(offset, s->cluster_size) ||
3471 offset + bytes != bs->total_sectors * BDRV_SECTOR_SIZE) {
3472 return -ENOTSUP;
3473 }
3474 }
3475
3476 qemu_co_mutex_lock(&s->lock);
3477 ret = qcow2_cluster_discard(bs, offset, bytes, QCOW2_DISCARD_REQUEST,
3478 false);
3479 qemu_co_mutex_unlock(&s->lock);
3480 return ret;
3481}
3482
3483static int coroutine_fn
3484qcow2_co_copy_range_from(BlockDriverState *bs,
3485 BdrvChild *src, uint64_t src_offset,
3486 BdrvChild *dst, uint64_t dst_offset,
3487 uint64_t bytes, BdrvRequestFlags read_flags,
3488 BdrvRequestFlags write_flags)
3489{
3490 BDRVQcow2State *s = bs->opaque;
3491 int ret;
3492 unsigned int cur_bytes;
3493 BdrvChild *child = NULL;
3494 BdrvRequestFlags cur_write_flags;
3495
3496 assert(!bs->encrypted);
3497 qemu_co_mutex_lock(&s->lock);
3498
3499 while (bytes != 0) {
3500 uint64_t copy_offset = 0;
3501
3502 cur_bytes = MIN(bytes, INT_MAX);
3503 cur_write_flags = write_flags;
3504
3505 ret = qcow2_get_cluster_offset(bs, src_offset, &cur_bytes, ©_offset);
3506 if (ret < 0) {
3507 goto out;
3508 }
3509
3510 switch (ret) {
3511 case QCOW2_CLUSTER_UNALLOCATED:
3512 if (bs->backing && bs->backing->bs) {
3513 int64_t backing_length = bdrv_getlength(bs->backing->bs);
3514 if (src_offset >= backing_length) {
3515 cur_write_flags |= BDRV_REQ_ZERO_WRITE;
3516 } else {
3517 child = bs->backing;
3518 cur_bytes = MIN(cur_bytes, backing_length - src_offset);
3519 copy_offset = src_offset;
3520 }
3521 } else {
3522 cur_write_flags |= BDRV_REQ_ZERO_WRITE;
3523 }
3524 break;
3525
3526 case QCOW2_CLUSTER_ZERO_PLAIN:
3527 case QCOW2_CLUSTER_ZERO_ALLOC:
3528 cur_write_flags |= BDRV_REQ_ZERO_WRITE;
3529 break;
3530
3531 case QCOW2_CLUSTER_COMPRESSED:
3532 ret = -ENOTSUP;
3533 goto out;
3534
3535 case QCOW2_CLUSTER_NORMAL:
3536 child = s->data_file;
3537 copy_offset += offset_into_cluster(s, src_offset);
3538 if ((copy_offset & 511) != 0) {
3539 ret = -EIO;
3540 goto out;
3541 }
3542 break;
3543
3544 default:
3545 abort();
3546 }
3547 qemu_co_mutex_unlock(&s->lock);
3548 ret = bdrv_co_copy_range_from(child,
3549 copy_offset,
3550 dst, dst_offset,
3551 cur_bytes, read_flags, cur_write_flags);
3552 qemu_co_mutex_lock(&s->lock);
3553 if (ret < 0) {
3554 goto out;
3555 }
3556
3557 bytes -= cur_bytes;
3558 src_offset += cur_bytes;
3559 dst_offset += cur_bytes;
3560 }
3561 ret = 0;
3562
3563out:
3564 qemu_co_mutex_unlock(&s->lock);
3565 return ret;
3566}
3567
3568static int coroutine_fn
3569qcow2_co_copy_range_to(BlockDriverState *bs,
3570 BdrvChild *src, uint64_t src_offset,
3571 BdrvChild *dst, uint64_t dst_offset,
3572 uint64_t bytes, BdrvRequestFlags read_flags,
3573 BdrvRequestFlags write_flags)
3574{
3575 BDRVQcow2State *s = bs->opaque;
3576 int offset_in_cluster;
3577 int ret;
3578 unsigned int cur_bytes;
3579 uint64_t cluster_offset;
3580 QCowL2Meta *l2meta = NULL;
3581
3582 assert(!bs->encrypted);
3583
3584 qemu_co_mutex_lock(&s->lock);
3585
3586 while (bytes != 0) {
3587
3588 l2meta = NULL;
3589
3590 offset_in_cluster = offset_into_cluster(s, dst_offset);
3591 cur_bytes = MIN(bytes, INT_MAX);
3592
3593
3594
3595
3596
3597 ret = qcow2_alloc_cluster_offset(bs, dst_offset, &cur_bytes,
3598 &cluster_offset, &l2meta);
3599 if (ret < 0) {
3600 goto fail;
3601 }
3602
3603 assert((cluster_offset & 511) == 0);
3604
3605 ret = qcow2_pre_write_overlap_check(bs, 0,
3606 cluster_offset + offset_in_cluster, cur_bytes, true);
3607 if (ret < 0) {
3608 goto fail;
3609 }
3610
3611 qemu_co_mutex_unlock(&s->lock);
3612 ret = bdrv_co_copy_range_to(src, src_offset,
3613 s->data_file,
3614 cluster_offset + offset_in_cluster,
3615 cur_bytes, read_flags, write_flags);
3616 qemu_co_mutex_lock(&s->lock);
3617 if (ret < 0) {
3618 goto fail;
3619 }
3620
3621 ret = qcow2_handle_l2meta(bs, &l2meta, true);
3622 if (ret) {
3623 goto fail;
3624 }
3625
3626 bytes -= cur_bytes;
3627 src_offset += cur_bytes;
3628 dst_offset += cur_bytes;
3629 }
3630 ret = 0;
3631
3632fail:
3633 qcow2_handle_l2meta(bs, &l2meta, false);
3634
3635 qemu_co_mutex_unlock(&s->lock);
3636
3637 trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
3638
3639 return ret;
3640}
3641
/*
 * Change the virtual disk size of @bs to @offset bytes.
 *
 * @offset must be a multiple of 512.  Shrinking is only accepted with
 * PREALLOC_MODE_OFF; growing accepts OFF, METADATA, FALLOC and FULL.
 * Images with internal snapshots are rejected, and so are images for which
 * qcow2_truncate_bitmaps_check() reports a problem.
 *
 * Everything after the initial argument validation runs with s->lock held;
 * the lock is dropped again at the "fail" label, which is also the regular
 * exit path.
 *
 * Returns 0 on success and a negative errno value on failure; failures are
 * reported through @errp.
 */
static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
                                          PreallocMode prealloc, Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t old_length;
    int64_t new_l1_size;
    int ret;
    QDict *options;

    /* Only these four preallocation modes are implemented below */
    if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_METADATA &&
        prealloc != PREALLOC_MODE_FALLOC && prealloc != PREALLOC_MODE_FULL)
    {
        error_setg(errp, "Unsupported preallocation mode '%s'",
                   PreallocMode_str(prealloc));
        return -ENOTSUP;
    }

    if (offset & 511) {
        error_setg(errp, "The new size must be a multiple of 512");
        return -EINVAL;
    }

    qemu_co_mutex_lock(&s->lock);

    /* Cannot proceed if the image has snapshots */
    if (s->nb_snapshots) {
        error_setg(errp, "Can't resize an image which has snapshots");
        ret = -ENOTSUP;
        goto fail;
    }

    /* Cannot proceed if the bitmap check fails (it fills in @errp itself) */
    if (qcow2_truncate_bitmaps_check(bs, errp)) {
        ret = -ENOTSUP;
        goto fail;
    }

    old_length = bs->total_sectors * BDRV_SECTOR_SIZE;
    new_l1_size = size_to_l1(s, offset);

    if (offset < old_length) {
        /* Shrinking: drop the clusters, L2 tables and refblocks beyond the
         * new end of the virtual disk */
        int64_t last_cluster, old_file_size;
        if (prealloc != PREALLOC_MODE_OFF) {
            error_setg(errp,
                       "Preallocation can't be used for shrinking an image");
            ret = -EINVAL;
            goto fail;
        }

        /* Discard everything from the first cluster boundary at or after the
         * new size up to the old end of the disk */
        ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size),
                                    old_length - ROUND_UP(offset,
                                                          s->cluster_size),
                                    QCOW2_DISCARD_ALWAYS, true);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to discard cropped clusters");
            goto fail;
        }

        ret = qcow2_shrink_l1_table(bs, new_l1_size);
        if (ret < 0) {
            error_setg_errno(errp, -ret,
                             "Failed to reduce the number of L2 tables");
            goto fail;
        }

        ret = qcow2_shrink_reftable(bs);
        if (ret < 0) {
            error_setg_errno(errp, -ret,
                             "Failed to discard unused refblocks");
            goto fail;
        }

        old_file_size = bdrv_getlength(bs->file->bs);
        if (old_file_size < 0) {
            error_setg_errno(errp, -old_file_size,
                             "Failed to inquire current file length");
            ret = old_file_size;
            goto fail;
        }
        last_cluster = qcow2_get_last_cluster(bs, old_file_size);
        if (last_cluster < 0) {
            error_setg_errno(errp, -last_cluster,
                             "Failed to find the last cluster");
            ret = last_cluster;
            goto fail;
        }
        if ((last_cluster + 1) * s->cluster_size < old_file_size) {
            Error *local_err = NULL;

            /* Best effort: if cutting off the unused tail of the image file
             * fails, only a warning is printed and the resize carries on
             * (the return value is deliberately not stored in ret) */
            bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size,
                             PREALLOC_MODE_OFF, &local_err);
            if (local_err) {
                warn_reportf_err(local_err,
                                 "Failed to truncate the tail of the image: ");
            }
        }
    } else {
        /* Growing (or same size): make sure the L1 table is large enough */
        ret = qcow2_grow_l1_table(bs, new_l1_size, true);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to grow the L1 table");
            goto fail;
        }
    }

    switch (prealloc) {
    case PREALLOC_MODE_OFF:
        break;

    case PREALLOC_MODE_METADATA:
        ret = preallocate_co(bs, old_length, offset);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Preallocation failed");
            goto fail;
        }
        break;

    case PREALLOC_MODE_FALLOC:
    case PREALLOC_MODE_FULL:
    {
        int64_t allocation_start, host_offset, guest_offset;
        int64_t clusters_allocated;
        int64_t old_file_size, new_file_size;
        uint64_t nb_new_data_clusters, nb_new_l2_tables;

        /* With an external data file only the metadata is preallocated
         * here; the truncate request (including @prealloc) is forwarded to
         * the data file further down in this function */
        if (has_data_file(bs)) {
            ret = preallocate_co(bs, old_length, offset);
            if (ret < 0) {
                error_setg_errno(errp, -ret, "Preallocation failed");
                goto fail;
            }
            break;
        }

        old_file_size = bdrv_getlength(bs->file->bs);
        if (old_file_size < 0) {
            error_setg_errno(errp, -old_file_size,
                             "Failed to inquire current file length");
            ret = old_file_size;
            goto fail;
        }
        old_file_size = ROUND_UP(old_file_size, s->cluster_size);

        nb_new_data_clusters = DIV_ROUND_UP(offset - old_length,
                                            s->cluster_size);

        /* Estimate the number of L2 tables needed for the new area: one
         * table per (cluster_size / sizeof(uint64_t)) data clusters.
         * NOTE(review): this looks like a deliberate overestimate (existing
         * L2 tables may already cover part of the range) - confirm. */
        nb_new_l2_tables = DIV_ROUND_UP(nb_new_data_clusters,
                                        s->cluster_size / sizeof(uint64_t));
        /* One extra table because the new range need not start or end on an
         * L2 table boundary */
        nb_new_l2_tables++;

        /* NOTE(review): presumably this extends the refcount structures so
         * that they cover the data clusters plus the estimated L2 tables,
         * starting at the current (cluster-aligned) end of file, and returns
         * the offset at which the new area may start - confirm against
         * qcow2_refcount_area(). */
        allocation_start = qcow2_refcount_area(bs, old_file_size,
                                               nb_new_data_clusters +
                                               nb_new_l2_tables,
                                               true, 0, 0);
        if (allocation_start < 0) {
            error_setg_errno(errp, -allocation_start,
                             "Failed to resize refcount structures");
            ret = allocation_start;
            goto fail;
        }

        clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start,
                                                     nb_new_data_clusters);
        if (clusters_allocated < 0) {
            error_setg_errno(errp, -clusters_allocated,
                             "Failed to allocate data clusters");
            ret = clusters_allocated;
            goto fail;
        }

        assert(clusters_allocated == nb_new_data_clusters);

        /* Allocate the data area in the image file, using the requested
         * preallocation mode; on failure give the clusters back */
        new_file_size = allocation_start +
                        nb_new_data_clusters * s->cluster_size;
        ret = bdrv_co_truncate(bs->file, new_file_size, prealloc, errp);
        if (ret < 0) {
            error_prepend(errp, "Failed to resize underlying file: ");
            qcow2_free_clusters(bs, allocation_start,
                                nb_new_data_clusters * s->cluster_size,
                                QCOW2_DISCARD_OTHER);
            goto fail;
        }

        /* Create the L2 entries that map the new guest range onto the newly
         * allocated host clusters, one L2 slice worth of entries at a time */
        host_offset = allocation_start;
        guest_offset = old_length;
        while (nb_new_data_clusters) {
            int64_t nb_clusters = MIN(
                nb_new_data_clusters,
                s->l2_slice_size - offset_to_l2_slice_index(s, guest_offset));
            QCowL2Meta allocation = {
                .offset       = guest_offset,
                .alloc_offset = host_offset,
                .nb_clusters  = nb_clusters,
            };
            qemu_co_queue_init(&allocation.dependent_requests);

            ret = qcow2_alloc_cluster_link_l2(bs, &allocation);
            if (ret < 0) {
                error_setg_errno(errp, -ret, "Failed to update L2 tables");
                /* Free the clusters that have not been linked yet */
                qcow2_free_clusters(bs, host_offset,
                                    nb_new_data_clusters * s->cluster_size,
                                    QCOW2_DISCARD_OTHER);
                goto fail;
            }

            guest_offset += nb_clusters * s->cluster_size;
            host_offset += nb_clusters * s->cluster_size;
            nb_new_data_clusters -= nb_clusters;
        }
        break;
    }

    default:
        g_assert_not_reached();
    }

    if (prealloc != PREALLOC_MODE_OFF) {
        /* Write out the metadata caches before the image size is changed */
        ret = qcow2_write_caches(bs);
        if (ret < 0) {
            error_setg_errno(errp, -ret,
                             "Failed to flush the preallocated area to disk");
            goto fail;
        }
    }

    bs->total_sectors = offset / BDRV_SECTOR_SIZE;

    if (has_data_file(bs)) {
        /* Metadata preallocation has no meaning for the data file itself,
         * so it is resized without preallocation in that mode */
        if (prealloc == PREALLOC_MODE_METADATA) {
            prealloc = PREALLOC_MODE_OFF;
        }
        ret = bdrv_co_truncate(s->data_file, offset, prealloc, errp);
        if (ret < 0) {
            goto fail;
        }
    }

    /* Write the updated size field of the header; from here on @offset holds
     * the big-endian on-disk representation, not the host value */
    offset = cpu_to_be64(offset);
    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
                           &offset, sizeof(uint64_t));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Failed to update the image size");
        goto fail;
    }

    s->l1_vm_state_index = new_l1_size;

    /* Re-apply the current runtime options on a shallow copy of bs->options.
     * NOTE(review): presumably done so that size-dependent settings such as
     * the metadata cache sizes are recomputed - confirm. */
    options = qdict_clone_shallow(bs->options);
    ret = qcow2_update_options(bs, options, s->flags, errp);
    qobject_unref(options);
    if (ret < 0) {
        goto fail;
    }
    ret = 0;
fail:
    qemu_co_mutex_unlock(&s->lock);
    return ret;
}
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928
3929static ssize_t qcow2_compress(void *dest, size_t dest_size,
3930 const void *src, size_t src_size)
3931{
3932 ssize_t ret;
3933 z_stream strm;
3934
3935
3936 memset(&strm, 0, sizeof(strm));
3937 ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
3938 -12, 9, Z_DEFAULT_STRATEGY);
3939 if (ret != Z_OK) {
3940 return -2;
3941 }
3942
3943
3944
3945 strm.avail_in = src_size;
3946 strm.next_in = (void *) src;
3947 strm.avail_out = dest_size;
3948 strm.next_out = dest;
3949
3950 ret = deflate(&strm, Z_FINISH);
3951 if (ret == Z_STREAM_END) {
3952 ret = dest_size - strm.avail_out;
3953 } else {
3954 ret = (ret == Z_OK ? -1 : -2);
3955 }
3956
3957 deflateEnd(&strm);
3958
3959 return ret;
3960}
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974static ssize_t qcow2_decompress(void *dest, size_t dest_size,
3975 const void *src, size_t src_size)
3976{
3977 int ret = 0;
3978 z_stream strm;
3979
3980 memset(&strm, 0, sizeof(strm));
3981 strm.avail_in = src_size;
3982 strm.next_in = (void *) src;
3983 strm.avail_out = dest_size;
3984 strm.next_out = dest;
3985
3986 ret = inflateInit2(&strm, -12);
3987 if (ret != Z_OK) {
3988 return -1;
3989 }
3990
3991 ret = inflate(&strm, Z_FINISH);
3992 if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || strm.avail_out != 0) {
3993
3994
3995
3996 ret = -1;
3997 }
3998
3999 inflateEnd(&strm);
4000
4001 return ret;
4002}
4003
/* Upper bound on the number of (de)compression jobs that
 * qcow2_co_do_compress() keeps in flight at the same time for one image */
#define MAX_COMPRESS_THREADS 4

/* Common signature of qcow2_compress() and qcow2_decompress() */
typedef ssize_t (*Qcow2CompressFunc)(void *dest, size_t dest_size,
                                     const void *src, size_t src_size);

/* Job description handed to the thread pool by qcow2_co_do_compress() */
typedef struct Qcow2CompressData {
    void *dest;         /* output buffer */
    size_t dest_size;   /* size of the output buffer */
    const void *src;    /* input buffer */
    size_t src_size;    /* size of the input buffer */
    ssize_t ret;        /* result of @func, filled in by the worker */

    Qcow2CompressFunc func; /* operation to run on the buffers */
} Qcow2CompressData;
4017
4018static int qcow2_compress_pool_func(void *opaque)
4019{
4020 Qcow2CompressData *data = opaque;
4021
4022 data->ret = data->func(data->dest, data->dest_size,
4023 data->src, data->src_size);
4024
4025 return 0;
4026}
4027
/*
 * Completion callback for a (de)compression job: @opaque is the coroutine
 * that submitted the job and is re-entered here.  @ret is not used because
 * the job result is stored in the Qcow2CompressData.
 */
static void qcow2_compress_complete(void *opaque, int ret)
{
    (void)ret;

    qemu_coroutine_enter(opaque);
}
4032
4033static ssize_t coroutine_fn
4034qcow2_co_do_compress(BlockDriverState *bs, void *dest, size_t dest_size,
4035 const void *src, size_t src_size, Qcow2CompressFunc func)
4036{
4037 BDRVQcow2State *s = bs->opaque;
4038 BlockAIOCB *acb;
4039 ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
4040 Qcow2CompressData arg = {
4041 .dest = dest,
4042 .dest_size = dest_size,
4043 .src = src,
4044 .src_size = src_size,
4045 .func = func,
4046 };
4047
4048 while (s->nb_compress_threads >= MAX_COMPRESS_THREADS) {
4049 qemu_co_queue_wait(&s->compress_wait_queue, NULL);
4050 }
4051
4052 s->nb_compress_threads++;
4053 acb = thread_pool_submit_aio(pool, qcow2_compress_pool_func, &arg,
4054 qcow2_compress_complete,
4055 qemu_coroutine_self());
4056
4057 if (!acb) {
4058 s->nb_compress_threads--;
4059 return -EINVAL;
4060 }
4061 qemu_coroutine_yield();
4062 s->nb_compress_threads--;
4063 qemu_co_queue_next(&s->compress_wait_queue);
4064
4065 return arg.ret;
4066}
4067
4068static ssize_t coroutine_fn
4069qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size,
4070 const void *src, size_t src_size)
4071{
4072 return qcow2_co_do_compress(bs, dest, dest_size, src, src_size,
4073 qcow2_compress);
4074}
4075
4076static ssize_t coroutine_fn
4077qcow2_co_decompress(BlockDriverState *bs, void *dest, size_t dest_size,
4078 const void *src, size_t src_size)
4079{
4080 return qcow2_co_do_compress(bs, dest, dest_size, src, src_size,
4081 qcow2_decompress);
4082}
4083
4084
4085
4086static coroutine_fn int
4087qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
4088 uint64_t bytes, QEMUIOVector *qiov)
4089{
4090 BDRVQcow2State *s = bs->opaque;
4091 QEMUIOVector hd_qiov;
4092 int ret;
4093 size_t out_len;
4094 uint8_t *buf, *out_buf;
4095 uint64_t cluster_offset;
4096
4097 if (has_data_file(bs)) {
4098 return -ENOTSUP;
4099 }
4100
4101 if (bytes == 0) {
4102
4103
4104 int64_t len = bdrv_getlength(bs->file->bs);
4105 if (len < 0) {
4106 return len;
4107 }
4108 return bdrv_co_truncate(bs->file, len, PREALLOC_MODE_OFF, NULL);
4109 }
4110
4111 if (offset_into_cluster(s, offset)) {
4112 return -EINVAL;
4113 }
4114
4115 buf = qemu_blockalign(bs, s->cluster_size);
4116 if (bytes != s->cluster_size) {
4117 if (bytes > s->cluster_size ||
4118 offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS)
4119 {
4120 qemu_vfree(buf);
4121 return -EINVAL;
4122 }
4123
4124 memset(buf + bytes, 0, s->cluster_size - bytes);
4125 }
4126 qemu_iovec_to_buf(qiov, 0, buf, bytes);
4127
4128 out_buf = g_malloc(s->cluster_size);
4129
4130 out_len = qcow2_co_compress(bs, out_buf, s->cluster_size - 1,
4131 buf, s->cluster_size);
4132 if (out_len == -2) {
4133 ret = -EINVAL;
4134 goto fail;
4135 } else if (out_len == -1) {
4136
4137 ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0);
4138 if (ret < 0) {
4139 goto fail;
4140 }
4141 goto success;
4142 }
4143
4144 qemu_co_mutex_lock(&s->lock);
4145 ret = qcow2_alloc_compressed_cluster_offset(bs, offset, out_len,
4146 &cluster_offset);
4147 if (ret < 0) {
4148 qemu_co_mutex_unlock(&s->lock);
4149 goto fail;
4150 }
4151
4152 ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len, true);
4153 qemu_co_mutex_unlock(&s->lock);
4154 if (ret < 0) {
4155 goto fail;
4156 }
4157
4158 qemu_iovec_init_buf(&hd_qiov, out_buf, out_len);
4159
4160 BLKDBG_EVENT(s->data_file, BLKDBG_WRITE_COMPRESSED);
4161 ret = bdrv_co_pwritev(s->data_file, cluster_offset, out_len, &hd_qiov, 0);
4162 if (ret < 0) {
4163 goto fail;
4164 }
4165success:
4166 ret = 0;
4167fail:
4168 qemu_vfree(buf);
4169 g_free(out_buf);
4170 return ret;
4171}
4172
4173static int coroutine_fn
4174qcow2_co_preadv_compressed(BlockDriverState *bs,
4175 uint64_t file_cluster_offset,
4176 uint64_t offset,
4177 uint64_t bytes,
4178 QEMUIOVector *qiov)
4179{
4180 BDRVQcow2State *s = bs->opaque;
4181 int ret = 0, csize, nb_csectors;
4182 uint64_t coffset;
4183 uint8_t *buf, *out_buf;
4184 QEMUIOVector local_qiov;
4185 int offset_in_cluster = offset_into_cluster(s, offset);
4186
4187 coffset = file_cluster_offset & s->cluster_offset_mask;
4188 nb_csectors = ((file_cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
4189 csize = nb_csectors * 512 - (coffset & 511);
4190
4191 buf = g_try_malloc(csize);
4192 if (!buf) {
4193 return -ENOMEM;
4194 }
4195 qemu_iovec_init_buf(&local_qiov, buf, csize);
4196
4197 out_buf = qemu_blockalign(bs, s->cluster_size);
4198
4199 BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
4200 ret = bdrv_co_preadv(bs->file, coffset, csize, &local_qiov, 0);
4201 if (ret < 0) {
4202 goto fail;
4203 }
4204
4205 if (qcow2_co_decompress(bs, out_buf, s->cluster_size, buf, csize) < 0) {
4206 ret = -EIO;
4207 goto fail;
4208 }
4209
4210 qemu_iovec_from_buf(qiov, 0, out_buf + offset_in_cluster, bytes);
4211
4212fail:
4213 qemu_vfree(out_buf);
4214 g_free(buf);
4215
4216 return ret;
4217}
4218
/*
 * Empty the image by rebuilding its metadata from scratch instead of
 * discarding cluster by cluster.
 *
 * Resulting layout (in clusters): 0 = header, 1 = refcount table,
 * 2 = refcount block, 3.. = L1 table; the image file is truncated right
 * after the L1 table.
 *
 * The image is marked dirty while the on-disk structures are inconsistent.
 * Failures that may leave broken refcounts behind go to
 * fail_broken_refcounts, which clears bs->drv.
 *
 * Returns 0 on success and a negative errno value on failure.
 */
static int make_completely_empty(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;
    Error *local_err = NULL;
    int ret, l1_clusters;
    int64_t offset;
    uint64_t *new_reftable = NULL;
    uint64_t rt_entry, l1_size2;
    /* Written to the header in one go, starting at the l1_table_offset
     * field.  NOTE(review): relies on these three header fields being
     * adjacent and in this order in QCowHeader - confirm. */
    struct {
        uint64_t l1_offset;
        uint64_t reftable_offset;
        uint32_t reftable_clusters;
    } QEMU_PACKED l1_ofs_rt_ofs_cls;

    ret = qcow2_cache_empty(bs, s->l2_table_cache);
    if (ret < 0) {
        goto fail;
    }

    ret = qcow2_cache_empty(bs, s->refcount_block_cache);
    if (ret < 0) {
        goto fail;
    }

    /* Refcounts will be broken utterly while this function runs */
    ret = qcow2_mark_dirty(bs);
    if (ret < 0) {
        goto fail;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);

    l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
    l1_size2 = (uint64_t)s->l1_size * sizeof(uint64_t);

    /* Zero the current L1 table on disk and in memory; after this the image
     * no longer maps any guest cluster, although the refcounts still claim
     * the old clusters */
    ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset,
                             l1_clusters * s->cluster_size, 0);
    if (ret < 0) {
        goto fail_broken_refcounts;
    }
    memset(s->l1_table, 0, l1_size2);

    BLKDBG_EVENT(bs->file, BLKDBG_EMPTY_IMAGE_PREPARE);

    /* Zero the clusters that will hold the new refcount table, the new
     * refcount block and the new L1 table (clusters 1 .. 2 + l1_clusters),
     * so the header can be switched over to them afterwards */
    ret = bdrv_pwrite_zeroes(bs->file, s->cluster_size,
                             (2 + l1_clusters) * s->cluster_size, 0);
    /* Whatever lived in these clusters before is overwritten now, so a
     * failure here is treated as leaving broken refcounts behind */
    if (ret < 0) {
        goto fail_broken_refcounts;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
    BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE);

    /* Point the header at the new L1 table (cluster 3) and the new,
     * one-cluster refcount table (cluster 1) with a single synced write */
    l1_ofs_rt_ofs_cls.l1_offset = cpu_to_be64(3 * s->cluster_size);
    l1_ofs_rt_ofs_cls.reftable_offset = cpu_to_be64(s->cluster_size);
    l1_ofs_rt_ofs_cls.reftable_clusters = cpu_to_be32(1);
    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset),
                           &l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls));
    if (ret < 0) {
        goto fail_broken_refcounts;
    }

    s->l1_table_offset = 3 * s->cluster_size;

    new_reftable = g_try_new0(uint64_t, s->cluster_size / sizeof(uint64_t));
    if (!new_reftable) {
        ret = -ENOMEM;
        goto fail_broken_refcounts;
    }

    /* Mirror the new refcount table in the in-memory state */
    s->refcount_table_offset = s->cluster_size;
    s->refcount_table_size   = s->cluster_size / sizeof(uint64_t);
    s->max_refcount_table_index = 0;

    /* Ownership of the buffer moves to s->refcount_table; the local pointer
     * is reset so that the fail path does not free it again */
    g_free(s->refcount_table);
    s->refcount_table = new_reftable;
    new_reftable = NULL;

    /* At this point the new refcount table is still all zeroes, both on disk
     * and in memory.  The only refcount block (cluster 2) is hooked up next,
     * and then the refcounts of the metadata clusters are set by allocating
     * them. */
    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);

    /* Enter the refcount block at cluster 2 as the first refcount table
     * entry, on disk first and then in memory */
    rt_entry = cpu_to_be64(2 * s->cluster_size);
    ret = bdrv_pwrite_sync(bs->file, s->cluster_size,
                           &rt_entry, sizeof(rt_entry));
    if (ret < 0) {
        goto fail_broken_refcounts;
    }
    s->refcount_table[0] = 2 * s->cluster_size;

    /* Allocate header, refcount table, refcount block and L1 table starting
     * from cluster 0; the allocation must come back at offset 0 */
    s->free_cluster_index = 0;
    assert(3 + l1_clusters <= s->refcount_block_size);
    offset = qcow2_alloc_clusters(bs, 3 * s->cluster_size + l1_size2);
    if (offset < 0) {
        ret = offset;
        goto fail_broken_refcounts;
    } else if (offset > 0) {
        error_report("First cluster in emptied image is in use");
        abort();
    }

    /* The metadata is consistent again, so the dirty flag can be cleared */
    ret = qcow2_mark_clean(bs);
    if (ret < 0) {
        goto fail;
    }

    /* Cut off everything after the new L1 table */
    ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size,
                        PREALLOC_MODE_OFF, &local_err);
    if (ret < 0) {
        error_report_err(local_err);
        goto fail;
    }

    return 0;

fail_broken_refcounts:
    /* The image has been left dirty with refcounts that may be wrong.
     * NOTE(review): clearing bs->drv presumably makes the node unusable so
     * that the damage cannot get worse - confirm the intended recovery. */
    bs->drv = NULL;

fail:
    g_free(new_reftable);
    return ret;
}
4369
4370static int qcow2_make_empty(BlockDriverState *bs)
4371{
4372 BDRVQcow2State *s = bs->opaque;
4373 uint64_t offset, end_offset;
4374 int step = QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size);
4375 int l1_clusters, ret = 0;
4376
4377 l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
4378
4379 if (s->qcow_version >= 3 && !s->snapshots && !s->nb_bitmaps &&
4380 3 + l1_clusters <= s->refcount_block_size &&
4381 s->crypt_method_header != QCOW_CRYPT_LUKS) {
4382
4383
4384
4385
4386
4387
4388
4389 return make_completely_empty(bs);
4390 }
4391
4392
4393
4394 end_offset = bs->total_sectors * BDRV_SECTOR_SIZE;
4395 for (offset = 0; offset < end_offset; offset += step) {
4396
4397
4398
4399
4400
4401 ret = qcow2_cluster_discard(bs, offset, MIN(step, end_offset - offset),
4402 QCOW2_DISCARD_SNAPSHOT, true);
4403 if (ret < 0) {
4404 break;
4405 }
4406 }
4407
4408 return ret;
4409}
4410
4411static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
4412{
4413 BDRVQcow2State *s = bs->opaque;
4414 int ret;
4415
4416 qemu_co_mutex_lock(&s->lock);
4417 ret = qcow2_write_caches(bs);
4418 qemu_co_mutex_unlock(&s->lock);
4419
4420 return ret;
4421}
4422
4423static ssize_t qcow2_measure_crypto_hdr_init_func(QCryptoBlock *block,
4424 size_t headerlen, void *opaque, Error **errp)
4425{
4426 size_t *headerlenp = opaque;
4427
4428
4429 *headerlenp = headerlen;
4430 return 0;
4431}
4432
4433static ssize_t qcow2_measure_crypto_hdr_write_func(QCryptoBlock *block,
4434 size_t offset, const uint8_t *buf, size_t buflen,
4435 void *opaque, Error **errp)
4436{
4437
4438 return buflen;
4439}
4440
4441
4442static bool qcow2_measure_luks_headerlen(QemuOpts *opts, size_t *len,
4443 Error **errp)
4444{
4445 QDict *opts_qdict;
4446 QDict *cryptoopts_qdict;
4447 QCryptoBlockCreateOptions *cryptoopts;
4448 QCryptoBlock *crypto;
4449
4450
4451 opts_qdict = qemu_opts_to_qdict(opts, NULL);
4452 qdict_extract_subqdict(opts_qdict, &cryptoopts_qdict, "encrypt.");
4453 qobject_unref(opts_qdict);
4454
4455
4456 qdict_put_str(cryptoopts_qdict, "format", "luks");
4457 cryptoopts = block_crypto_create_opts_init(cryptoopts_qdict, errp);
4458 qobject_unref(cryptoopts_qdict);
4459 if (!cryptoopts) {
4460 return false;
4461 }
4462
4463
4464 crypto = qcrypto_block_create(cryptoopts, "encrypt.",
4465 qcow2_measure_crypto_hdr_init_func,
4466 qcow2_measure_crypto_hdr_write_func,
4467 len, errp);
4468 qapi_free_QCryptoBlockCreateOptions(cryptoopts);
4469 if (!crypto) {
4470 return false;
4471 }
4472
4473 qcrypto_block_free(crypto);
4474 return true;
4475}
4476
/*
 * Compute how much space a qcow2 image created with the options in @opts
 * would need.
 *
 * If @in_bs is given, the virtual size is taken from its length (overriding
 * BLOCK_OPT_SIZE) and the amount of data that has to be allocated is derived
 * from its block status.
 *
 * The options consumed here are removed from @opts.
 *
 * Returns a newly allocated BlockMeasureInfo with @required and
 * @fully_allocated filled in, or NULL with @errp set on failure.
 */
static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
                                       Error **errp)
{
    Error *local_err = NULL;
    BlockMeasureInfo *info;
    uint64_t required = 0; /* bytes that must be allocated */
    uint64_t virtual_size; /* disk size as seen by guest */
    uint64_t refcount_bits;
    uint64_t l2_tables;
    uint64_t luks_payload_size = 0;
    size_t cluster_size;
    int version;
    char *optstr;
    PreallocMode prealloc;
    bool has_backing_file;
    bool has_luks;

    /* Parse image creation options */
    cluster_size = qcow2_opt_get_cluster_size_del(opts, &local_err);
    if (local_err) {
        goto err;
    }

    version = qcow2_opt_get_version_del(opts, &local_err);
    if (local_err) {
        goto err;
    }

    refcount_bits = qcow2_opt_get_refcount_bits_del(opts, version, &local_err);
    if (local_err) {
        goto err;
    }

    optstr = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
    prealloc = qapi_enum_parse(&PreallocMode_lookup, optstr,
                               PREALLOC_MODE_OFF, &local_err);
    g_free(optstr);
    if (local_err) {
        goto err;
    }

    /* Only the presence of a backing file matters, not its name */
    optstr = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
    has_backing_file = !!optstr;
    g_free(optstr);

    optstr = qemu_opt_get_del(opts, BLOCK_OPT_ENCRYPT_FORMAT);
    has_luks = optstr && strcmp(optstr, "luks") == 0;
    g_free(optstr);

    if (has_luks) {
        size_t headerlen;

        if (!qcow2_measure_luks_headerlen(opts, &headerlen, &local_err)) {
            goto err;
        }

        /* The LUKS header is accounted for in whole clusters */
        luks_payload_size = ROUND_UP(headerlen, cluster_size);
    }

    virtual_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
    virtual_size = ROUND_UP(virtual_size, cluster_size);

    /* Check that virtual disk size is valid: the L1 table, with one 8-byte
     * entry per L2 table, must not exceed QCOW_MAX_L1_SIZE */
    l2_tables = DIV_ROUND_UP(virtual_size / cluster_size,
                             cluster_size / sizeof(uint64_t));
    if (l2_tables * sizeof(uint64_t) > QCOW_MAX_L1_SIZE) {
        error_setg(&local_err, "The image size is too large "
                               "(try using a larger cluster size)");
        goto err;
    }

    /* Account for input image */
    if (in_bs) {
        int64_t ssize = bdrv_getlength(in_bs);
        if (ssize < 0) {
            error_setg_errno(&local_err, -ssize,
                             "Unable to get image virtual_size");
            goto err;
        }

        /* The length of the input image replaces BLOCK_OPT_SIZE */
        virtual_size = ROUND_UP(ssize, cluster_size);

        if (has_backing_file) {
            /* With a backing file every cluster is counted as required.
             * NOTE(review): presumably because clusters that read as zero in
             * @in_bs cannot simply be left unallocated when the new image
             * has a backing file - confirm. */
            required = virtual_size;
        } else {
            int64_t offset;
            int64_t pnum = 0;

            for (offset = 0; offset < ssize; offset += pnum) {
                int ret;

                ret = bdrv_block_status_above(in_bs, NULL, offset,
                                              ssize - offset, &pnum, NULL,
                                              NULL);
                if (ret < 0) {
                    error_setg_errno(&local_err, -ret,
                                     "Unable to get block status");
                    goto err;
                }

                if (ret & BDRV_BLOCK_ZERO) {
                    /* Zero regions need no allocation (there is no backing
                     * file in this branch) */
                } else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) ==
                           (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) {
                    /* Extend pnum to the end of the cluster, so that the
                     * next iteration starts on a cluster boundary */
                    pnum = ROUND_UP(offset + pnum, cluster_size) - offset;

                    /* Count the extent, plus the part of the first cluster
                     * that lies in front of @offset */
                    required += offset % cluster_size + pnum;
                }
            }
        }
    }

    /* Full and falloc preallocation allocate every data cluster, whatever
     * the input image contains */
    if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) {
        required = virtual_size;
    }

    info = g_new(BlockMeasureInfo, 1);
    info->fully_allocated =
        qcow2_calc_prealloc_size(virtual_size, cluster_size,
                                 ctz32(refcount_bits)) + luks_payload_size;

    /* Take the fully allocated size, remove all guest data clusters from it
     * (virtual_size) and add back only those that are required.
     * NOTE(review): the metadata share is therefore always the one of a
     * fully allocated image, which looks like a deliberate upper bound -
     * confirm. */
    info->required = info->fully_allocated - virtual_size + required;
    return info;

err:
    error_propagate(errp, local_err);
    return NULL;
}
4619
4620static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4621{
4622 BDRVQcow2State *s = bs->opaque;
4623 bdi->unallocated_blocks_are_zero = true;
4624 bdi->cluster_size = s->cluster_size;
4625 bdi->vm_state_offset = qcow2_vm_state_offset(s);
4626 return 0;
4627}
4628
4629static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs,
4630 Error **errp)
4631{
4632 BDRVQcow2State *s = bs->opaque;
4633 ImageInfoSpecific *spec_info;
4634 QCryptoBlockInfo *encrypt_info = NULL;
4635 Error *local_err = NULL;
4636
4637 if (s->crypto != NULL) {
4638 encrypt_info = qcrypto_block_get_info(s->crypto, &local_err);
4639 if (local_err) {
4640 error_propagate(errp, local_err);
4641 return NULL;
4642 }
4643 }
4644
4645 spec_info = g_new(ImageInfoSpecific, 1);
4646 *spec_info = (ImageInfoSpecific){
4647 .type = IMAGE_INFO_SPECIFIC_KIND_QCOW2,
4648 .u.qcow2.data = g_new0(ImageInfoSpecificQCow2, 1),
4649 };
4650 if (s->qcow_version == 2) {
4651 *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
4652 .compat = g_strdup("0.10"),
4653 .refcount_bits = s->refcount_bits,
4654 };
4655 } else if (s->qcow_version == 3) {
4656 Qcow2BitmapInfoList *bitmaps;
4657 bitmaps = qcow2_get_bitmap_info_list(bs, &local_err);
4658 if (local_err) {
4659 error_propagate(errp, local_err);
4660 qapi_free_ImageInfoSpecific(spec_info);
4661 return NULL;
4662 }
4663 *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
4664 .compat = g_strdup("1.1"),
4665 .lazy_refcounts = s->compatible_features &
4666 QCOW2_COMPAT_LAZY_REFCOUNTS,
4667 .has_lazy_refcounts = true,
4668 .corrupt = s->incompatible_features &
4669 QCOW2_INCOMPAT_CORRUPT,
4670 .has_corrupt = true,
4671 .refcount_bits = s->refcount_bits,
4672 .has_bitmaps = !!bitmaps,
4673 .bitmaps = bitmaps,
4674 .has_data_file = !!s->image_data_file,
4675 .data_file = g_strdup(s->image_data_file),
4676 .has_data_file_raw = has_data_file(bs),
4677 .data_file_raw = data_file_is_raw(bs),
4678 };
4679 } else {
4680
4681
4682 assert(false);
4683 }
4684
4685 if (encrypt_info) {
4686 ImageInfoSpecificQCow2Encryption *qencrypt =
4687 g_new(ImageInfoSpecificQCow2Encryption, 1);
4688 switch (encrypt_info->format) {
4689 case Q_CRYPTO_BLOCK_FORMAT_QCOW:
4690 qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_AES;
4691 break;
4692 case Q_CRYPTO_BLOCK_FORMAT_LUKS:
4693 qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_LUKS;
4694 qencrypt->u.luks = encrypt_info->u.luks;
4695 break;
4696 default:
4697 abort();
4698 }
4699
4700
4701 memset(&encrypt_info->u, 0, sizeof(encrypt_info->u));
4702 qapi_free_QCryptoBlockInfo(encrypt_info);
4703
4704 spec_info->u.qcow2.data->has_encrypt = true;
4705 spec_info->u.qcow2.data->encrypt = qencrypt;
4706 }
4707
4708 return spec_info;
4709}
4710
4711static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
4712 int64_t pos)
4713{
4714 BDRVQcow2State *s = bs->opaque;
4715
4716 BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
4717 return bs->drv->bdrv_co_pwritev(bs, qcow2_vm_state_offset(s) + pos,
4718 qiov->size, qiov, 0);
4719}
4720
4721static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
4722 int64_t pos)
4723{
4724 BDRVQcow2State *s = bs->opaque;
4725
4726 BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
4727 return bs->drv->bdrv_co_preadv(bs, qcow2_vm_state_offset(s) + pos,
4728 qiov->size, qiov, 0);
4729}
4730
4731
4732
4733
4734
4735static int qcow2_downgrade(BlockDriverState *bs, int target_version,
4736 BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
4737 Error **errp)
4738{
4739 BDRVQcow2State *s = bs->opaque;
4740 int current_version = s->qcow_version;
4741 int ret;
4742
4743
4744 assert(target_version < current_version);
4745
4746
4747 assert(target_version == 2);
4748
4749 if (s->refcount_order != 4) {
4750 error_setg(errp, "compat=0.10 requires refcount_bits=16");
4751 return -ENOTSUP;
4752 }
4753
4754 if (has_data_file(bs)) {
4755 error_setg(errp, "Cannot downgrade an image with a data file");
4756 return -ENOTSUP;
4757 }
4758
4759
4760 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
4761 ret = qcow2_mark_clean(bs);
4762 if (ret < 0) {
4763 error_setg_errno(errp, -ret, "Failed to make the image clean");
4764 return ret;
4765 }
4766 }
4767
4768
4769
4770
4771
4772 if (s->incompatible_features) {
4773 error_setg(errp, "Cannot downgrade an image with incompatible features "
4774 "%#" PRIx64 " set", s->incompatible_features);
4775 return -ENOTSUP;
4776 }
4777
4778
4779 s->compatible_features = 0;
4780
4781
4782
4783
4784 s->autoclear_features = 0;
4785
4786 ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque);
4787 if (ret < 0) {
4788 error_setg_errno(errp, -ret, "Failed to turn zero into data clusters");
4789 return ret;
4790 }
4791
4792 s->qcow_version = target_version;
4793 ret = qcow2_update_header(bs);
4794 if (ret < 0) {
4795 s->qcow_version = current_version;
4796 error_setg_errno(errp, -ret, "Failed to update the image header");
4797 return ret;
4798 }
4799 return 0;
4800}
4801
/* Identifies the amend step that is currently reporting progress through
 * qcow2_amend_helper_cb() */
typedef enum Qcow2AmendOperation {
    /* Zero value: what Qcow2AmendHelperCBInfo.last_operation holds after
     * zero-initialization, which lets the helper callback tell its very
     * first invocation apart from a change of operation */
    QCOW2_NO_OPERATION = 0,

    QCOW2_CHANGING_REFCOUNT_ORDER,
    QCOW2_DOWNGRADING,
} Qcow2AmendOperation;
4811
/* State shared between the amend code and qcow2_amend_helper_cb(), which
 * combines the progress of several operations into one overall figure */
typedef struct Qcow2AmendHelperCBInfo {
    /* The caller's status callback and its opaque argument; the helper
     * callback forwards the combined progress to them */
    BlockDriverAmendStatusCB *original_status_cb;
    void *original_cb_opaque;

    /* Operation that is reporting progress at the moment */
    Qcow2AmendOperation current_operation;

    /* Total number of operations to perform (must be greater than zero when
     * the helper callback runs) */
    int total_operations;

    /* The fields below are maintained by qcow2_amend_helper_cb() */

    /* Number of operations that have finished */
    int operations_completed;

    /* Sum of the work sizes of all finished operations */
    int64_t offset_completed;

    /* Operation seen by the previous callback invocation and the work size
     * it reported then */
    Qcow2AmendOperation last_operation;
    int64_t last_work_size;
} Qcow2AmendHelperCBInfo;
4834
4835static void qcow2_amend_helper_cb(BlockDriverState *bs,
4836 int64_t operation_offset,
4837 int64_t operation_work_size, void *opaque)
4838{
4839 Qcow2AmendHelperCBInfo *info = opaque;
4840 int64_t current_work_size;
4841 int64_t projected_work_size;
4842
4843 if (info->current_operation != info->last_operation) {
4844 if (info->last_operation != QCOW2_NO_OPERATION) {
4845 info->offset_completed += info->last_work_size;
4846 info->operations_completed++;
4847 }
4848
4849 info->last_operation = info->current_operation;
4850 }
4851
4852 assert(info->total_operations > 0);
4853 assert(info->operations_completed < info->total_operations);
4854
4855 info->last_work_size = operation_work_size;
4856
4857 current_work_size = info->offset_completed + operation_work_size;
4858
4859
4860
4861
4862
4863 projected_work_size = current_work_size * (info->total_operations -
4864 info->operations_completed - 1)
4865 / (info->operations_completed + 1);
4866
4867 info->original_status_cb(bs, info->offset_completed + operation_offset,
4868 current_work_size + projected_work_size,
4869 info->original_cb_opaque);
4870}
4871
4872static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
4873 BlockDriverAmendStatusCB *status_cb,
4874 void *cb_opaque,
4875 Error **errp)
4876{
4877 BDRVQcow2State *s = bs->opaque;
4878 int old_version = s->qcow_version, new_version = old_version;
4879 uint64_t new_size = 0;
4880 const char *backing_file = NULL, *backing_format = NULL, *data_file = NULL;
4881 bool lazy_refcounts = s->use_lazy_refcounts;
4882 bool data_file_raw = data_file_is_raw(bs);
4883 const char *compat = NULL;
4884 uint64_t cluster_size = s->cluster_size;
4885 bool encrypt;
4886 int encformat;
4887 int refcount_bits = s->refcount_bits;
4888 int ret;
4889 QemuOptDesc *desc = opts->list->desc;
4890 Qcow2AmendHelperCBInfo helper_cb_info;
4891
4892 while (desc && desc->name) {
4893 if (!qemu_opt_find(opts, desc->name)) {
4894
4895 desc++;
4896 continue;
4897 }
4898
4899 if (!strcmp(desc->name, BLOCK_OPT_COMPAT_LEVEL)) {
4900 compat = qemu_opt_get(opts, BLOCK_OPT_COMPAT_LEVEL);
4901 if (!compat) {
4902
4903 } else if (!strcmp(compat, "0.10")) {
4904 new_version = 2;
4905 } else if (!strcmp(compat, "1.1")) {
4906 new_version = 3;
4907 } else {
4908 error_setg(errp, "Unknown compatibility level %s", compat);
4909 return -EINVAL;
4910 }
4911 } else if (!strcmp(desc->name, BLOCK_OPT_PREALLOC)) {
4912 error_setg(errp, "Cannot change preallocation mode");
4913 return -ENOTSUP;
4914 } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) {
4915 new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
4916 } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FILE)) {
4917 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
4918 } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FMT)) {
4919 backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
4920 } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT)) {
4921 encrypt = qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT,
4922 !!s->crypto);
4923
4924 if (encrypt != !!s->crypto) {
4925 error_setg(errp,
4926 "Changing the encryption flag is not supported");
4927 return -ENOTSUP;
4928 }
4929 } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT_FORMAT)) {
4930 encformat = qcow2_crypt_method_from_format(
4931 qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT));
4932
4933 if (encformat != s->crypt_method_header) {
4934 error_setg(errp,
4935 "Changing the encryption format is not supported");
4936 return -ENOTSUP;
4937 }
4938 } else if (g_str_has_prefix(desc->name, "encrypt.")) {
4939 error_setg(errp,
4940 "Changing the encryption parameters is not supported");
4941 return -ENOTSUP;
4942 } else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) {
4943 cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE,
4944 cluster_size);
4945 if (cluster_size != s->cluster_size) {
4946 error_setg(errp, "Changing the cluster size is not supported");
4947 return -ENOTSUP;
4948 }
4949 } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
4950 lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS,
4951 lazy_refcounts);
4952 } else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) {
4953 refcount_bits = qemu_opt_get_number(opts, BLOCK_OPT_REFCOUNT_BITS,
4954 refcount_bits);
4955
4956 if (refcount_bits <= 0 || refcount_bits > 64 ||
4957 !is_power_of_2(refcount_bits))
4958 {
4959 error_setg(errp, "Refcount width must be a power of two and "
4960 "may not exceed 64 bits");
4961 return -EINVAL;
4962 }
4963 } else if (!strcmp(desc->name, BLOCK_OPT_DATA_FILE)) {
4964 data_file = qemu_opt_get(opts, BLOCK_OPT_DATA_FILE);
4965 if (data_file && !has_data_file(bs)) {
4966 error_setg(errp, "data-file can only be set for images that "
4967 "use an external data file");
4968 return -EINVAL;
4969 }
4970 } else if (!strcmp(desc->name, BLOCK_OPT_DATA_FILE_RAW)) {
4971 data_file_raw = qemu_opt_get_bool(opts, BLOCK_OPT_DATA_FILE_RAW,
4972 data_file_raw);
4973 if (data_file_raw && !data_file_is_raw(bs)) {
4974 error_setg(errp, "data-file-raw cannot be set on existing "
4975 "images");
4976 return -EINVAL;
4977 }
4978 } else {
4979
4980
4981 abort();
4982 }
4983
4984 desc++;
4985 }
4986
4987 helper_cb_info = (Qcow2AmendHelperCBInfo){
4988 .original_status_cb = status_cb,
4989 .original_cb_opaque = cb_opaque,
4990 .total_operations = (new_version < old_version)
4991 + (s->refcount_bits != refcount_bits)
4992 };
4993
4994
4995 if (new_version > old_version) {
4996 s->qcow_version = new_version;
4997 ret = qcow2_update_header(bs);
4998 if (ret < 0) {
4999 s->qcow_version = old_version;
5000 error_setg_errno(errp, -ret, "Failed to update the image header");
5001 return ret;
5002 }
5003 }
5004
5005 if (s->refcount_bits != refcount_bits) {
5006 int refcount_order = ctz32(refcount_bits);
5007
5008 if (new_version < 3 && refcount_bits != 16) {
5009 error_setg(errp, "Refcount widths other than 16 bits require "
5010 "compatibility level 1.1 or above (use compat=1.1 or "
5011 "greater)");
5012 return -EINVAL;
5013 }
5014
5015 helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER;
5016 ret = qcow2_change_refcount_order(bs, refcount_order,
5017 &qcow2_amend_helper_cb,
5018 &helper_cb_info, errp);
5019 if (ret < 0) {
5020 return ret;
5021 }
5022 }
5023
5024
5025 if (data_file_raw) {
5026 s->autoclear_features |= QCOW2_AUTOCLEAR_DATA_FILE_RAW;
5027 } else {
5028 s->autoclear_features &= ~QCOW2_AUTOCLEAR_DATA_FILE_RAW;
5029 }
5030
5031 if (data_file) {
5032 g_free(s->image_data_file);
5033 s->image_data_file = *data_file ? g_strdup(data_file) : NULL;
5034 }
5035
5036 ret = qcow2_update_header(bs);
5037 if (ret < 0) {
5038 error_setg_errno(errp, -ret, "Failed to update the image header");
5039 return ret;
5040 }
5041
5042 if (backing_file || backing_format) {
5043 ret = qcow2_change_backing_file(bs,
5044 backing_file ?: s->image_backing_file,
5045 backing_format ?: s->image_backing_format);
5046 if (ret < 0) {
5047 error_setg_errno(errp, -ret, "Failed to change the backing file");
5048 return ret;
5049 }
5050 }
5051
5052 if (s->use_lazy_refcounts != lazy_refcounts) {
5053 if (lazy_refcounts) {
5054 if (new_version < 3) {
5055 error_setg(errp, "Lazy refcounts only supported with "
5056 "compatibility level 1.1 and above (use compat=1.1 "
5057 "or greater)");
5058 return -EINVAL;
5059 }
5060 s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
5061 ret = qcow2_update_header(bs);
5062 if (ret < 0) {
5063 s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
5064 error_setg_errno(errp, -ret, "Failed to update the image header");
5065 return ret;
5066 }
5067 s->use_lazy_refcounts = true;
5068 } else {
5069
5070 ret = qcow2_mark_clean(bs);
5071 if (ret < 0) {
5072 error_setg_errno(errp, -ret, "Failed to make the image clean");
5073 return ret;
5074 }
5075
5076 s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
5077 ret = qcow2_update_header(bs);
5078 if (ret < 0) {
5079 s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
5080 error_setg_errno(errp, -ret, "Failed to update the image header");
5081 return ret;
5082 }
5083 s->use_lazy_refcounts = false;
5084 }
5085 }
5086
5087 if (new_size) {
5088 BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL);
5089 ret = blk_insert_bs(blk, bs, errp);
5090 if (ret < 0) {
5091 blk_unref(blk);
5092 return ret;
5093 }
5094
5095 ret = blk_truncate(blk, new_size, PREALLOC_MODE_OFF, errp);
5096 blk_unref(blk);
5097 if (ret < 0) {
5098 return ret;
5099 }
5100 }
5101
5102
5103 if (new_version < old_version) {
5104 helper_cb_info.current_operation = QCOW2_DOWNGRADING;
5105 ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb,
5106 &helper_cb_info, errp);
5107 if (ret < 0) {
5108 return ret;
5109 }
5110 }
5111
5112 return 0;
5113}
5114
5115
5116
5117
5118
5119
5120
5121void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
5122 int64_t size, const char *message_format, ...)
5123{
5124 BDRVQcow2State *s = bs->opaque;
5125 const char *node_name;
5126 char *message;
5127 va_list ap;
5128
5129 fatal = fatal && bdrv_is_writable(bs);
5130
5131 if (s->signaled_corruption &&
5132 (!fatal || (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT)))
5133 {
5134 return;
5135 }
5136
5137 va_start(ap, message_format);
5138 message = g_strdup_vprintf(message_format, ap);
5139 va_end(ap);
5140
5141 if (fatal) {
5142 fprintf(stderr, "qcow2: Marking image as corrupt: %s; further "
5143 "corruption events will be suppressed\n", message);
5144 } else {
5145 fprintf(stderr, "qcow2: Image is corrupt: %s; further non-fatal "
5146 "corruption events will be suppressed\n", message);
5147 }
5148
5149 node_name = bdrv_get_node_name(bs);
5150 qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs),
5151 *node_name != '\0', node_name,
5152 message, offset >= 0, offset,
5153 size >= 0, size,
5154 fatal);
5155 g_free(message);
5156
5157 if (fatal) {
5158 qcow2_mark_corrupt(bs);
5159 bs->drv = NULL;
5160 }
5161
5162 s->signaled_corruption = true;
5163}
5164
5165static QemuOptsList qcow2_create_opts = {
5166 .name = "qcow2-create-opts",
5167 .head = QTAILQ_HEAD_INITIALIZER(qcow2_create_opts.head),
5168 .desc = {
5169 {
5170 .name = BLOCK_OPT_SIZE,
5171 .type = QEMU_OPT_SIZE,
5172 .help = "Virtual disk size"
5173 },
5174 {
5175 .name = BLOCK_OPT_COMPAT_LEVEL,
5176 .type = QEMU_OPT_STRING,
5177 .help = "Compatibility level (0.10 or 1.1)"
5178 },
5179 {
5180 .name = BLOCK_OPT_BACKING_FILE,
5181 .type = QEMU_OPT_STRING,
5182 .help = "File name of a base image"
5183 },
5184 {
5185 .name = BLOCK_OPT_BACKING_FMT,
5186 .type = QEMU_OPT_STRING,
5187 .help = "Image format of the base image"
5188 },
5189 {
5190 .name = BLOCK_OPT_DATA_FILE,
5191 .type = QEMU_OPT_STRING,
5192 .help = "File name of an external data file"
5193 },
5194 {
5195 .name = BLOCK_OPT_DATA_FILE_RAW,
5196 .type = QEMU_OPT_BOOL,
5197 .help = "The external data file must stay valid as a raw image"
5198 },
5199 {
5200 .name = BLOCK_OPT_ENCRYPT,
5201 .type = QEMU_OPT_BOOL,
5202 .help = "Encrypt the image with format 'aes'. (Deprecated "
5203 "in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)",
5204 },
5205 {
5206 .name = BLOCK_OPT_ENCRYPT_FORMAT,
5207 .type = QEMU_OPT_STRING,
5208 .help = "Encrypt the image, format choices: 'aes', 'luks'",
5209 },
5210 BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.",
5211 "ID of secret providing qcow AES key or LUKS passphrase"),
5212 BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG("encrypt."),
5213 BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE("encrypt."),
5214 BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG("encrypt."),
5215 BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG("encrypt."),
5216 BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG("encrypt."),
5217 BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME("encrypt."),
5218 {
5219 .name = BLOCK_OPT_CLUSTER_SIZE,
5220 .type = QEMU_OPT_SIZE,
5221 .help = "qcow2 cluster size",
5222 .def_value_str = stringify(DEFAULT_CLUSTER_SIZE)
5223 },
5224 {
5225 .name = BLOCK_OPT_PREALLOC,
5226 .type = QEMU_OPT_STRING,
5227 .help = "Preallocation mode (allowed values: off, metadata, "
5228 "falloc, full)"
5229 },
5230 {
5231 .name = BLOCK_OPT_LAZY_REFCOUNTS,
5232 .type = QEMU_OPT_BOOL,
5233 .help = "Postpone refcount updates",
5234 .def_value_str = "off"
5235 },
5236 {
5237 .name = BLOCK_OPT_REFCOUNT_BITS,
5238 .type = QEMU_OPT_NUMBER,
5239 .help = "Width of a reference count entry in bits",
5240 .def_value_str = "16"
5241 },
5242 { }
5243 }
5244};
5245
5246static const char *const qcow2_strong_runtime_opts[] = {
5247 "encrypt." BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET,
5248
5249 NULL
5250};
5251
5252BlockDriver bdrv_qcow2 = {
5253 .format_name = "qcow2",
5254 .instance_size = sizeof(BDRVQcow2State),
5255 .bdrv_probe = qcow2_probe,
5256 .bdrv_open = qcow2_open,
5257 .bdrv_close = qcow2_close,
5258 .bdrv_reopen_prepare = qcow2_reopen_prepare,
5259 .bdrv_reopen_commit = qcow2_reopen_commit,
5260 .bdrv_reopen_abort = qcow2_reopen_abort,
5261 .bdrv_join_options = qcow2_join_options,
5262 .bdrv_child_perm = bdrv_format_default_perms,
5263 .bdrv_co_create_opts = qcow2_co_create_opts,
5264 .bdrv_co_create = qcow2_co_create,
5265 .bdrv_has_zero_init = bdrv_has_zero_init_1,
5266 .bdrv_co_block_status = qcow2_co_block_status,
5267
5268 .bdrv_co_preadv = qcow2_co_preadv,
5269 .bdrv_co_pwritev = qcow2_co_pwritev,
5270 .bdrv_co_flush_to_os = qcow2_co_flush_to_os,
5271
5272 .bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes,
5273 .bdrv_co_pdiscard = qcow2_co_pdiscard,
5274 .bdrv_co_copy_range_from = qcow2_co_copy_range_from,
5275 .bdrv_co_copy_range_to = qcow2_co_copy_range_to,
5276 .bdrv_co_truncate = qcow2_co_truncate,
5277 .bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed,
5278 .bdrv_make_empty = qcow2_make_empty,
5279
5280 .bdrv_snapshot_create = qcow2_snapshot_create,
5281 .bdrv_snapshot_goto = qcow2_snapshot_goto,
5282 .bdrv_snapshot_delete = qcow2_snapshot_delete,
5283 .bdrv_snapshot_list = qcow2_snapshot_list,
5284 .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp,
5285 .bdrv_measure = qcow2_measure,
5286 .bdrv_get_info = qcow2_get_info,
5287 .bdrv_get_specific_info = qcow2_get_specific_info,
5288
5289 .bdrv_save_vmstate = qcow2_save_vmstate,
5290 .bdrv_load_vmstate = qcow2_load_vmstate,
5291
5292 .supports_backing = true,
5293 .bdrv_change_backing_file = qcow2_change_backing_file,
5294
5295 .bdrv_refresh_limits = qcow2_refresh_limits,
5296 .bdrv_co_invalidate_cache = qcow2_co_invalidate_cache,
5297 .bdrv_inactivate = qcow2_inactivate,
5298
5299 .create_opts = &qcow2_create_opts,
5300 .strong_runtime_opts = qcow2_strong_runtime_opts,
5301 .mutable_opts = mutable_opts,
5302 .bdrv_co_check = qcow2_co_check,
5303 .bdrv_amend_options = qcow2_amend_options,
5304
5305 .bdrv_detach_aio_context = qcow2_detach_aio_context,
5306 .bdrv_attach_aio_context = qcow2_attach_aio_context,
5307
5308 .bdrv_reopen_bitmaps_rw = qcow2_reopen_bitmaps_rw,
5309 .bdrv_can_store_new_dirty_bitmap = qcow2_can_store_new_dirty_bitmap,
5310 .bdrv_remove_persistent_dirty_bitmap = qcow2_remove_persistent_dirty_bitmap,
5311};
5312
5313static void bdrv_qcow2_init(void)
5314{
5315 bdrv_register(&bdrv_qcow2);
5316}
5317
5318block_init(bdrv_qcow2_init);
5319