1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include "qemu/osdep.h"
26#include "block/block-io.h"
27#include "qapi/error.h"
28#include "qcow2.h"
29#include "qemu/range.h"
30#include "qemu/bswap.h"
31#include "qemu/cutils.h"
32#include "qemu/memalign.h"
33#include "trace.h"
34
35static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size,
36 uint64_t max);
37
38G_GNUC_WARN_UNUSED_RESULT
39static int update_refcount(BlockDriverState *bs,
40 int64_t offset, int64_t length, uint64_t addend,
41 bool decrease, enum qcow2_discard_type type);
42
43static uint64_t get_refcount_ro0(const void *refcount_array, uint64_t index);
44static uint64_t get_refcount_ro1(const void *refcount_array, uint64_t index);
45static uint64_t get_refcount_ro2(const void *refcount_array, uint64_t index);
46static uint64_t get_refcount_ro3(const void *refcount_array, uint64_t index);
47static uint64_t get_refcount_ro4(const void *refcount_array, uint64_t index);
48static uint64_t get_refcount_ro5(const void *refcount_array, uint64_t index);
49static uint64_t get_refcount_ro6(const void *refcount_array, uint64_t index);
50
51static void set_refcount_ro0(void *refcount_array, uint64_t index,
52 uint64_t value);
53static void set_refcount_ro1(void *refcount_array, uint64_t index,
54 uint64_t value);
55static void set_refcount_ro2(void *refcount_array, uint64_t index,
56 uint64_t value);
57static void set_refcount_ro3(void *refcount_array, uint64_t index,
58 uint64_t value);
59static void set_refcount_ro4(void *refcount_array, uint64_t index,
60 uint64_t value);
61static void set_refcount_ro5(void *refcount_array, uint64_t index,
62 uint64_t value);
63static void set_refcount_ro6(void *refcount_array, uint64_t index,
64 uint64_t value);
65
66
67static Qcow2GetRefcountFunc *const get_refcount_funcs[] = {
68 &get_refcount_ro0,
69 &get_refcount_ro1,
70 &get_refcount_ro2,
71 &get_refcount_ro3,
72 &get_refcount_ro4,
73 &get_refcount_ro5,
74 &get_refcount_ro6
75};
76
77static Qcow2SetRefcountFunc *const set_refcount_funcs[] = {
78 &set_refcount_ro0,
79 &set_refcount_ro1,
80 &set_refcount_ro2,
81 &set_refcount_ro3,
82 &set_refcount_ro4,
83 &set_refcount_ro5,
84 &set_refcount_ro6
85};
86
87
88
89
90
91static void update_max_refcount_table_index(BDRVQcow2State *s)
92{
93 unsigned i = s->refcount_table_size - 1;
94 while (i > 0 && (s->refcount_table[i] & REFT_OFFSET_MASK) == 0) {
95 i--;
96 }
97
98 s->max_refcount_table_index = i;
99}
100
101int coroutine_fn qcow2_refcount_init(BlockDriverState *bs)
102{
103 BDRVQcow2State *s = bs->opaque;
104 unsigned int refcount_table_size2, i;
105 int ret;
106
107 assert(s->refcount_order >= 0 && s->refcount_order <= 6);
108
109 s->get_refcount = get_refcount_funcs[s->refcount_order];
110 s->set_refcount = set_refcount_funcs[s->refcount_order];
111
112 assert(s->refcount_table_size <= INT_MAX / REFTABLE_ENTRY_SIZE);
113 refcount_table_size2 = s->refcount_table_size * REFTABLE_ENTRY_SIZE;
114 s->refcount_table = g_try_malloc(refcount_table_size2);
115
116 if (s->refcount_table_size > 0) {
117 if (s->refcount_table == NULL) {
118 ret = -ENOMEM;
119 goto fail;
120 }
121 BLKDBG_CO_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
122 ret = bdrv_co_pread(bs->file, s->refcount_table_offset,
123 refcount_table_size2, s->refcount_table, 0);
124 if (ret < 0) {
125 goto fail;
126 }
127 for(i = 0; i < s->refcount_table_size; i++)
128 be64_to_cpus(&s->refcount_table[i]);
129 update_max_refcount_table_index(s);
130 }
131 return 0;
132 fail:
133 return ret;
134}
135
136void qcow2_refcount_close(BlockDriverState *bs)
137{
138 BDRVQcow2State *s = bs->opaque;
139 g_free(s->refcount_table);
140}
141
142
/*
 * Read entry @index from an array of 1-bit refcounts: eight entries per
 * byte, entry 0 in the least significant bit.
 */
static uint64_t get_refcount_ro0(const void *refcount_array, uint64_t index)
{
    const uint8_t *bytes = refcount_array;
    uint64_t shift = index % 8;

    return (bytes[index / 8] >> shift) & 0x1;
}
147
/*
 * Store @value into entry @index of an array of 1-bit refcounts (eight
 * entries per byte, entry 0 in the least significant bit).  @value must fit
 * into one bit.
 */
static void set_refcount_ro0(void *refcount_array, uint64_t index,
                             uint64_t value)
{
    uint8_t *slot = &((uint8_t *)refcount_array)[index / 8];
    uint64_t shift = index % 8;

    assert(!(value >> 1));
    /* Clear the old bit, then merge in the new one */
    *slot = (uint8_t)(*slot & ~(0x1 << shift));
    *slot = (uint8_t)(*slot | (value << shift));
}
155
/*
 * Read entry @index from an array of 2-bit refcounts: four entries per
 * byte, entry 0 in the two least significant bits.
 */
static uint64_t get_refcount_ro1(const void *refcount_array, uint64_t index)
{
    const uint8_t *bytes = refcount_array;
    uint64_t shift = 2 * (index % 4);

    return (bytes[index / 4] >> shift) & 0x3;
}
161
/*
 * Store @value into entry @index of an array of 2-bit refcounts (four
 * entries per byte, entry 0 in the two least significant bits).  @value
 * must fit into two bits.
 */
static void set_refcount_ro1(void *refcount_array, uint64_t index,
                             uint64_t value)
{
    uint8_t *slot = &((uint8_t *)refcount_array)[index / 4];
    uint64_t shift = 2 * (index % 4);

    assert(!(value >> 2));
    /* Clear the old field, then merge in the new value */
    *slot = (uint8_t)(*slot & ~(0x3 << shift));
    *slot = (uint8_t)(*slot | (value << shift));
}
169
/*
 * Read entry @index from an array of 4-bit refcounts: two entries per byte,
 * even entries in the low nibble.
 */
static uint64_t get_refcount_ro2(const void *refcount_array, uint64_t index)
{
    const uint8_t *bytes = refcount_array;
    uint64_t shift = 4 * (index % 2);

    return (bytes[index / 2] >> shift) & 0xf;
}
175
/*
 * Store @value into entry @index of an array of 4-bit refcounts (two
 * entries per byte, even entries in the low nibble).  @value must fit into
 * four bits.
 */
static void set_refcount_ro2(void *refcount_array, uint64_t index,
                             uint64_t value)
{
    uint8_t *slot = &((uint8_t *)refcount_array)[index / 2];
    uint64_t shift = 4 * (index % 2);

    assert(!(value >> 4));
    /* Clear the old nibble, then merge in the new value */
    *slot = (uint8_t)(*slot & ~(0xf << shift));
    *slot = (uint8_t)(*slot | (value << shift));
}
183
/* Read entry @index from an array of 8-bit refcounts (one byte each). */
static uint64_t get_refcount_ro3(const void *refcount_array, uint64_t index)
{
    const uint8_t *bytes = refcount_array;

    return bytes[index];
}
188
/*
 * Store @value into entry @index of an array of 8-bit refcounts.  @value
 * must fit into eight bits.
 */
static void set_refcount_ro3(void *refcount_array, uint64_t index,
                             uint64_t value)
{
    uint8_t *bytes = refcount_array;

    assert(!(value >> 8));
    bytes[index] = value;
}
195
/*
 * Read entry @index from an array of 16-bit refcounts; the stored value is
 * passed through be16_to_cpu() before it is returned.
 */
static uint64_t get_refcount_ro4(const void *refcount_array, uint64_t index)
{
    const uint16_t *entries = refcount_array;

    return be16_to_cpu(entries[index]);
}
200
/*
 * Store @value into entry @index of an array of 16-bit refcounts after
 * converting it with cpu_to_be16().  @value must fit into 16 bits.
 */
static void set_refcount_ro4(void *refcount_array, uint64_t index,
                             uint64_t value)
{
    uint16_t *entries = refcount_array;

    assert(!(value >> 16));
    entries[index] = cpu_to_be16(value);
}
207
/*
 * Read entry @index from an array of 32-bit refcounts; the stored value is
 * passed through be32_to_cpu() before it is returned.
 */
static uint64_t get_refcount_ro5(const void *refcount_array, uint64_t index)
{
    const uint32_t *entries = refcount_array;

    return be32_to_cpu(entries[index]);
}
212
/*
 * Store @value into entry @index of an array of 32-bit refcounts after
 * converting it with cpu_to_be32().  @value must fit into 32 bits.
 */
static void set_refcount_ro5(void *refcount_array, uint64_t index,
                             uint64_t value)
{
    uint32_t *entries = refcount_array;

    assert(!(value >> 32));
    entries[index] = cpu_to_be32(value);
}
219
/*
 * Read entry @index from an array of 64-bit refcounts; the stored value is
 * passed through be64_to_cpu() before it is returned.
 */
static uint64_t get_refcount_ro6(const void *refcount_array, uint64_t index)
{
    const uint64_t *entries = refcount_array;

    return be64_to_cpu(entries[index]);
}
224
/*
 * Store @value into entry @index of an array of 64-bit refcounts after
 * converting it with cpu_to_be64().  Every uint64_t value is accepted.
 */
static void set_refcount_ro6(void *refcount_array, uint64_t index,
                             uint64_t value)
{
    uint64_t *entries = refcount_array;

    entries[index] = cpu_to_be64(value);
}
230
231
232static int load_refcount_block(BlockDriverState *bs,
233 int64_t refcount_block_offset,
234 void **refcount_block)
235{
236 BDRVQcow2State *s = bs->opaque;
237
238 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD);
239 return qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
240 refcount_block);
241}
242
243
244
245
246
247int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
248 uint64_t *refcount)
249{
250 BDRVQcow2State *s = bs->opaque;
251 uint64_t refcount_table_index, block_index;
252 int64_t refcount_block_offset;
253 int ret;
254 void *refcount_block;
255
256 refcount_table_index = cluster_index >> s->refcount_block_bits;
257 if (refcount_table_index >= s->refcount_table_size) {
258 *refcount = 0;
259 return 0;
260 }
261 refcount_block_offset =
262 s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK;
263 if (!refcount_block_offset) {
264 *refcount = 0;
265 return 0;
266 }
267
268 if (offset_into_cluster(s, refcount_block_offset)) {
269 qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#" PRIx64
270 " unaligned (reftable index: %#" PRIx64 ")",
271 refcount_block_offset, refcount_table_index);
272 return -EIO;
273 }
274
275 ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
276 &refcount_block);
277 if (ret < 0) {
278 return ret;
279 }
280
281 block_index = cluster_index & (s->refcount_block_size - 1);
282 *refcount = s->get_refcount(refcount_block, block_index);
283
284 qcow2_cache_put(s->refcount_block_cache, &refcount_block);
285
286 return 0;
287}
288
289
290static int in_same_refcount_block(BDRVQcow2State *s, uint64_t offset_a,
291 uint64_t offset_b)
292{
293 uint64_t block_a = offset_a >> (s->cluster_bits + s->refcount_block_bits);
294 uint64_t block_b = offset_b >> (s->cluster_bits + s->refcount_block_bits);
295
296 return (block_a == block_b);
297}
298
299
300
301
302
303
304
305static int alloc_refcount_block(BlockDriverState *bs,
306 int64_t cluster_index, void **refcount_block)
307{
308 BDRVQcow2State *s = bs->opaque;
309 unsigned int refcount_table_index;
310 int64_t ret;
311
312 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);
313
314
315 refcount_table_index = cluster_index >> s->refcount_block_bits;
316
317 if (refcount_table_index < s->refcount_table_size) {
318
319 uint64_t refcount_block_offset =
320 s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK;
321
322
323 if (refcount_block_offset) {
324 if (offset_into_cluster(s, refcount_block_offset)) {
325 qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#"
326 PRIx64 " unaligned (reftable index: "
327 "%#x)", refcount_block_offset,
328 refcount_table_index);
329 return -EIO;
330 }
331
332 return load_refcount_block(bs, refcount_block_offset,
333 refcount_block);
334 }
335 }
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360 *refcount_block = NULL;
361
362
363 ret = qcow2_cache_flush(bs, s->l2_table_cache);
364 if (ret < 0) {
365 return ret;
366 }
367
368
369 int64_t new_block = alloc_clusters_noref(bs, s->cluster_size, INT64_MAX);
370 if (new_block < 0) {
371 return new_block;
372 }
373
374
375 assert((new_block & REFT_OFFSET_MASK) == new_block);
376
377
378 if (new_block == 0) {
379 qcow2_signal_corruption(bs, true, -1, -1, "Preventing invalid "
380 "allocation of refcount block at offset 0");
381 return -EIO;
382 }
383
384#ifdef DEBUG_ALLOC2
385 fprintf(stderr, "qcow2: Allocate refcount block %d for %" PRIx64
386 " at %" PRIx64 "\n",
387 refcount_table_index, cluster_index << s->cluster_bits, new_block);
388#endif
389
390 if (in_same_refcount_block(s, new_block, cluster_index << s->cluster_bits)) {
391
392 ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
393 refcount_block);
394 if (ret < 0) {
395 goto fail;
396 }
397
398 memset(*refcount_block, 0, s->cluster_size);
399
400
401 int block_index = (new_block >> s->cluster_bits) &
402 (s->refcount_block_size - 1);
403 s->set_refcount(*refcount_block, block_index, 1);
404 } else {
405
406
407 ret = update_refcount(bs, new_block, s->cluster_size, 1, false,
408 QCOW2_DISCARD_NEVER);
409 if (ret < 0) {
410 goto fail;
411 }
412
413 ret = qcow2_cache_flush(bs, s->refcount_block_cache);
414 if (ret < 0) {
415 goto fail;
416 }
417
418
419
420 ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
421 refcount_block);
422 if (ret < 0) {
423 goto fail;
424 }
425
426 memset(*refcount_block, 0, s->cluster_size);
427 }
428
429
430 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE);
431 qcow2_cache_entry_mark_dirty(s->refcount_block_cache, *refcount_block);
432 ret = qcow2_cache_flush(bs, s->refcount_block_cache);
433 if (ret < 0) {
434 goto fail;
435 }
436
437
438 if (refcount_table_index < s->refcount_table_size) {
439 uint64_t data64 = cpu_to_be64(new_block);
440 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP);
441 ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset +
442 refcount_table_index * REFTABLE_ENTRY_SIZE,
443 sizeof(data64), &data64, 0);
444 if (ret < 0) {
445 goto fail;
446 }
447
448 s->refcount_table[refcount_table_index] = new_block;
449
450
451 s->max_refcount_table_index =
452 MAX(s->max_refcount_table_index, refcount_table_index);
453
454
455
456 return -EAGAIN;
457 }
458
459 qcow2_cache_put(s->refcount_block_cache, refcount_block);
460
461
462
463
464
465
466
467
468
469
470
471
472 BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_GROW);
473
474
475
476
477
478
479
480
481
482
483
484
485 uint64_t blocks_used = DIV_ROUND_UP(MAX(cluster_index + 1,
486 (new_block >> s->cluster_bits) + 1),
487 s->refcount_block_size);
488
489
490 uint64_t meta_offset = (blocks_used * s->refcount_block_size) *
491 s->cluster_size;
492
493 ret = qcow2_refcount_area(bs, meta_offset, 0, false,
494 refcount_table_index, new_block);
495 if (ret < 0) {
496 return ret;
497 }
498
499 ret = load_refcount_block(bs, new_block, refcount_block);
500 if (ret < 0) {
501 return ret;
502 }
503
504
505
506
507 return -EAGAIN;
508
509fail:
510 if (*refcount_block != NULL) {
511 qcow2_cache_put(s->refcount_block_cache, refcount_block);
512 }
513 return ret;
514}
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t start_offset,
532 uint64_t additional_clusters, bool exact_size,
533 int new_refblock_index,
534 uint64_t new_refblock_offset)
535{
536 BDRVQcow2State *s = bs->opaque;
537 uint64_t total_refblock_count_u64, additional_refblock_count;
538 int total_refblock_count, table_size, area_reftable_index, table_clusters;
539 int i;
540 uint64_t table_offset, block_offset, end_offset;
541 int ret;
542 uint64_t *new_table;
543
544 assert(!(start_offset % s->cluster_size));
545
546 qcow2_refcount_metadata_size(start_offset / s->cluster_size +
547 additional_clusters,
548 s->cluster_size, s->refcount_order,
549 !exact_size, &total_refblock_count_u64);
550 if (total_refblock_count_u64 > QCOW_MAX_REFTABLE_SIZE) {
551 return -EFBIG;
552 }
553 total_refblock_count = total_refblock_count_u64;
554
555
556
557
558
559 area_reftable_index = (start_offset / s->cluster_size) /
560 s->refcount_block_size;
561
562 if (exact_size) {
563 table_size = total_refblock_count;
564 } else {
565 table_size = total_refblock_count +
566 DIV_ROUND_UP(total_refblock_count, 2);
567 }
568
569 table_size = ROUND_UP(table_size, s->cluster_size / REFTABLE_ENTRY_SIZE);
570 table_clusters = (table_size * REFTABLE_ENTRY_SIZE) / s->cluster_size;
571
572 if (table_size > QCOW_MAX_REFTABLE_SIZE) {
573 return -EFBIG;
574 }
575
576 new_table = g_try_new0(uint64_t, table_size);
577
578 assert(table_size > 0);
579 if (new_table == NULL) {
580 ret = -ENOMEM;
581 goto fail;
582 }
583
584
585 if (table_size > s->max_refcount_table_index) {
586
587 memcpy(new_table, s->refcount_table,
588 (s->max_refcount_table_index + 1) * REFTABLE_ENTRY_SIZE);
589 } else {
590
591
592
593
594 memcpy(new_table, s->refcount_table, table_size * REFTABLE_ENTRY_SIZE);
595 }
596
597 if (new_refblock_offset) {
598 assert(new_refblock_index < total_refblock_count);
599 new_table[new_refblock_index] = new_refblock_offset;
600 }
601
602
603 additional_refblock_count = 0;
604 for (i = area_reftable_index; i < total_refblock_count; i++) {
605 if (!new_table[i]) {
606 additional_refblock_count++;
607 }
608 }
609
610 table_offset = start_offset + additional_refblock_count * s->cluster_size;
611 end_offset = table_offset + table_clusters * s->cluster_size;
612
613
614 block_offset = start_offset;
615 for (i = area_reftable_index; i < total_refblock_count; i++) {
616 void *refblock_data;
617 uint64_t first_offset_covered;
618
619
620 if (new_table[i]) {
621 ret = qcow2_cache_get(bs, s->refcount_block_cache, new_table[i],
622 &refblock_data);
623 if (ret < 0) {
624 goto fail;
625 }
626 } else {
627 ret = qcow2_cache_get_empty(bs, s->refcount_block_cache,
628 block_offset, &refblock_data);
629 if (ret < 0) {
630 goto fail;
631 }
632 memset(refblock_data, 0, s->cluster_size);
633 qcow2_cache_entry_mark_dirty(s->refcount_block_cache,
634 refblock_data);
635
636 new_table[i] = block_offset;
637 block_offset += s->cluster_size;
638 }
639
640
641 first_offset_covered = (uint64_t)i * s->refcount_block_size *
642 s->cluster_size;
643 if (first_offset_covered < end_offset) {
644 int j, end_index;
645
646
647
648 if (first_offset_covered < start_offset) {
649 assert(i == area_reftable_index);
650 j = (start_offset - first_offset_covered) / s->cluster_size;
651 assert(j < s->refcount_block_size);
652 } else {
653 j = 0;
654 }
655
656 end_index = MIN((end_offset - first_offset_covered) /
657 s->cluster_size,
658 s->refcount_block_size);
659
660 for (; j < end_index; j++) {
661
662 assert(s->get_refcount(refblock_data, j) == 0);
663 s->set_refcount(refblock_data, j, 1);
664 }
665
666 qcow2_cache_entry_mark_dirty(s->refcount_block_cache,
667 refblock_data);
668 }
669
670 qcow2_cache_put(s->refcount_block_cache, &refblock_data);
671 }
672
673 assert(block_offset == table_offset);
674
675
676 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
677 ret = qcow2_cache_flush(bs, s->refcount_block_cache);
678 if (ret < 0) {
679 goto fail;
680 }
681
682
683 for (i = 0; i < total_refblock_count; i++) {
684 cpu_to_be64s(&new_table[i]);
685 }
686
687 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE);
688 ret = bdrv_pwrite_sync(bs->file, table_offset,
689 table_size * REFTABLE_ENTRY_SIZE, new_table, 0);
690 if (ret < 0) {
691 goto fail;
692 }
693
694 for (i = 0; i < total_refblock_count; i++) {
695 be64_to_cpus(&new_table[i]);
696 }
697
698
699 struct QEMU_PACKED {
700 uint64_t d64;
701 uint32_t d32;
702 } data;
703 data.d64 = cpu_to_be64(table_offset);
704 data.d32 = cpu_to_be32(table_clusters);
705 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE);
706 ret = bdrv_pwrite_sync(bs->file,
707 offsetof(QCowHeader, refcount_table_offset),
708 sizeof(data), &data, 0);
709 if (ret < 0) {
710 goto fail;
711 }
712
713
714 uint64_t old_table_offset = s->refcount_table_offset;
715 uint64_t old_table_size = s->refcount_table_size;
716
717 g_free(s->refcount_table);
718 s->refcount_table = new_table;
719 s->refcount_table_size = table_size;
720 s->refcount_table_offset = table_offset;
721 update_max_refcount_table_index(s);
722
723
724 qcow2_free_clusters(bs, old_table_offset,
725 old_table_size * REFTABLE_ENTRY_SIZE,
726 QCOW2_DISCARD_OTHER);
727
728 return end_offset;
729
730fail:
731 g_free(new_table);
732 return ret;
733}
734
735void qcow2_process_discards(BlockDriverState *bs, int ret)
736{
737 BDRVQcow2State *s = bs->opaque;
738 Qcow2DiscardRegion *d, *next;
739
740 QTAILQ_FOREACH_SAFE(d, &s->discards, next, next) {
741 QTAILQ_REMOVE(&s->discards, d, next);
742
743
744 if (ret >= 0) {
745 int r2 = bdrv_pdiscard(bs->file, d->offset, d->bytes);
746 if (r2 < 0) {
747 trace_qcow2_process_discards_failed_region(d->offset, d->bytes,
748 r2);
749 }
750 }
751
752 g_free(d);
753 }
754}
755
756static void update_refcount_discard(BlockDriverState *bs,
757 uint64_t offset, uint64_t length)
758{
759 BDRVQcow2State *s = bs->opaque;
760 Qcow2DiscardRegion *d, *p, *next;
761
762 QTAILQ_FOREACH(d, &s->discards, next) {
763 uint64_t new_start = MIN(offset, d->offset);
764 uint64_t new_end = MAX(offset + length, d->offset + d->bytes);
765
766 if (new_end - new_start <= length + d->bytes) {
767
768
769
770 assert(d->bytes + length == new_end - new_start);
771 d->offset = new_start;
772 d->bytes = new_end - new_start;
773 goto found;
774 }
775 }
776
777 d = g_malloc(sizeof(*d));
778 *d = (Qcow2DiscardRegion) {
779 .bs = bs,
780 .offset = offset,
781 .bytes = length,
782 };
783 QTAILQ_INSERT_TAIL(&s->discards, d, next);
784
785found:
786
787 QTAILQ_FOREACH_SAFE(p, &s->discards, next, next) {
788 if (p == d
789 || p->offset > d->offset + d->bytes
790 || d->offset > p->offset + p->bytes)
791 {
792 continue;
793 }
794
795
796 assert(p->offset == d->offset + d->bytes
797 || d->offset == p->offset + p->bytes);
798
799 QTAILQ_REMOVE(&s->discards, p, next);
800 d->offset = MIN(d->offset, p->offset);
801 d->bytes += p->bytes;
802 g_free(p);
803 }
804}
805
806
807
808
809static int update_refcount(BlockDriverState *bs,
810 int64_t offset,
811 int64_t length,
812 uint64_t addend,
813 bool decrease,
814 enum qcow2_discard_type type)
815{
816 BDRVQcow2State *s = bs->opaque;
817 int64_t start, last, cluster_offset;
818 void *refcount_block = NULL;
819 int64_t old_table_index = -1;
820 int ret;
821
822#ifdef DEBUG_ALLOC2
823 fprintf(stderr, "update_refcount: offset=%" PRId64 " size=%" PRId64
824 " addend=%s%" PRIu64 "\n", offset, length, decrease ? "-" : "",
825 addend);
826#endif
827 if (length < 0) {
828 return -EINVAL;
829 } else if (length == 0) {
830 return 0;
831 }
832
833 if (decrease) {
834 qcow2_cache_set_dependency(bs, s->refcount_block_cache,
835 s->l2_table_cache);
836 }
837
838 start = start_of_cluster(s, offset);
839 last = start_of_cluster(s, offset + length - 1);
840 for(cluster_offset = start; cluster_offset <= last;
841 cluster_offset += s->cluster_size)
842 {
843 int block_index;
844 uint64_t refcount;
845 int64_t cluster_index = cluster_offset >> s->cluster_bits;
846 int64_t table_index = cluster_index >> s->refcount_block_bits;
847
848
849 if (table_index != old_table_index) {
850 if (refcount_block) {
851 qcow2_cache_put(s->refcount_block_cache, &refcount_block);
852 }
853 ret = alloc_refcount_block(bs, cluster_index, &refcount_block);
854
855
856 if (ret == -EAGAIN) {
857 if (s->free_cluster_index > (start >> s->cluster_bits)) {
858 s->free_cluster_index = (start >> s->cluster_bits);
859 }
860 }
861 if (ret < 0) {
862 goto fail;
863 }
864 }
865 old_table_index = table_index;
866
867 qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refcount_block);
868
869
870 block_index = cluster_index & (s->refcount_block_size - 1);
871
872 refcount = s->get_refcount(refcount_block, block_index);
873 if (decrease ? (refcount - addend > refcount)
874 : (refcount + addend < refcount ||
875 refcount + addend > s->refcount_max))
876 {
877 ret = -EINVAL;
878 goto fail;
879 }
880 if (decrease) {
881 refcount -= addend;
882 } else {
883 refcount += addend;
884 }
885 if (refcount == 0 && cluster_index < s->free_cluster_index) {
886 s->free_cluster_index = cluster_index;
887 }
888 s->set_refcount(refcount_block, block_index, refcount);
889
890 if (refcount == 0) {
891 void *table;
892
893 table = qcow2_cache_is_table_offset(s->refcount_block_cache,
894 offset);
895 if (table != NULL) {
896 qcow2_cache_put(s->refcount_block_cache, &refcount_block);
897 old_table_index = -1;
898 qcow2_cache_discard(s->refcount_block_cache, table);
899 }
900
901 table = qcow2_cache_is_table_offset(s->l2_table_cache, offset);
902 if (table != NULL) {
903 qcow2_cache_discard(s->l2_table_cache, table);
904 }
905
906 if (s->discard_passthrough[type]) {
907 update_refcount_discard(bs, cluster_offset, s->cluster_size);
908 }
909 }
910 }
911
912 ret = 0;
913fail:
914 if (!s->cache_discards) {
915 qcow2_process_discards(bs, ret);
916 }
917
918
919 if (refcount_block) {
920 qcow2_cache_put(s->refcount_block_cache, &refcount_block);
921 }
922
923
924
925
926
927 if (ret < 0) {
928 int dummy;
929 dummy = update_refcount(bs, offset, cluster_offset - offset, addend,
930 !decrease, QCOW2_DISCARD_NEVER);
931 (void)dummy;
932 }
933
934 return ret;
935}
936
937
938
939
940
941
942
943
944
945int qcow2_update_cluster_refcount(BlockDriverState *bs,
946 int64_t cluster_index,
947 uint64_t addend, bool decrease,
948 enum qcow2_discard_type type)
949{
950 BDRVQcow2State *s = bs->opaque;
951 int ret;
952
953 ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend,
954 decrease, type);
955 if (ret < 0) {
956 return ret;
957 }
958
959 return 0;
960}
961
962
963
964
965
966
967
968
969
970static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size,
971 uint64_t max)
972{
973 BDRVQcow2State *s = bs->opaque;
974 uint64_t i, nb_clusters, refcount;
975 int ret;
976
977
978 if (s->cache_discards) {
979 qcow2_process_discards(bs, 0);
980 }
981
982 nb_clusters = size_to_clusters(s, size);
983retry:
984 for(i = 0; i < nb_clusters; i++) {
985 uint64_t next_cluster_index = s->free_cluster_index++;
986 ret = qcow2_get_refcount(bs, next_cluster_index, &refcount);
987
988 if (ret < 0) {
989 return ret;
990 } else if (refcount != 0) {
991 goto retry;
992 }
993 }
994
995
996
997 if (s->free_cluster_index > 0 &&
998 s->free_cluster_index - 1 > (max >> s->cluster_bits))
999 {
1000 return -EFBIG;
1001 }
1002
1003#ifdef DEBUG_ALLOC2
1004 fprintf(stderr, "alloc_clusters: size=%" PRId64 " -> %" PRId64 "\n",
1005 size,
1006 (s->free_cluster_index - nb_clusters) << s->cluster_bits);
1007#endif
1008 return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
1009}
1010
1011int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size)
1012{
1013 int64_t offset;
1014 int ret;
1015
1016 BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
1017 do {
1018 offset = alloc_clusters_noref(bs, size, QCOW_MAX_CLUSTER_OFFSET);
1019 if (offset < 0) {
1020 return offset;
1021 }
1022
1023 ret = update_refcount(bs, offset, size, 1, false, QCOW2_DISCARD_NEVER);
1024 } while (ret == -EAGAIN);
1025
1026 if (ret < 0) {
1027 return ret;
1028 }
1029
1030 return offset;
1031}
1032
1033int64_t coroutine_fn qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
1034 int64_t nb_clusters)
1035{
1036 BDRVQcow2State *s = bs->opaque;
1037 uint64_t cluster_index, refcount;
1038 uint64_t i;
1039 int ret;
1040
1041 assert(nb_clusters >= 0);
1042 if (nb_clusters == 0) {
1043 return 0;
1044 }
1045
1046 do {
1047
1048 cluster_index = offset >> s->cluster_bits;
1049 for(i = 0; i < nb_clusters; i++) {
1050 ret = qcow2_get_refcount(bs, cluster_index++, &refcount);
1051 if (ret < 0) {
1052 return ret;
1053 } else if (refcount != 0) {
1054 break;
1055 }
1056 }
1057
1058
1059 ret = update_refcount(bs, offset, i << s->cluster_bits, 1, false,
1060 QCOW2_DISCARD_NEVER);
1061 } while (ret == -EAGAIN);
1062
1063 if (ret < 0) {
1064 return ret;
1065 }
1066
1067 return i;
1068}
1069
1070
1071
1072int64_t coroutine_fn GRAPH_RDLOCK qcow2_alloc_bytes(BlockDriverState *bs, int size)
1073{
1074 BDRVQcow2State *s = bs->opaque;
1075 int64_t offset;
1076 size_t free_in_cluster;
1077 int ret;
1078
1079 BLKDBG_CO_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES);
1080 assert(size > 0 && size <= s->cluster_size);
1081 assert(!s->free_byte_offset || offset_into_cluster(s, s->free_byte_offset));
1082
1083 offset = s->free_byte_offset;
1084
1085 if (offset) {
1086 uint64_t refcount;
1087 ret = qcow2_get_refcount(bs, offset >> s->cluster_bits, &refcount);
1088 if (ret < 0) {
1089 return ret;
1090 }
1091
1092 if (refcount == s->refcount_max) {
1093 offset = 0;
1094 }
1095 }
1096
1097 free_in_cluster = s->cluster_size - offset_into_cluster(s, offset);
1098 do {
1099 if (!offset || free_in_cluster < size) {
1100 int64_t new_cluster;
1101
1102 new_cluster = alloc_clusters_noref(bs, s->cluster_size,
1103 MIN(s->cluster_offset_mask,
1104 QCOW_MAX_CLUSTER_OFFSET));
1105 if (new_cluster < 0) {
1106 return new_cluster;
1107 }
1108
1109 if (new_cluster == 0) {
1110 qcow2_signal_corruption(bs, true, -1, -1, "Preventing invalid "
1111 "allocation of compressed cluster "
1112 "at offset 0");
1113 return -EIO;
1114 }
1115
1116 if (!offset || ROUND_UP(offset, s->cluster_size) != new_cluster) {
1117 offset = new_cluster;
1118 free_in_cluster = s->cluster_size;
1119 } else {
1120 free_in_cluster += s->cluster_size;
1121 }
1122 }
1123
1124 assert(offset);
1125 ret = update_refcount(bs, offset, size, 1, false, QCOW2_DISCARD_NEVER);
1126 if (ret < 0) {
1127 offset = 0;
1128 }
1129 } while (ret == -EAGAIN);
1130 if (ret < 0) {
1131 return ret;
1132 }
1133
1134
1135
1136 qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);
1137
1138 s->free_byte_offset = offset + size;
1139 if (!offset_into_cluster(s, s->free_byte_offset)) {
1140 s->free_byte_offset = 0;
1141 }
1142
1143 return offset;
1144}
1145
1146void qcow2_free_clusters(BlockDriverState *bs,
1147 int64_t offset, int64_t size,
1148 enum qcow2_discard_type type)
1149{
1150 int ret;
1151
1152 BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_FREE);
1153 ret = update_refcount(bs, offset, size, 1, true, type);
1154 if (ret < 0) {
1155 fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-ret));
1156
1157 }
1158}
1159
1160
1161
1162
1163
1164void qcow2_free_any_cluster(BlockDriverState *bs, uint64_t l2_entry,
1165 enum qcow2_discard_type type)
1166{
1167 BDRVQcow2State *s = bs->opaque;
1168 QCow2ClusterType ctype = qcow2_get_cluster_type(bs, l2_entry);
1169
1170 if (has_data_file(bs)) {
1171 if (s->discard_passthrough[type] &&
1172 (ctype == QCOW2_CLUSTER_NORMAL ||
1173 ctype == QCOW2_CLUSTER_ZERO_ALLOC))
1174 {
1175 bdrv_pdiscard(s->data_file, l2_entry & L2E_OFFSET_MASK,
1176 s->cluster_size);
1177 }
1178 return;
1179 }
1180
1181 switch (ctype) {
1182 case QCOW2_CLUSTER_COMPRESSED:
1183 {
1184 uint64_t coffset;
1185 int csize;
1186
1187 qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize);
1188 qcow2_free_clusters(bs, coffset, csize, type);
1189 }
1190 break;
1191 case QCOW2_CLUSTER_NORMAL:
1192 case QCOW2_CLUSTER_ZERO_ALLOC:
1193 if (offset_into_cluster(s, l2_entry & L2E_OFFSET_MASK)) {
1194 qcow2_signal_corruption(bs, false, -1, -1,
1195 "Cannot free unaligned cluster %#llx",
1196 l2_entry & L2E_OFFSET_MASK);
1197 } else {
1198 qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
1199 s->cluster_size, type);
1200 }
1201 break;
1202 case QCOW2_CLUSTER_ZERO_PLAIN:
1203 case QCOW2_CLUSTER_UNALLOCATED:
1204 break;
1205 default:
1206 abort();
1207 }
1208}
1209
1210int qcow2_write_caches(BlockDriverState *bs)
1211{
1212 BDRVQcow2State *s = bs->opaque;
1213 int ret;
1214
1215 ret = qcow2_cache_write(bs, s->l2_table_cache);
1216 if (ret < 0) {
1217 return ret;
1218 }
1219
1220 if (qcow2_need_accurate_refcounts(s)) {
1221 ret = qcow2_cache_write(bs, s->refcount_block_cache);
1222 if (ret < 0) {
1223 return ret;
1224 }
1225 }
1226
1227 return 0;
1228}
1229
1230int qcow2_flush_caches(BlockDriverState *bs)
1231{
1232 int ret = qcow2_write_caches(bs);
1233 if (ret < 0) {
1234 return ret;
1235 }
1236
1237 return bdrv_flush(bs->file->bs);
1238}
1239
1240
1241
1242
1243
1244
1245
1246int qcow2_update_snapshot_refcount(BlockDriverState *bs,
1247 int64_t l1_table_offset, int l1_size, int addend)
1248{
1249 BDRVQcow2State *s = bs->opaque;
1250 uint64_t *l1_table, *l2_slice, l2_offset, entry, l1_size2, refcount;
1251 bool l1_allocated = false;
1252 int64_t old_entry, old_l2_offset;
1253 unsigned slice, slice_size2, n_slices;
1254 int i, j, l1_modified = 0;
1255 int ret;
1256
1257 assert(addend >= -1 && addend <= 1);
1258
1259 l2_slice = NULL;
1260 l1_table = NULL;
1261 l1_size2 = l1_size * L1E_SIZE;
1262 slice_size2 = s->l2_slice_size * l2_entry_size(s);
1263 n_slices = s->cluster_size / slice_size2;
1264
1265 s->cache_discards = true;
1266
1267
1268
1269
1270 if (l1_table_offset != s->l1_table_offset) {
1271 l1_table = g_try_malloc0(l1_size2);
1272 if (l1_size2 && l1_table == NULL) {
1273 ret = -ENOMEM;
1274 goto fail;
1275 }
1276 l1_allocated = true;
1277
1278 ret = bdrv_pread(bs->file, l1_table_offset, l1_size2, l1_table, 0);
1279 if (ret < 0) {
1280 goto fail;
1281 }
1282
1283 for (i = 0; i < l1_size; i++) {
1284 be64_to_cpus(&l1_table[i]);
1285 }
1286 } else {
1287 assert(l1_size == s->l1_size);
1288 l1_table = s->l1_table;
1289 l1_allocated = false;
1290 }
1291
1292 for (i = 0; i < l1_size; i++) {
1293 l2_offset = l1_table[i];
1294 if (l2_offset) {
1295 old_l2_offset = l2_offset;
1296 l2_offset &= L1E_OFFSET_MASK;
1297
1298 if (offset_into_cluster(s, l2_offset)) {
1299 qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#"
1300 PRIx64 " unaligned (L1 index: %#x)",
1301 l2_offset, i);
1302 ret = -EIO;
1303 goto fail;
1304 }
1305
1306 for (slice = 0; slice < n_slices; slice++) {
1307 ret = qcow2_cache_get(bs, s->l2_table_cache,
1308 l2_offset + slice * slice_size2,
1309 (void **) &l2_slice);
1310 if (ret < 0) {
1311 goto fail;
1312 }
1313
1314 for (j = 0; j < s->l2_slice_size; j++) {
1315 uint64_t cluster_index;
1316 uint64_t offset;
1317
1318 entry = get_l2_entry(s, l2_slice, j);
1319 old_entry = entry;
1320 entry &= ~QCOW_OFLAG_COPIED;
1321 offset = entry & L2E_OFFSET_MASK;
1322
1323 switch (qcow2_get_cluster_type(bs, entry)) {
1324 case QCOW2_CLUSTER_COMPRESSED:
1325 if (addend != 0) {
1326 uint64_t coffset;
1327 int csize;
1328
1329 qcow2_parse_compressed_l2_entry(bs, entry,
1330 &coffset, &csize);
1331 ret = update_refcount(
1332 bs, coffset, csize,
1333 abs(addend), addend < 0,
1334 QCOW2_DISCARD_SNAPSHOT);
1335 if (ret < 0) {
1336 goto fail;
1337 }
1338 }
1339
1340 refcount = 2;
1341 break;
1342
1343 case QCOW2_CLUSTER_NORMAL:
1344 case QCOW2_CLUSTER_ZERO_ALLOC:
1345 if (offset_into_cluster(s, offset)) {
1346
1347 int l2_index = slice * s->l2_slice_size + j;
1348 qcow2_signal_corruption(
1349 bs, true, -1, -1, "Cluster "
1350 "allocation offset %#" PRIx64
1351 " unaligned (L2 offset: %#"
1352 PRIx64 ", L2 index: %#x)",
1353 offset, l2_offset, l2_index);
1354 ret = -EIO;
1355 goto fail;
1356 }
1357
1358 cluster_index = offset >> s->cluster_bits;
1359 assert(cluster_index);
1360 if (addend != 0) {
1361 ret = qcow2_update_cluster_refcount(
1362 bs, cluster_index, abs(addend), addend < 0,
1363 QCOW2_DISCARD_SNAPSHOT);
1364 if (ret < 0) {
1365 goto fail;
1366 }
1367 }
1368
1369 ret = qcow2_get_refcount(bs, cluster_index, &refcount);
1370 if (ret < 0) {
1371 goto fail;
1372 }
1373 break;
1374
1375 case QCOW2_CLUSTER_ZERO_PLAIN:
1376 case QCOW2_CLUSTER_UNALLOCATED:
1377 refcount = 0;
1378 break;
1379
1380 default:
1381 abort();
1382 }
1383
1384 if (refcount == 1) {
1385 entry |= QCOW_OFLAG_COPIED;
1386 }
1387 if (entry != old_entry) {
1388 if (addend > 0) {
1389 qcow2_cache_set_dependency(bs, s->l2_table_cache,
1390 s->refcount_block_cache);
1391 }
1392 set_l2_entry(s, l2_slice, j, entry);
1393 qcow2_cache_entry_mark_dirty(s->l2_table_cache,
1394 l2_slice);
1395 }
1396 }
1397
1398 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
1399 }
1400
1401 if (addend != 0) {
1402 ret = qcow2_update_cluster_refcount(bs, l2_offset >>
1403 s->cluster_bits,
1404 abs(addend), addend < 0,
1405 QCOW2_DISCARD_SNAPSHOT);
1406 if (ret < 0) {
1407 goto fail;
1408 }
1409 }
1410 ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits,
1411 &refcount);
1412 if (ret < 0) {
1413 goto fail;
1414 } else if (refcount == 1) {
1415 l2_offset |= QCOW_OFLAG_COPIED;
1416 }
1417 if (l2_offset != old_l2_offset) {
1418 l1_table[i] = l2_offset;
1419 l1_modified = 1;
1420 }
1421 }
1422 }
1423
1424 ret = bdrv_flush(bs);
1425fail:
1426 if (l2_slice) {
1427 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
1428 }
1429
1430 s->cache_discards = false;
1431 qcow2_process_discards(bs, ret);
1432
1433
1434 if (ret == 0 && addend >= 0 && l1_modified) {
1435 for (i = 0; i < l1_size; i++) {
1436 cpu_to_be64s(&l1_table[i]);
1437 }
1438
1439 ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_size2, l1_table,
1440 0);
1441
1442 for (i = 0; i < l1_size; i++) {
1443 be64_to_cpus(&l1_table[i]);
1444 }
1445 }
1446 if (l1_allocated)
1447 g_free(l1_table);
1448 return ret;
1449}
1450
1451
1452
1453
1454
1455
1456
1457
1458static uint64_t refcount_array_byte_size(BDRVQcow2State *s, uint64_t entries)
1459{
1460
1461
1462
1463
1464 assert(entries < (UINT64_C(1) << (64 - 9)));
1465
1466
1467
1468
1469 return DIV_ROUND_UP(entries << s->refcount_order, 8);
1470}
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481static int realloc_refcount_array(BDRVQcow2State *s, void **array,
1482 int64_t *size, int64_t new_size)
1483{
1484 int64_t old_byte_size, new_byte_size;
1485 void *new_ptr;
1486
1487
1488 old_byte_size = size_to_clusters(s, refcount_array_byte_size(s, *size))
1489 * s->cluster_size;
1490 new_byte_size = size_to_clusters(s, refcount_array_byte_size(s, new_size))
1491 * s->cluster_size;
1492
1493 if (new_byte_size == old_byte_size) {
1494 *size = new_size;
1495 return 0;
1496 }
1497
1498 assert(new_byte_size > 0);
1499
1500 if (new_byte_size > SIZE_MAX) {
1501 return -ENOMEM;
1502 }
1503
1504 new_ptr = g_try_realloc(*array, new_byte_size);
1505 if (!new_ptr) {
1506 return -ENOMEM;
1507 }
1508
1509 if (new_byte_size > old_byte_size) {
1510 memset((char *)new_ptr + old_byte_size, 0,
1511 new_byte_size - old_byte_size);
1512 }
1513
1514 *array = new_ptr;
1515 *size = new_size;
1516
1517 return 0;
1518}
1519
1520
1521
1522
1523
1524
1525
1526
1527int coroutine_fn GRAPH_RDLOCK
1528qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
1529 void **refcount_table,
1530 int64_t *refcount_table_size,
1531 int64_t offset, int64_t size)
1532{
1533 BDRVQcow2State *s = bs->opaque;
1534 uint64_t start, last, cluster_offset, k, refcount;
1535 int64_t file_len;
1536 int ret;
1537
1538 if (size <= 0) {
1539 return 0;
1540 }
1541
1542 file_len = bdrv_co_getlength(bs->file->bs);
1543 if (file_len < 0) {
1544 return file_len;
1545 }
1546
1547
1548
1549
1550
1551
1552 if (offset + size - file_len >= s->cluster_size) {
1553 fprintf(stderr, "ERROR: counting reference for region exceeding the "
1554 "end of the file by one cluster or more: offset 0x%" PRIx64
1555 " size 0x%" PRIx64 "\n", offset, size);
1556 res->corruptions++;
1557 return 0;
1558 }
1559
1560 start = start_of_cluster(s, offset);
1561 last = start_of_cluster(s, offset + size - 1);
1562 for(cluster_offset = start; cluster_offset <= last;
1563 cluster_offset += s->cluster_size) {
1564 k = cluster_offset >> s->cluster_bits;
1565 if (k >= *refcount_table_size) {
1566 ret = realloc_refcount_array(s, refcount_table,
1567 refcount_table_size, k + 1);
1568 if (ret < 0) {
1569 res->check_errors++;
1570 return ret;
1571 }
1572 }
1573
1574 refcount = s->get_refcount(*refcount_table, k);
1575 if (refcount == s->refcount_max) {
1576 fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
1577 "\n", cluster_offset);
1578 fprintf(stderr, "Use qemu-img amend to increase the refcount entry "
1579 "width or qemu-img convert to create a clean copy if the "
1580 "image cannot be opened for writing\n");
1581 res->corruptions++;
1582 continue;
1583 }
1584 s->set_refcount(*refcount_table, k, refcount + 1);
1585 }
1586
1587 return 0;
1588}
1589
1590
/*
 * Flag bits for the @flags argument of check_refcounts_l1() and
 * check_refcounts_l2().
 */
enum {
    /* Update the res->bfi cluster counters while walking the L2 entries */
    CHECK_FRAG_INFO = 1 << 1,
};
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604static int coroutine_fn GRAPH_RDLOCK
1605fix_l2_entry_by_zero(BlockDriverState *bs, BdrvCheckResult *res,
1606 uint64_t l2_offset, uint64_t *l2_table,
1607 int l2_index, bool active,
1608 bool *metadata_overlap)
1609{
1610 BDRVQcow2State *s = bs->opaque;
1611 int ret;
1612 int idx = l2_index * (l2_entry_size(s) / sizeof(uint64_t));
1613 uint64_t l2e_offset = l2_offset + (uint64_t)l2_index * l2_entry_size(s);
1614 int ign = active ? QCOW2_OL_ACTIVE_L2 : QCOW2_OL_INACTIVE_L2;
1615
1616 if (has_subclusters(s)) {
1617 uint64_t l2_bitmap = get_l2_bitmap(s, l2_table, l2_index);
1618
1619
1620 l2_bitmap |= l2_bitmap << 32;
1621 l2_bitmap &= QCOW_L2_BITMAP_ALL_ZEROES;
1622
1623 set_l2_bitmap(s, l2_table, l2_index, l2_bitmap);
1624 set_l2_entry(s, l2_table, l2_index, 0);
1625 } else {
1626 set_l2_entry(s, l2_table, l2_index, QCOW_OFLAG_ZERO);
1627 }
1628
1629 ret = qcow2_pre_write_overlap_check(bs, ign, l2e_offset, l2_entry_size(s),
1630 false);
1631 if (metadata_overlap) {
1632 *metadata_overlap = ret < 0;
1633 }
1634 if (ret < 0) {
1635 fprintf(stderr, "ERROR: Overlap check failed\n");
1636 goto fail;
1637 }
1638
1639 ret = bdrv_co_pwrite_sync(bs->file, l2e_offset, l2_entry_size(s),
1640 &l2_table[idx], 0);
1641 if (ret < 0) {
1642 fprintf(stderr, "ERROR: Failed to overwrite L2 "
1643 "table entry: %s\n", strerror(-ret));
1644 goto fail;
1645 }
1646
1647 res->corruptions--;
1648 res->corruptions_fixed++;
1649 return 0;
1650
1651fail:
1652 res->check_errors++;
1653 return ret;
1654}
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664static int coroutine_fn GRAPH_RDLOCK
1665check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
1666 void **refcount_table,
1667 int64_t *refcount_table_size, int64_t l2_offset,
1668 int flags, BdrvCheckMode fix, bool active)
1669{
1670 BDRVQcow2State *s = bs->opaque;
1671 uint64_t l2_entry, l2_bitmap;
1672 uint64_t next_contiguous_offset = 0;
1673 int i, ret;
1674 size_t l2_size_bytes = s->l2_size * l2_entry_size(s);
1675 g_autofree uint64_t *l2_table = g_malloc(l2_size_bytes);
1676 bool metadata_overlap;
1677
1678
1679 ret = bdrv_co_pread(bs->file, l2_offset, l2_size_bytes, l2_table, 0);
1680 if (ret < 0) {
1681 fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
1682 res->check_errors++;
1683 return ret;
1684 }
1685
1686
1687 for (i = 0; i < s->l2_size; i++) {
1688 uint64_t coffset;
1689 int csize;
1690 QCow2ClusterType type;
1691
1692 l2_entry = get_l2_entry(s, l2_table, i);
1693 l2_bitmap = get_l2_bitmap(s, l2_table, i);
1694 type = qcow2_get_cluster_type(bs, l2_entry);
1695
1696 if (type != QCOW2_CLUSTER_COMPRESSED) {
1697
1698 if (l2_entry & L2E_STD_RESERVED_MASK) {
1699 fprintf(stderr, "ERROR found l2 entry with reserved bits set: "
1700 "%" PRIx64 "\n", l2_entry);
1701 res->corruptions++;
1702 }
1703 }
1704
1705 switch (type) {
1706 case QCOW2_CLUSTER_COMPRESSED:
1707
1708 if (l2_entry & QCOW_OFLAG_COPIED) {
1709 fprintf(stderr, "ERROR: coffset=0x%" PRIx64 ": "
1710 "copied flag must never be set for compressed "
1711 "clusters\n", l2_entry & s->cluster_offset_mask);
1712 l2_entry &= ~QCOW_OFLAG_COPIED;
1713 res->corruptions++;
1714 }
1715
1716 if (has_data_file(bs)) {
1717 fprintf(stderr, "ERROR compressed cluster %d with data file, "
1718 "entry=0x%" PRIx64 "\n", i, l2_entry);
1719 res->corruptions++;
1720 break;
1721 }
1722
1723 if (l2_bitmap) {
1724 fprintf(stderr, "ERROR compressed cluster %d with non-zero "
1725 "subcluster allocation bitmap, entry=0x%" PRIx64 "\n",
1726 i, l2_entry);
1727 res->corruptions++;
1728 break;
1729 }
1730
1731
1732 qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize);
1733 ret = qcow2_inc_refcounts_imrt(
1734 bs, res, refcount_table, refcount_table_size, coffset, csize);
1735 if (ret < 0) {
1736 return ret;
1737 }
1738
1739 if (flags & CHECK_FRAG_INFO) {
1740 res->bfi.allocated_clusters++;
1741 res->bfi.compressed_clusters++;
1742
1743
1744
1745
1746
1747
1748
1749 res->bfi.fragmented_clusters++;
1750 }
1751 break;
1752
1753 case QCOW2_CLUSTER_ZERO_ALLOC:
1754 case QCOW2_CLUSTER_NORMAL:
1755 {
1756 uint64_t offset = l2_entry & L2E_OFFSET_MASK;
1757
1758 if ((l2_bitmap >> 32) & l2_bitmap) {
1759 res->corruptions++;
1760 fprintf(stderr, "ERROR offset=%" PRIx64 ": Allocated "
1761 "cluster has corrupted subcluster allocation bitmap\n",
1762 offset);
1763 }
1764
1765
1766 if (offset_into_cluster(s, offset)) {
1767 bool contains_data;
1768 res->corruptions++;
1769
1770 if (has_subclusters(s)) {
1771 contains_data = (l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC);
1772 } else {
1773 contains_data = !(l2_entry & QCOW_OFLAG_ZERO);
1774 }
1775
1776 if (!contains_data) {
1777 fprintf(stderr, "%s offset=%" PRIx64 ": Preallocated "
1778 "cluster is not properly aligned; L2 entry "
1779 "corrupted.\n",
1780 fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR",
1781 offset);
1782 if (fix & BDRV_FIX_ERRORS) {
1783 ret = fix_l2_entry_by_zero(bs, res, l2_offset,
1784 l2_table, i, active,
1785 &metadata_overlap);
1786 if (metadata_overlap) {
1787
1788
1789
1790
1791 return ret;
1792 }
1793
1794 if (ret == 0) {
1795
1796
1797
1798
1799 continue;
1800 }
1801
1802
1803
1804
1805
1806
1807 }
1808 } else {
1809 fprintf(stderr, "ERROR offset=%" PRIx64 ": Data cluster is "
1810 "not properly aligned; L2 entry corrupted.\n", offset);
1811 }
1812 }
1813
1814 if (flags & CHECK_FRAG_INFO) {
1815 res->bfi.allocated_clusters++;
1816 if (next_contiguous_offset &&
1817 offset != next_contiguous_offset) {
1818 res->bfi.fragmented_clusters++;
1819 }
1820 next_contiguous_offset = offset + s->cluster_size;
1821 }
1822
1823
1824 if (!has_data_file(bs)) {
1825 ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table,
1826 refcount_table_size,
1827 offset, s->cluster_size);
1828 if (ret < 0) {
1829 return ret;
1830 }
1831 }
1832 break;
1833 }
1834
1835 case QCOW2_CLUSTER_ZERO_PLAIN:
1836
1837 assert(!l2_bitmap);
1838 break;
1839
1840 case QCOW2_CLUSTER_UNALLOCATED:
1841 if (l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC) {
1842 res->corruptions++;
1843 fprintf(stderr, "ERROR: Unallocated "
1844 "cluster has non-zero subcluster allocation map\n");
1845 }
1846 break;
1847
1848 default:
1849 abort();
1850 }
1851 }
1852
1853 return 0;
1854}
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864static int coroutine_fn GRAPH_RDLOCK
1865check_refcounts_l1(BlockDriverState *bs, BdrvCheckResult *res,
1866 void **refcount_table, int64_t *refcount_table_size,
1867 int64_t l1_table_offset, int l1_size,
1868 int flags, BdrvCheckMode fix, bool active)
1869{
1870 BDRVQcow2State *s = bs->opaque;
1871 size_t l1_size_bytes = l1_size * L1E_SIZE;
1872 g_autofree uint64_t *l1_table = NULL;
1873 uint64_t l2_offset;
1874 int i, ret;
1875
1876 if (!l1_size) {
1877 return 0;
1878 }
1879
1880
1881 ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, refcount_table_size,
1882 l1_table_offset, l1_size_bytes);
1883 if (ret < 0) {
1884 return ret;
1885 }
1886
1887 l1_table = g_try_malloc(l1_size_bytes);
1888 if (l1_table == NULL) {
1889 res->check_errors++;
1890 return -ENOMEM;
1891 }
1892
1893
1894 ret = bdrv_co_pread(bs->file, l1_table_offset, l1_size_bytes, l1_table, 0);
1895 if (ret < 0) {
1896 fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
1897 res->check_errors++;
1898 return ret;
1899 }
1900
1901 for (i = 0; i < l1_size; i++) {
1902 be64_to_cpus(&l1_table[i]);
1903 }
1904
1905
1906 for (i = 0; i < l1_size; i++) {
1907 if (!l1_table[i]) {
1908 continue;
1909 }
1910
1911 if (l1_table[i] & L1E_RESERVED_MASK) {
1912 fprintf(stderr, "ERROR found L1 entry with reserved bits set: "
1913 "%" PRIx64 "\n", l1_table[i]);
1914 res->corruptions++;
1915 }
1916
1917 l2_offset = l1_table[i] & L1E_OFFSET_MASK;
1918
1919
1920 ret = qcow2_inc_refcounts_imrt(bs, res,
1921 refcount_table, refcount_table_size,
1922 l2_offset, s->cluster_size);
1923 if (ret < 0) {
1924 return ret;
1925 }
1926
1927
1928 if (offset_into_cluster(s, l2_offset)) {
1929 fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
1930 "cluster aligned; L1 entry corrupted\n", l2_offset);
1931 res->corruptions++;
1932 }
1933
1934
1935 ret = check_refcounts_l2(bs, res, refcount_table,
1936 refcount_table_size, l2_offset, flags,
1937 fix, active);
1938 if (ret < 0) {
1939 return ret;
1940 }
1941 }
1942
1943 return 0;
1944}
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954static int coroutine_fn GRAPH_RDLOCK
1955check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
1956{
1957 BDRVQcow2State *s = bs->opaque;
1958 uint64_t *l2_table = qemu_blockalign(bs, s->cluster_size);
1959 int ret;
1960 uint64_t refcount;
1961 int i, j;
1962 bool repair;
1963
1964 if (fix & BDRV_FIX_ERRORS) {
1965
1966 repair = true;
1967 } else if (fix & BDRV_FIX_LEAKS) {
1968
1969
1970
1971 repair = !res->check_errors && !res->corruptions && !res->leaks;
1972 } else {
1973 repair = false;
1974 }
1975
1976 for (i = 0; i < s->l1_size; i++) {
1977 uint64_t l1_entry = s->l1_table[i];
1978 uint64_t l2_offset = l1_entry & L1E_OFFSET_MASK;
1979 int l2_dirty = 0;
1980
1981 if (!l2_offset) {
1982 continue;
1983 }
1984
1985 ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits,
1986 &refcount);
1987 if (ret < 0) {
1988
1989 continue;
1990 }
1991 if ((refcount == 1) != ((l1_entry & QCOW_OFLAG_COPIED) != 0)) {
1992 res->corruptions++;
1993 fprintf(stderr, "%s OFLAG_COPIED L2 cluster: l1_index=%d "
1994 "l1_entry=%" PRIx64 " refcount=%" PRIu64 "\n",
1995 repair ? "Repairing" : "ERROR", i, l1_entry, refcount);
1996 if (repair) {
1997 s->l1_table[i] = refcount == 1
1998 ? l1_entry | QCOW_OFLAG_COPIED
1999 : l1_entry & ~QCOW_OFLAG_COPIED;
2000 ret = qcow2_write_l1_entry(bs, i);
2001 if (ret < 0) {
2002 res->check_errors++;
2003 goto fail;
2004 }
2005 res->corruptions--;
2006 res->corruptions_fixed++;
2007 }
2008 }
2009
2010 ret = bdrv_co_pread(bs->file, l2_offset, s->l2_size * l2_entry_size(s),
2011 l2_table, 0);
2012 if (ret < 0) {
2013 fprintf(stderr, "ERROR: Could not read L2 table: %s\n",
2014 strerror(-ret));
2015 res->check_errors++;
2016 goto fail;
2017 }
2018
2019 for (j = 0; j < s->l2_size; j++) {
2020 uint64_t l2_entry = get_l2_entry(s, l2_table, j);
2021 uint64_t data_offset = l2_entry & L2E_OFFSET_MASK;
2022 QCow2ClusterType cluster_type = qcow2_get_cluster_type(bs, l2_entry);
2023
2024 if (cluster_type == QCOW2_CLUSTER_NORMAL ||
2025 cluster_type == QCOW2_CLUSTER_ZERO_ALLOC) {
2026 if (has_data_file(bs)) {
2027 refcount = 1;
2028 } else {
2029 ret = qcow2_get_refcount(bs,
2030 data_offset >> s->cluster_bits,
2031 &refcount);
2032 if (ret < 0) {
2033
2034 continue;
2035 }
2036 }
2037 if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) {
2038 res->corruptions++;
2039 fprintf(stderr, "%s OFLAG_COPIED data cluster: "
2040 "l2_entry=%" PRIx64 " refcount=%" PRIu64 "\n",
2041 repair ? "Repairing" : "ERROR", l2_entry, refcount);
2042 if (repair) {
2043 set_l2_entry(s, l2_table, j,
2044 refcount == 1 ?
2045 l2_entry | QCOW_OFLAG_COPIED :
2046 l2_entry & ~QCOW_OFLAG_COPIED);
2047 l2_dirty++;
2048 }
2049 }
2050 }
2051 }
2052
2053 if (l2_dirty > 0) {
2054 ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
2055 l2_offset, s->cluster_size,
2056 false);
2057 if (ret < 0) {
2058 fprintf(stderr, "ERROR: Could not write L2 table; metadata "
2059 "overlap check failed: %s\n", strerror(-ret));
2060 res->check_errors++;
2061 goto fail;
2062 }
2063
2064 ret = bdrv_co_pwrite(bs->file, l2_offset, s->cluster_size, l2_table, 0);
2065 if (ret < 0) {
2066 fprintf(stderr, "ERROR: Could not write L2 table: %s\n",
2067 strerror(-ret));
2068 res->check_errors++;
2069 goto fail;
2070 }
2071 res->corruptions -= l2_dirty;
2072 res->corruptions_fixed += l2_dirty;
2073 }
2074 }
2075
2076 ret = 0;
2077
2078fail:
2079 qemu_vfree(l2_table);
2080 return ret;
2081}
2082
2083
2084
2085
2086
2087static int coroutine_fn GRAPH_RDLOCK
2088check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
2089 BdrvCheckMode fix, bool *rebuild,
2090 void **refcount_table, int64_t *nb_clusters)
2091{
2092 BDRVQcow2State *s = bs->opaque;
2093 int64_t i, size;
2094 int ret;
2095
2096 for(i = 0; i < s->refcount_table_size; i++) {
2097 uint64_t offset, cluster;
2098 offset = s->refcount_table[i] & REFT_OFFSET_MASK;
2099 cluster = offset >> s->cluster_bits;
2100
2101 if (s->refcount_table[i] & REFT_RESERVED_MASK) {
2102 fprintf(stderr, "ERROR refcount table entry %" PRId64 " has "
2103 "reserved bits set\n", i);
2104 res->corruptions++;
2105 *rebuild = true;
2106 continue;
2107 }
2108
2109
2110 if (offset_into_cluster(s, offset)) {
2111 fprintf(stderr, "ERROR refcount block %" PRId64 " is not "
2112 "cluster aligned; refcount table entry corrupted\n", i);
2113 res->corruptions++;
2114 *rebuild = true;
2115 continue;
2116 }
2117
2118 if (cluster >= *nb_clusters) {
2119 res->corruptions++;
2120 fprintf(stderr, "%s refcount block %" PRId64 " is outside image\n",
2121 fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
2122
2123 if (fix & BDRV_FIX_ERRORS) {
2124 int64_t new_nb_clusters;
2125 Error *local_err = NULL;
2126
2127 if (offset > INT64_MAX - s->cluster_size) {
2128 ret = -EINVAL;
2129 goto resize_fail;
2130 }
2131
2132 ret = bdrv_co_truncate(bs->file, offset + s->cluster_size, false,
2133 PREALLOC_MODE_OFF, 0, &local_err);
2134 if (ret < 0) {
2135 error_report_err(local_err);
2136 goto resize_fail;
2137 }
2138 size = bdrv_co_getlength(bs->file->bs);
2139 if (size < 0) {
2140 ret = size;
2141 goto resize_fail;
2142 }
2143
2144 new_nb_clusters = size_to_clusters(s, size);
2145 assert(new_nb_clusters >= *nb_clusters);
2146
2147 ret = realloc_refcount_array(s, refcount_table,
2148 nb_clusters, new_nb_clusters);
2149 if (ret < 0) {
2150 res->check_errors++;
2151 return ret;
2152 }
2153
2154 if (cluster >= *nb_clusters) {
2155 ret = -EINVAL;
2156 goto resize_fail;
2157 }
2158
2159 res->corruptions--;
2160 res->corruptions_fixed++;
2161 ret = qcow2_inc_refcounts_imrt(bs, res,
2162 refcount_table, nb_clusters,
2163 offset, s->cluster_size);
2164 if (ret < 0) {
2165 return ret;
2166 }
2167
2168
2169
2170 continue;
2171
2172resize_fail:
2173 *rebuild = true;
2174 fprintf(stderr, "ERROR could not resize image: %s\n",
2175 strerror(-ret));
2176 }
2177 continue;
2178 }
2179
2180 if (offset != 0) {
2181 ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
2182 offset, s->cluster_size);
2183 if (ret < 0) {
2184 return ret;
2185 }
2186 if (s->get_refcount(*refcount_table, cluster) != 1) {
2187 fprintf(stderr, "ERROR refcount block %" PRId64
2188 " refcount=%" PRIu64 "\n", i,
2189 s->get_refcount(*refcount_table, cluster));
2190 res->corruptions++;
2191 *rebuild = true;
2192 }
2193 }
2194 }
2195
2196 return 0;
2197}
2198
2199
2200
2201
2202static int coroutine_fn GRAPH_RDLOCK
2203calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
2204 BdrvCheckMode fix, bool *rebuild,
2205 void **refcount_table, int64_t *nb_clusters)
2206{
2207 BDRVQcow2State *s = bs->opaque;
2208 int64_t i;
2209 QCowSnapshot *sn;
2210 int ret;
2211
2212 if (!*refcount_table) {
2213 int64_t old_size = 0;
2214 ret = realloc_refcount_array(s, refcount_table,
2215 &old_size, *nb_clusters);
2216 if (ret < 0) {
2217 res->check_errors++;
2218 return ret;
2219 }
2220 }
2221
2222
2223 ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
2224 0, s->cluster_size);
2225 if (ret < 0) {
2226 return ret;
2227 }
2228
2229
2230 ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
2231 s->l1_table_offset, s->l1_size, CHECK_FRAG_INFO,
2232 fix, true);
2233 if (ret < 0) {
2234 return ret;
2235 }
2236
2237
2238 if (has_data_file(bs) && s->nb_snapshots) {
2239 fprintf(stderr, "ERROR %d snapshots in image with data file\n",
2240 s->nb_snapshots);
2241 res->corruptions++;
2242 }
2243
2244 for (i = 0; i < s->nb_snapshots; i++) {
2245 sn = s->snapshots + i;
2246 if (offset_into_cluster(s, sn->l1_table_offset)) {
2247 fprintf(stderr, "ERROR snapshot %s (%s) l1_offset=%#" PRIx64 ": "
2248 "L1 table is not cluster aligned; snapshot table entry "
2249 "corrupted\n", sn->id_str, sn->name, sn->l1_table_offset);
2250 res->corruptions++;
2251 continue;
2252 }
2253 if (sn->l1_size > QCOW_MAX_L1_SIZE / L1E_SIZE) {
2254 fprintf(stderr, "ERROR snapshot %s (%s) l1_size=%#" PRIx32 ": "
2255 "L1 table is too large; snapshot table entry corrupted\n",
2256 sn->id_str, sn->name, sn->l1_size);
2257 res->corruptions++;
2258 continue;
2259 }
2260 ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
2261 sn->l1_table_offset, sn->l1_size, 0, fix,
2262 false);
2263 if (ret < 0) {
2264 return ret;
2265 }
2266 }
2267 ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
2268 s->snapshots_offset, s->snapshots_size);
2269 if (ret < 0) {
2270 return ret;
2271 }
2272
2273
2274 ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
2275 s->refcount_table_offset,
2276 s->refcount_table_size *
2277 REFTABLE_ENTRY_SIZE);
2278 if (ret < 0) {
2279 return ret;
2280 }
2281
2282
2283 if (s->crypto_header.length) {
2284 ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
2285 s->crypto_header.offset,
2286 s->crypto_header.length);
2287 if (ret < 0) {
2288 return ret;
2289 }
2290 }
2291
2292
2293 ret = qcow2_check_bitmaps_refcounts(bs, res, refcount_table, nb_clusters);
2294 if (ret < 0) {
2295 return ret;
2296 }
2297
2298 return check_refblocks(bs, res, fix, rebuild, refcount_table, nb_clusters);
2299}
2300
2301
2302
2303
2304
2305static void coroutine_fn
2306compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
2307 BdrvCheckMode fix, bool *rebuild,
2308 int64_t *highest_cluster,
2309 void *refcount_table, int64_t nb_clusters)
2310{
2311 BDRVQcow2State *s = bs->opaque;
2312 int64_t i;
2313 uint64_t refcount1, refcount2;
2314 int ret;
2315
2316 for (i = 0, *highest_cluster = 0; i < nb_clusters; i++) {
2317 ret = qcow2_get_refcount(bs, i, &refcount1);
2318 if (ret < 0) {
2319 fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
2320 i, strerror(-ret));
2321 res->check_errors++;
2322 continue;
2323 }
2324
2325 refcount2 = s->get_refcount(refcount_table, i);
2326
2327 if (refcount1 > 0 || refcount2 > 0) {
2328 *highest_cluster = i;
2329 }
2330
2331 if (refcount1 != refcount2) {
2332
2333 int *num_fixed = NULL;
2334 if (refcount1 == 0) {
2335 *rebuild = true;
2336 } else if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) {
2337 num_fixed = &res->leaks_fixed;
2338 } else if (refcount1 < refcount2 && (fix & BDRV_FIX_ERRORS)) {
2339 num_fixed = &res->corruptions_fixed;
2340 }
2341
2342 fprintf(stderr, "%s cluster %" PRId64 " refcount=%" PRIu64
2343 " reference=%" PRIu64 "\n",
2344 num_fixed != NULL ? "Repairing" :
2345 refcount1 < refcount2 ? "ERROR" :
2346 "Leaked",
2347 i, refcount1, refcount2);
2348
2349 if (num_fixed) {
2350 ret = update_refcount(bs, i << s->cluster_bits, 1,
2351 refcount_diff(refcount1, refcount2),
2352 refcount1 > refcount2,
2353 QCOW2_DISCARD_ALWAYS);
2354 if (ret >= 0) {
2355 (*num_fixed)++;
2356 continue;
2357 }
2358 }
2359
2360
2361 if (refcount1 < refcount2) {
2362 res->corruptions++;
2363 } else {
2364 res->leaks++;
2365 }
2366 }
2367 }
2368}
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382static int64_t alloc_clusters_imrt(BlockDriverState *bs,
2383 int cluster_count,
2384 void **refcount_table,
2385 int64_t *imrt_nb_clusters,
2386 int64_t *first_free_cluster)
2387{
2388 BDRVQcow2State *s = bs->opaque;
2389 int64_t cluster = *first_free_cluster, i;
2390 bool first_gap = true;
2391 int contiguous_free_clusters;
2392 int ret;
2393
2394
2395
2396 for (contiguous_free_clusters = 0;
2397 cluster < *imrt_nb_clusters &&
2398 contiguous_free_clusters < cluster_count;
2399 cluster++)
2400 {
2401 if (!s->get_refcount(*refcount_table, cluster)) {
2402 contiguous_free_clusters++;
2403 if (first_gap) {
2404
2405
2406 *first_free_cluster = cluster;
2407 first_gap = false;
2408 }
2409 } else if (contiguous_free_clusters) {
2410 contiguous_free_clusters = 0;
2411 }
2412 }
2413
2414
2415
2416
2417
2418
2419
2420
2421 if (contiguous_free_clusters < cluster_count) {
2422
2423
2424
2425
2426
2427
2428 ret = realloc_refcount_array(s, refcount_table, imrt_nb_clusters,
2429 cluster + cluster_count
2430 - contiguous_free_clusters);
2431 if (ret < 0) {
2432 return ret;
2433 }
2434 }
2435
2436
2437 cluster -= contiguous_free_clusters;
2438 for (i = 0; i < cluster_count; i++) {
2439 s->set_refcount(*refcount_table, cluster + i, 1);
2440 }
2441
2442 return cluster << s->cluster_bits;
2443}
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470static int coroutine_fn GRAPH_RDLOCK
2471rebuild_refcounts_write_refblocks(
2472 BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters,
2473 int64_t first_cluster, int64_t end_cluster,
2474 uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr,
2475 Error **errp
2476 )
2477{
2478 BDRVQcow2State *s = bs->opaque;
2479 int64_t cluster;
2480 int64_t refblock_offset, refblock_start, refblock_index;
2481 int64_t first_free_cluster = 0;
2482 uint64_t *on_disk_reftable = *on_disk_reftable_ptr;
2483 uint32_t on_disk_reftable_entries = *on_disk_reftable_entries_ptr;
2484 void *on_disk_refblock;
2485 bool reftable_grown = false;
2486 int ret;
2487
2488 for (cluster = first_cluster; cluster < end_cluster; cluster++) {
2489
2490 if (!s->get_refcount(*refcount_table, cluster)) {
2491 continue;
2492 }
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503 refblock_index = cluster >> s->refcount_block_bits;
2504 refblock_start = refblock_index << s->refcount_block_bits;
2505
2506 if (on_disk_reftable_entries > refblock_index &&
2507 on_disk_reftable[refblock_index])
2508 {
2509
2510
2511
2512
2513
2514 refblock_offset = on_disk_reftable[refblock_index];
2515 } else {
2516 int64_t refblock_cluster_index;
2517
2518
2519 if (first_free_cluster < refblock_start) {
2520 first_free_cluster = refblock_start;
2521 }
2522 refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table,
2523 nb_clusters,
2524 &first_free_cluster);
2525 if (refblock_offset < 0) {
2526 error_setg_errno(errp, -refblock_offset,
2527 "ERROR allocating refblock");
2528 return refblock_offset;
2529 }
2530
2531 refblock_cluster_index = refblock_offset / s->cluster_size;
2532 if (refblock_cluster_index >= end_cluster) {
2533
2534
2535
2536
2537 end_cluster = refblock_cluster_index + 1;
2538 }
2539
2540 if (on_disk_reftable_entries <= refblock_index) {
2541 on_disk_reftable_entries =
2542 ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE,
2543 s->cluster_size) / REFTABLE_ENTRY_SIZE;
2544 on_disk_reftable =
2545 g_try_realloc(on_disk_reftable,
2546 on_disk_reftable_entries *
2547 REFTABLE_ENTRY_SIZE);
2548 if (!on_disk_reftable) {
2549 error_setg(errp, "ERROR allocating reftable memory");
2550 return -ENOMEM;
2551 }
2552
2553 memset(on_disk_reftable + *on_disk_reftable_entries_ptr, 0,
2554 (on_disk_reftable_entries -
2555 *on_disk_reftable_entries_ptr) *
2556 REFTABLE_ENTRY_SIZE);
2557
2558 *on_disk_reftable_ptr = on_disk_reftable;
2559 *on_disk_reftable_entries_ptr = on_disk_reftable_entries;
2560
2561 reftable_grown = true;
2562 } else {
2563 assert(on_disk_reftable);
2564 }
2565 on_disk_reftable[refblock_index] = refblock_offset;
2566 }
2567
2568
2569
2570 ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset,
2571 s->cluster_size, false);
2572 if (ret < 0) {
2573 error_setg_errno(errp, -ret, "ERROR writing refblock");
2574 return ret;
2575 }
2576
2577
2578
2579
2580
2581
2582
2583 on_disk_refblock = (void *)((char *) *refcount_table +
2584 refblock_index * s->cluster_size);
2585
2586 ret = bdrv_co_pwrite(bs->file, refblock_offset, s->cluster_size,
2587 on_disk_refblock, 0);
2588 if (ret < 0) {
2589 error_setg_errno(errp, -ret, "ERROR writing refblock");
2590 return ret;
2591 }
2592
2593
2594 cluster = refblock_start + s->refcount_block_size - 1;
2595 }
2596
2597 return reftable_grown;
2598}
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609static int coroutine_fn GRAPH_RDLOCK
2610rebuild_refcount_structure(BlockDriverState *bs, BdrvCheckResult *res,
2611 void **refcount_table, int64_t *nb_clusters,
2612 Error **errp)
2613{
2614 BDRVQcow2State *s = bs->opaque;
2615 int64_t reftable_offset = -1;
2616 int64_t reftable_length = 0;
2617 int64_t reftable_clusters;
2618 int64_t refblock_index;
2619 uint32_t on_disk_reftable_entries = 0;
2620 uint64_t *on_disk_reftable = NULL;
2621 int ret = 0;
2622 int reftable_size_changed = 0;
2623 struct {
2624 uint64_t reftable_offset;
2625 uint32_t reftable_clusters;
2626 } QEMU_PACKED reftable_offset_and_clusters;
2627
2628 qcow2_cache_empty(bs, s->refcount_block_cache);
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659 reftable_size_changed =
2660 rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters,
2661 0, *nb_clusters,
2662 &on_disk_reftable,
2663 &on_disk_reftable_entries, errp);
2664 if (reftable_size_changed < 0) {
2665 res->check_errors++;
2666 ret = reftable_size_changed;
2667 goto fail;
2668 }
2669
2670
2671
2672
2673
2674 assert(reftable_size_changed);
2675
2676 do {
2677 int64_t reftable_start_cluster, reftable_end_cluster;
2678 int64_t first_free_cluster = 0;
2679
2680 reftable_length = on_disk_reftable_entries * REFTABLE_ENTRY_SIZE;
2681 reftable_clusters = size_to_clusters(s, reftable_length);
2682
2683 reftable_offset = alloc_clusters_imrt(bs, reftable_clusters,
2684 refcount_table, nb_clusters,
2685 &first_free_cluster);
2686 if (reftable_offset < 0) {
2687 error_setg_errno(errp, -reftable_offset,
2688 "ERROR allocating reftable");
2689 res->check_errors++;
2690 ret = reftable_offset;
2691 goto fail;
2692 }
2693
2694
2695
2696
2697
2698 assert(offset_into_cluster(s, reftable_offset) == 0);
2699 reftable_start_cluster = reftable_offset / s->cluster_size;
2700 reftable_end_cluster = reftable_start_cluster + reftable_clusters;
2701 reftable_size_changed =
2702 rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters,
2703 reftable_start_cluster,
2704 reftable_end_cluster,
2705 &on_disk_reftable,
2706 &on_disk_reftable_entries, errp);
2707 if (reftable_size_changed < 0) {
2708 res->check_errors++;
2709 ret = reftable_size_changed;
2710 goto fail;
2711 }
2712
2713
2714
2715
2716
2717 } while (reftable_size_changed);
2718
2719
2720 assert(reftable_offset >= 0);
2721
2722
2723
2724
2725
2726
2727 for (refblock_index = 0; refblock_index < on_disk_reftable_entries;
2728 refblock_index++)
2729 {
2730 cpu_to_be64s(&on_disk_reftable[refblock_index]);
2731 }
2732
2733 ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length,
2734 false);
2735 if (ret < 0) {
2736 error_setg_errno(errp, -ret, "ERROR writing reftable");
2737 goto fail;
2738 }
2739
2740 assert(reftable_length < INT_MAX);
2741 ret = bdrv_co_pwrite(bs->file, reftable_offset, reftable_length,
2742 on_disk_reftable, 0);
2743 if (ret < 0) {
2744 error_setg_errno(errp, -ret, "ERROR writing reftable");
2745 goto fail;
2746 }
2747
2748
2749 reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset);
2750 reftable_offset_and_clusters.reftable_clusters =
2751 cpu_to_be32(reftable_clusters);
2752 ret = bdrv_co_pwrite_sync(bs->file,
2753 offsetof(QCowHeader, refcount_table_offset),
2754 sizeof(reftable_offset_and_clusters),
2755 &reftable_offset_and_clusters, 0);
2756 if (ret < 0) {
2757 error_setg_errno(errp, -ret, "ERROR setting reftable");
2758 goto fail;
2759 }
2760
2761 for (refblock_index = 0; refblock_index < on_disk_reftable_entries;
2762 refblock_index++)
2763 {
2764 be64_to_cpus(&on_disk_reftable[refblock_index]);
2765 }
2766 s->refcount_table = on_disk_reftable;
2767 s->refcount_table_offset = reftable_offset;
2768 s->refcount_table_size = on_disk_reftable_entries;
2769 update_max_refcount_table_index(s);
2770
2771 return 0;
2772
2773fail:
2774 g_free(on_disk_reftable);
2775 return ret;
2776}
2777
2778
2779
2780
2781
2782
2783
/*
 * Checks the refcount structures of the image.
 *
 * Reference counts are computed with calculate_refcounts() and compared
 * against the image with compare_refcounts().  Depending on @fix, the refcount
 * structure is rebuilt from scratch or individual entries are repaired.
 * Finally check_oflag_copied() is run.
 *
 * Findings are accumulated in @res.  Returns 0 on success and a negative
 * error code on failure.
 */
int coroutine_fn GRAPH_RDLOCK
qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
{
    BDRVQcow2State *s = bs->opaque;
    BdrvCheckResult pre_compare_res;
    int64_t size, highest_cluster, nb_clusters;
    void *refcount_table = NULL;
    bool rebuild = false;
    int ret;

    size = bdrv_co_getlength(bs->file->bs);
    if (size < 0) {
        /* The length of the underlying file could not be determined */
        res->check_errors++;
        return size;
    }

    nb_clusters = size_to_clusters(s, size);
    if (nb_clusters > INT_MAX) {
        /* Refuse files that span more than INT_MAX clusters */
        res->check_errors++;
        return -EFBIG;
    }

    res->bfi.total_clusters =
        size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);

    ret = calculate_refcounts(bs, res, fix, &rebuild, &refcount_table,
                              &nb_clusters);
    if (ret < 0) {
        goto fail;
    }

    /*
     * The first comparison is done without fixing anything (fix == 0).  Keep
     * a copy of @res from before it: if the comparison is repeated in fix mode
     * further down, @res is reset to this copy first so that the findings are
     * not counted twice.
     */
    pre_compare_res = *res;
    compare_refcounts(bs, res, 0, &rebuild, &highest_cluster, refcount_table,
                      nb_clusters);

    if (rebuild && (fix & BDRV_FIX_ERRORS)) {
        BdrvCheckResult old_res = *res;
        int fresh_leaks = 0;
        Error *local_err = NULL;

        fprintf(stderr, "Rebuilding refcount structure\n");
        ret = rebuild_refcount_structure(bs, res, &refcount_table,
                                         &nb_clusters, &local_err);
        if (ret < 0) {
            error_report_err(local_err);
            goto fail;
        }

        /* Start counting afresh for the re-check of the rebuilt structure */
        res->corruptions = 0;
        res->leaks = 0;

        /* Recompute the refcounts from zero, this time without fixing */
        rebuild = false;
        memset(refcount_table, 0, refcount_array_byte_size(s, nb_clusters));
        ret = calculate_refcounts(bs, res, 0, &rebuild, &refcount_table,
                                  &nb_clusters);
        if (ret < 0) {
            goto fail;
        }

        if (fix & BDRV_FIX_LEAKS) {
            /*
             * Run the leak-fixing comparison against a zeroed scratch result;
             * @res itself is restored afterwards and only the leak count of
             * this pass is carried over.
             */
            BdrvCheckResult saved_res = *res;
            *res = (BdrvCheckResult){ 0 };

            compare_refcounts(bs, res, BDRV_FIX_LEAKS, &rebuild,
                              &highest_cluster, refcount_table, nb_clusters);
            if (rebuild) {
                fprintf(stderr, "ERROR rebuilt refcount structure is still "
                        "broken\n");
            }

            /* Leaks still reported by the pass above are added to the total */
            fresh_leaks = res->leaks;
            *res = saved_res;
        }

        /* Whatever disappeared relative to the pre-rebuild state was fixed */
        if (res->corruptions < old_res.corruptions) {
            res->corruptions_fixed += old_res.corruptions - res->corruptions;
        }
        if (res->leaks < old_res.leaks) {
            res->leaks_fixed += old_res.leaks - res->leaks;
        }
        res->leaks += fresh_leaks;
    } else if (fix) {
        if (rebuild) {
            /* A rebuild is needed, but BDRV_FIX_ERRORS was not requested */
            fprintf(stderr, "ERROR need to rebuild refcount structures\n");
            res->check_errors++;
            ret = -EIO;
            goto fail;
        }

        if (res->leaks || res->corruptions) {
            /* Redo the comparison in fix mode, starting from the saved state */
            *res = pre_compare_res;
            compare_refcounts(bs, res, fix, &rebuild, &highest_cluster,
                              refcount_table, nb_clusters);
        }
    }

    /* Check the OFLAG_COPIED flags */
    ret = check_oflag_copied(bs, res, fix);
    if (ret < 0) {
        goto fail;
    }

    /* highest_cluster was filled in by the last compare_refcounts() call */
    res->image_end_offset = (highest_cluster + 1) * s->cluster_size;
    ret = 0;

fail:
    g_free(refcount_table);

    return ret;
}
2905
/*
 * Shorthand for ranges_overlap() whose first range is given by the variables
 * `offset` and `size` of the scope in which the macro is expanded; the second
 * range is (ofs, sz).
 */
#define overlaps_with(ofs, sz) \
    ranges_overlap(offset, size, ofs, sz)
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
2924 int64_t size)
2925{
2926 BDRVQcow2State *s = bs->opaque;
2927 int chk = s->overlap_check & ~ign;
2928 int i, j;
2929
2930 if (!size) {
2931 return 0;
2932 }
2933
2934 if (chk & QCOW2_OL_MAIN_HEADER) {
2935 if (offset < s->cluster_size) {
2936 return QCOW2_OL_MAIN_HEADER;
2937 }
2938 }
2939
2940
2941 size = ROUND_UP(offset_into_cluster(s, offset) + size, s->cluster_size);
2942 offset = start_of_cluster(s, offset);
2943
2944 if ((chk & QCOW2_OL_ACTIVE_L1) && s->l1_size) {
2945 if (overlaps_with(s->l1_table_offset, s->l1_size * L1E_SIZE)) {
2946 return QCOW2_OL_ACTIVE_L1;
2947 }
2948 }
2949
2950 if ((chk & QCOW2_OL_REFCOUNT_TABLE) && s->refcount_table_size) {
2951 if (overlaps_with(s->refcount_table_offset,
2952 s->refcount_table_size * REFTABLE_ENTRY_SIZE)) {
2953 return QCOW2_OL_REFCOUNT_TABLE;
2954 }
2955 }
2956
2957 if ((chk & QCOW2_OL_SNAPSHOT_TABLE) && s->snapshots_size) {
2958 if (overlaps_with(s->snapshots_offset, s->snapshots_size)) {
2959 return QCOW2_OL_SNAPSHOT_TABLE;
2960 }
2961 }
2962
2963 if ((chk & QCOW2_OL_INACTIVE_L1) && s->snapshots) {
2964 for (i = 0; i < s->nb_snapshots; i++) {
2965 if (s->snapshots[i].l1_size &&
2966 overlaps_with(s->snapshots[i].l1_table_offset,
2967 s->snapshots[i].l1_size * L1E_SIZE)) {
2968 return QCOW2_OL_INACTIVE_L1;
2969 }
2970 }
2971 }
2972
2973 if ((chk & QCOW2_OL_ACTIVE_L2) && s->l1_table) {
2974 for (i = 0; i < s->l1_size; i++) {
2975 if ((s->l1_table[i] & L1E_OFFSET_MASK) &&
2976 overlaps_with(s->l1_table[i] & L1E_OFFSET_MASK,
2977 s->cluster_size)) {
2978 return QCOW2_OL_ACTIVE_L2;
2979 }
2980 }
2981 }
2982
2983 if ((chk & QCOW2_OL_REFCOUNT_BLOCK) && s->refcount_table) {
2984 unsigned last_entry = s->max_refcount_table_index;
2985 assert(last_entry < s->refcount_table_size);
2986 assert(last_entry + 1 == s->refcount_table_size ||
2987 (s->refcount_table[last_entry + 1] & REFT_OFFSET_MASK) == 0);
2988 for (i = 0; i <= last_entry; i++) {
2989 if ((s->refcount_table[i] & REFT_OFFSET_MASK) &&
2990 overlaps_with(s->refcount_table[i] & REFT_OFFSET_MASK,
2991 s->cluster_size)) {
2992 return QCOW2_OL_REFCOUNT_BLOCK;
2993 }
2994 }
2995 }
2996
2997 if ((chk & QCOW2_OL_INACTIVE_L2) && s->snapshots) {
2998 for (i = 0; i < s->nb_snapshots; i++) {
2999 uint64_t l1_ofs = s->snapshots[i].l1_table_offset;
3000 uint32_t l1_sz = s->snapshots[i].l1_size;
3001 uint64_t l1_sz2 = l1_sz * L1E_SIZE;
3002 uint64_t *l1;
3003 int ret;
3004
3005 ret = qcow2_validate_table(bs, l1_ofs, l1_sz, L1E_SIZE,
3006 QCOW_MAX_L1_SIZE, "", NULL);
3007 if (ret < 0) {
3008 return ret;
3009 }
3010
3011 l1 = g_try_malloc(l1_sz2);
3012
3013 if (l1_sz2 && l1 == NULL) {
3014 return -ENOMEM;
3015 }
3016
3017 ret = bdrv_pread(bs->file, l1_ofs, l1_sz2, l1, 0);
3018 if (ret < 0) {
3019 g_free(l1);
3020 return ret;
3021 }
3022
3023 for (j = 0; j < l1_sz; j++) {
3024 uint64_t l2_ofs = be64_to_cpu(l1[j]) & L1E_OFFSET_MASK;
3025 if (l2_ofs && overlaps_with(l2_ofs, s->cluster_size)) {
3026 g_free(l1);
3027 return QCOW2_OL_INACTIVE_L2;
3028 }
3029 }
3030
3031 g_free(l1);
3032 }
3033 }
3034
3035 if ((chk & QCOW2_OL_BITMAP_DIRECTORY) &&
3036 (s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS))
3037 {
3038 if (overlaps_with(s->bitmap_directory_offset,
3039 s->bitmap_directory_size))
3040 {
3041 return QCOW2_OL_BITMAP_DIRECTORY;
3042 }
3043 }
3044
3045 return 0;
3046}
3047
/*
 * Human-readable names of the metadata structures covered by the overlap
 * checks, indexed by the QCOW2_OL_*_BITNR bit numbers.  Used to build the
 * corruption message in qcow2_pre_write_overlap_check().
 */
static const char *metadata_ol_names[] = {
    [QCOW2_OL_MAIN_HEADER_BITNR] = "qcow2_header",
    [QCOW2_OL_ACTIVE_L1_BITNR] = "active L1 table",
    [QCOW2_OL_ACTIVE_L2_BITNR] = "active L2 table",
    [QCOW2_OL_REFCOUNT_TABLE_BITNR] = "refcount table",
    [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = "refcount block",
    [QCOW2_OL_SNAPSHOT_TABLE_BITNR] = "snapshot table",
    [QCOW2_OL_INACTIVE_L1_BITNR] = "inactive L1 table",
    [QCOW2_OL_INACTIVE_L2_BITNR] = "inactive L2 table",
    [QCOW2_OL_BITMAP_DIRECTORY_BITNR] = "bitmap directory",
};
/* The table must have exactly QCOW2_OL_MAX_BITNR entries */
QEMU_BUILD_BUG_ON(QCOW2_OL_MAX_BITNR != ARRAY_SIZE(metadata_ol_names));
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
3072 int64_t size, bool data_file)
3073{
3074 int ret;
3075
3076 if (data_file && has_data_file(bs)) {
3077 return 0;
3078 }
3079
3080 ret = qcow2_check_metadata_overlap(bs, ign, offset, size);
3081 if (ret < 0) {
3082 return ret;
3083 } else if (ret > 0) {
3084 int metadata_ol_bitnr = ctz32(ret);
3085 assert(metadata_ol_bitnr < QCOW2_OL_MAX_BITNR);
3086
3087 qcow2_signal_corruption(bs, true, offset, size, "Preventing invalid "
3088 "write on metadata (overlaps with %s)",
3089 metadata_ol_names[metadata_ol_bitnr]);
3090 return -EIO;
3091 }
3092
3093 return 0;
3094}
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
/*
 * Callback invoked by walk_over_reftable() whenever a new refblock has been
 * completely processed (and once more for a trailing, partially filled one).
 *
 * @reftable/@reftable_size describe the new reftable, @reftable_index is the
 * index of the finished refblock within it, @refblock is the refblock buffer
 * passed to walk_over_reftable() and @refblock_empty tells whether all of its
 * refcounts were zero.  @allocated and @errp are passed through from
 * walk_over_reftable().
 *
 * A negative return value aborts the walk and is returned to its caller.
 */
typedef int (RefblockFinishOp)(BlockDriverState *bs, uint64_t **reftable,
                               uint64_t reftable_index, uint64_t *reftable_size,
                               void *refblock, bool refblock_empty,
                               bool *allocated, Error **errp);
3110
3111
3112
3113
3114
3115
3116static int alloc_refblock(BlockDriverState *bs, uint64_t **reftable,
3117 uint64_t reftable_index, uint64_t *reftable_size,
3118 void *refblock, bool refblock_empty, bool *allocated,
3119 Error **errp)
3120{
3121 BDRVQcow2State *s = bs->opaque;
3122 int64_t offset;
3123
3124 if (!refblock_empty && reftable_index >= *reftable_size) {
3125 uint64_t *new_reftable;
3126 uint64_t new_reftable_size;
3127
3128 new_reftable_size = ROUND_UP(reftable_index + 1,
3129 s->cluster_size / REFTABLE_ENTRY_SIZE);
3130 if (new_reftable_size > QCOW_MAX_REFTABLE_SIZE / REFTABLE_ENTRY_SIZE) {
3131 error_setg(errp,
3132 "This operation would make the refcount table grow "
3133 "beyond the maximum size supported by QEMU, aborting");
3134 return -ENOTSUP;
3135 }
3136
3137 new_reftable = g_try_realloc(*reftable, new_reftable_size *
3138 REFTABLE_ENTRY_SIZE);
3139 if (!new_reftable) {
3140 error_setg(errp, "Failed to increase reftable buffer size");
3141 return -ENOMEM;
3142 }
3143
3144 memset(new_reftable + *reftable_size, 0,
3145 (new_reftable_size - *reftable_size) * REFTABLE_ENTRY_SIZE);
3146
3147 *reftable = new_reftable;
3148 *reftable_size = new_reftable_size;
3149 }
3150
3151 if (!refblock_empty && !(*reftable)[reftable_index]) {
3152 offset = qcow2_alloc_clusters(bs, s->cluster_size);
3153 if (offset < 0) {
3154 error_setg_errno(errp, -offset, "Failed to allocate refblock");
3155 return offset;
3156 }
3157 (*reftable)[reftable_index] = offset;
3158 *allocated = true;
3159 }
3160
3161 return 0;
3162}
3163
3164
3165
3166
3167
3168
3169static int flush_refblock(BlockDriverState *bs, uint64_t **reftable,
3170 uint64_t reftable_index, uint64_t *reftable_size,
3171 void *refblock, bool refblock_empty, bool *allocated,
3172 Error **errp)
3173{
3174 BDRVQcow2State *s = bs->opaque;
3175 int64_t offset;
3176 int ret;
3177
3178 if (reftable_index < *reftable_size && (*reftable)[reftable_index]) {
3179 offset = (*reftable)[reftable_index];
3180
3181 ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size,
3182 false);
3183 if (ret < 0) {
3184 error_setg_errno(errp, -ret, "Overlap check failed");
3185 return ret;
3186 }
3187
3188 ret = bdrv_pwrite(bs->file, offset, s->cluster_size, refblock, 0);
3189 if (ret < 0) {
3190 error_setg_errno(errp, -ret, "Failed to write refblock");
3191 return ret;
3192 }
3193 } else {
3194 assert(refblock_empty);
3195 }
3196
3197 return 0;
3198}
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
/*
 * Walks over the current reftable (s->refcount_table) and every refblock it
 * references, producing a stream of refcounts that is regrouped into new
 * refblocks of @new_refblock_size entries each.
 *
 * If @new_set_refcount is non-NULL, it is called to store every refcount in
 * @new_refblock; if it is NULL, only the position within the new refblock is
 * tracked.  Every time a new refblock is complete, @operation is invoked for
 * it and *@new_reftable_index is incremented.  A trailing, partially filled
 * refblock is padded with zero refcounts (when @new_set_refcount is given)
 * and handed to @operation as well.
 *
 * A refcount that does not fit into @new_refcount_bits bits aborts the walk
 * with -EINVAL.
 *
 * @status_cb is called with @cb_opaque to report progress; @index is the
 * number of this walk and @total the expected number of walks, both scaled by
 * the reftable size.
 *
 * @new_reftable, @new_reftable_size, @allocated and @errp are passed through
 * to @operation.
 *
 * Returns 0 on success and a negative error code on failure.
 */
static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable,
                              uint64_t *new_reftable_index,
                              uint64_t *new_reftable_size,
                              void *new_refblock, int new_refblock_size,
                              int new_refcount_bits,
                              RefblockFinishOp *operation, bool *allocated,
                              Qcow2SetRefcountFunc *new_set_refcount,
                              BlockDriverAmendStatusCB *status_cb,
                              void *cb_opaque, int index, int total,
                              Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t reftable_index;
    bool new_refblock_empty = true;
    int refblock_index;
    int new_refblock_index = 0;
    int ret;

    for (reftable_index = 0; reftable_index < s->refcount_table_size;
         reftable_index++)
    {
        uint64_t refblock_offset = s->refcount_table[reftable_index]
                                 & REFT_OFFSET_MASK;

        /* Progress: walks completed so far plus position within this walk */
        status_cb(bs, (uint64_t)index * s->refcount_table_size + reftable_index,
                  (uint64_t)total * s->refcount_table_size, cb_opaque);

        if (refblock_offset) {
            void *refblock;

            if (offset_into_cluster(s, refblock_offset)) {
                qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#"
                                        PRIx64 " unaligned (reftable index: %#"
                                        PRIx64 ")", refblock_offset,
                                        reftable_index);
                error_setg(errp,
                           "Image is corrupt (unaligned refblock offset)");
                return -EIO;
            }

            ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offset,
                                  &refblock);
            if (ret < 0) {
                error_setg_errno(errp, -ret, "Failed to retrieve refblock");
                return ret;
            }

            for (refblock_index = 0; refblock_index < s->refcount_block_size;
                 refblock_index++)
            {
                uint64_t refcount;

                if (new_refblock_index >= new_refblock_size) {
                    /* The new refblock is full: finish it, start the next */
                    ret = operation(bs, new_reftable, *new_reftable_index,
                                    new_reftable_size, new_refblock,
                                    new_refblock_empty, allocated, errp);
                    if (ret < 0) {
                        qcow2_cache_put(s->refcount_block_cache, &refblock);
                        return ret;
                    }

                    (*new_reftable_index)++;
                    new_refblock_index = 0;
                    new_refblock_empty = true;
                }

                refcount = s->get_refcount(refblock, refblock_index);
                /*
                 * Reject refcounts that need more than new_refcount_bits bits.
                 * The "< 64" test avoids shifting a 64-bit value by its full
                 * width.
                 */
                if (new_refcount_bits < 64 && refcount >> new_refcount_bits) {
                    uint64_t offset;

                    qcow2_cache_put(s->refcount_block_cache, &refblock);

                    /* Image offset of the cluster this refcount belongs to */
                    offset = ((reftable_index << s->refcount_block_bits)
                              + refblock_index) << s->cluster_bits;

                    error_setg(errp, "Cannot decrease refcount entry width to "
                               "%i bits: Cluster at offset %#" PRIx64 " has a "
                               "refcount of %" PRIu64, new_refcount_bits,
                               offset, refcount);
                    return -EINVAL;
                }

                if (new_set_refcount) {
                    new_set_refcount(new_refblock, new_refblock_index++,
                                     refcount);
                } else {
                    new_refblock_index++;
                }
                new_refblock_empty = new_refblock_empty && refcount == 0;
            }

            qcow2_cache_put(s->refcount_block_cache, &refblock);
        } else {
            /*
             * This reftable entry references no refblock; emit zero refcounts
             * for the range it would cover.
             */
            for (refblock_index = 0; refblock_index < s->refcount_block_size;
                 refblock_index++)
            {
                if (new_refblock_index >= new_refblock_size) {
                    /* The new refblock is full: finish it, start the next */
                    ret = operation(bs, new_reftable, *new_reftable_index,
                                    new_reftable_size, new_refblock,
                                    new_refblock_empty, allocated, errp);
                    if (ret < 0) {
                        return ret;
                    }

                    (*new_reftable_index)++;
                    new_refblock_index = 0;
                    new_refblock_empty = true;
                }

                if (new_set_refcount) {
                    new_set_refcount(new_refblock, new_refblock_index++, 0);
                } else {
                    new_refblock_index++;
                }
            }
        }
    }

    if (new_refblock_index > 0) {
        /* Pad the partially filled last refblock with zero refcounts */
        if (new_set_refcount) {
            for (; new_refblock_index < new_refblock_size;
                 new_refblock_index++)
            {
                new_set_refcount(new_refblock, new_refblock_index, 0);
            }
        }

        ret = operation(bs, new_reftable, *new_reftable_index,
                        new_reftable_size, new_refblock, new_refblock_empty,
                        allocated, errp);
        if (ret < 0) {
            return ret;
        }

        (*new_reftable_index)++;
    }

    /* Report this walk as complete */
    status_cb(bs, (uint64_t)(index + 1) * s->refcount_table_size,
              (uint64_t)total * s->refcount_table_size, cb_opaque);

    return 0;
}
3359
/*
 * Changes the width of the image's refcount entries to
 * (1 << @refcount_order) bits.
 *
 * A new reftable and new refblocks are built next to the existing ones by
 * repeated calls to walk_over_reftable(): first to allocate clusters for them
 * (alloc_refblock), then to write the refblocks (flush_refblock).  Afterwards
 * the new reftable is written, the header is updated to point to it, the
 * in-memory refcount state is switched over and the clusters of the old
 * structures are freed.  If anything fails, the clusters of the new
 * structures are freed instead.
 *
 * @status_cb and @cb_opaque are forwarded to walk_over_reftable() for
 * progress reporting.
 *
 * Requires a version 3 (or later) image and 0 <= @refcount_order <= 6.
 *
 * Returns a negative error code on failure (with @errp set); otherwise the
 * non-negative result of qcow2_update_header().
 */
int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
                                BlockDriverAmendStatusCB *status_cb,
                                void *cb_opaque, Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    Qcow2GetRefcountFunc *new_get_refcount;
    Qcow2SetRefcountFunc *new_set_refcount;
    void *new_refblock = qemu_blockalign(bs->file->bs, s->cluster_size);
    uint64_t *new_reftable = NULL, new_reftable_size = 0;
    uint64_t *old_reftable, old_reftable_size, old_reftable_offset;
    uint64_t new_reftable_index = 0;
    uint64_t i;
    int64_t new_reftable_offset = 0, allocated_reftable_size = 0;
    int new_refblock_size, new_refcount_bits = 1 << refcount_order;
    int old_refcount_order;
    int walk_index = 0;
    int ret;
    bool new_allocation;

    assert(s->qcow_version >= 3);
    assert(refcount_order >= 0 && refcount_order <= 6);

    /* Number of refcount entries of the new width that fit into one cluster */
    new_refblock_size = 1 << (s->cluster_bits - (refcount_order - 3));

    new_get_refcount = get_refcount_funcs[refcount_order];
    new_set_refcount = set_refcount_funcs[refcount_order];


    /*
     * Phase 1: allocate clusters for the new refblocks and the new reftable.
     * The walk is repeated until one pass completes without any new
     * allocation.
     */
    do {
        int total_walks;

        new_allocation = false;

        /*
         * Estimate of the total number of walks, used only for progress
         * reporting: this walk plus at least one more, and never fewer than
         * three.
         */
        total_walks = MAX(walk_index + 2, 3);

        /*
         * Allocation-only walk: no refblock buffer and no setter are passed,
         * so no refcounts are copied.
         */
        ret = walk_over_reftable(bs, &new_reftable, &new_reftable_index,
                                 &new_reftable_size, NULL, new_refblock_size,
                                 new_refcount_bits, &alloc_refblock,
                                 &new_allocation, NULL, status_cb, cb_opaque,
                                 walk_index++, total_walks, errp);
        if (ret < 0) {
            goto done;
        }

        new_reftable_index = 0;

        if (new_allocation) {
            if (new_reftable_offset) {
                /* Release the reftable area allocated in an earlier pass */
                qcow2_free_clusters(
                    bs, new_reftable_offset,
                    allocated_reftable_size * REFTABLE_ENTRY_SIZE,
                    QCOW2_DISCARD_NEVER);
            }

            new_reftable_offset = qcow2_alloc_clusters(bs, new_reftable_size *
                                                           REFTABLE_ENTRY_SIZE);
            if (new_reftable_offset < 0) {
                error_setg_errno(errp, -new_reftable_offset,
                                 "Failed to allocate the new reftable");
                ret = new_reftable_offset;
                goto done;
            }
            allocated_reftable_size = new_reftable_size;
        }
    } while (new_allocation);

    /* Phase 2: fill and write the new refblocks */
    ret = walk_over_reftable(bs, &new_reftable, &new_reftable_index,
                             &new_reftable_size, new_refblock,
                             new_refblock_size, new_refcount_bits,
                             &flush_refblock, &new_allocation, new_set_refcount,
                             status_cb, cb_opaque, walk_index, walk_index + 1,
                             errp);
    if (ret < 0) {
        goto done;
    }
    /* flush_refblock() never allocates */
    assert(!new_allocation);


    /* Phase 3: write the new reftable */
    ret = qcow2_pre_write_overlap_check(bs, 0, new_reftable_offset,
                                        new_reftable_size * REFTABLE_ENTRY_SIZE,
                                        false);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Overlap check failed");
        goto done;
    }

    /* The on-disk reftable is big-endian; convert in place for the write */
    for (i = 0; i < new_reftable_size; i++) {
        cpu_to_be64s(&new_reftable[i]);
    }

    ret = bdrv_pwrite(bs->file, new_reftable_offset,
                      new_reftable_size * REFTABLE_ENTRY_SIZE, new_reftable,
                      0);

    /* Convert back before looking at ret: the table is used again below */
    for (i = 0; i < new_reftable_size; i++) {
        be64_to_cpus(&new_reftable[i]);
    }

    if (ret < 0) {
        error_setg_errno(errp, -ret, "Failed to write the new reftable");
        goto done;
    }


    /* Phase 4: flush the refblock cache and switch the header over */
    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Failed to flush the refblock cache");
        goto done;
    }

    /*
     * qcow2_update_header() is called after the new values have been put
     * into @s; the old values are saved so they can be restored if it fails.
     */
    old_refcount_order = s->refcount_order;
    old_reftable_size = s->refcount_table_size;
    old_reftable_offset = s->refcount_table_offset;

    s->refcount_order = refcount_order;
    s->refcount_table_size = new_reftable_size;
    s->refcount_table_offset = new_reftable_offset;

    ret = qcow2_update_header(bs);
    if (ret < 0) {
        s->refcount_order = old_refcount_order;
        s->refcount_table_size = old_reftable_size;
        s->refcount_table_offset = old_reftable_offset;
        error_setg_errno(errp, -ret, "Failed to update the qcow2 header");
        goto done;
    }

    /* Switch the remaining in-memory refcount state to the new layout */
    old_reftable = s->refcount_table;
    s->refcount_table = new_reftable;
    update_max_refcount_table_index(s);

    s->refcount_bits = 1 << refcount_order;
    /* Computed in two steps so that no shift by 64 is needed for 64 bits */
    s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
    s->refcount_max += s->refcount_max - 1;

    s->refcount_block_bits = s->cluster_bits - (refcount_order - 3);
    s->refcount_block_size = 1 << s->refcount_block_bits;

    s->get_refcount = new_get_refcount;
    s->set_refcount = new_set_refcount;

    /*
     * Make the new_* variables describe the old structures so that the
     * cleanup below frees those.
     */
    new_reftable = old_reftable;
    new_reftable_size = old_reftable_size;
    new_reftable_offset = old_reftable_offset;

done:
    if (new_reftable) {
        /*
         * On success this frees the old structures (see the swap above); on
         * failure it frees what had been set up for the new ones.
         */
        for (i = 0; i < new_reftable_size; i++) {
            uint64_t offset = new_reftable[i] & REFT_OFFSET_MASK;
            if (offset) {
                qcow2_free_clusters(bs, offset, s->cluster_size,
                                    QCOW2_DISCARD_OTHER);
            }
        }
        g_free(new_reftable);

        /* Skipped if no reftable area was allocated or allocation failed */
        if (new_reftable_offset > 0) {
            qcow2_free_clusters(bs, new_reftable_offset,
                                new_reftable_size * REFTABLE_ENTRY_SIZE,
                                QCOW2_DISCARD_OTHER);
        }
    }

    qemu_vfree(new_refblock);
    return ret;
}
3547
3548static int64_t coroutine_fn get_refblock_offset(BlockDriverState *bs,
3549 uint64_t offset)
3550{
3551 BDRVQcow2State *s = bs->opaque;
3552 uint32_t index = offset_to_reftable_index(s, offset);
3553 int64_t covering_refblock_offset = 0;
3554
3555 if (index < s->refcount_table_size) {
3556 covering_refblock_offset = s->refcount_table[index] & REFT_OFFSET_MASK;
3557 }
3558 if (!covering_refblock_offset) {
3559 qcow2_signal_corruption(bs, true, -1, -1, "Refblock at %#" PRIx64 " is "
3560 "not covered by the refcount structures",
3561 offset);
3562 return -EIO;
3563 }
3564
3565 return covering_refblock_offset;
3566}
3567
3568static int coroutine_fn
3569qcow2_discard_refcount_block(BlockDriverState *bs, uint64_t discard_block_offs)
3570{
3571 BDRVQcow2State *s = bs->opaque;
3572 int64_t refblock_offs;
3573 uint64_t cluster_index = discard_block_offs >> s->cluster_bits;
3574 uint32_t block_index = cluster_index & (s->refcount_block_size - 1);
3575 void *refblock;
3576 int ret;
3577
3578 refblock_offs = get_refblock_offset(bs, discard_block_offs);
3579 if (refblock_offs < 0) {
3580 return refblock_offs;
3581 }
3582
3583 assert(discard_block_offs != 0);
3584
3585 ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offs,
3586 &refblock);
3587 if (ret < 0) {
3588 return ret;
3589 }
3590
3591 if (s->get_refcount(refblock, block_index) != 1) {
3592 qcow2_signal_corruption(bs, true, -1, -1, "Invalid refcount:"
3593 " refblock offset %#" PRIx64
3594 ", reftable index %u"
3595 ", block offset %#" PRIx64
3596 ", refcount %#" PRIx64,
3597 refblock_offs,
3598 offset_to_reftable_index(s, discard_block_offs),
3599 discard_block_offs,
3600 s->get_refcount(refblock, block_index));
3601 qcow2_cache_put(s->refcount_block_cache, &refblock);
3602 return -EINVAL;
3603 }
3604 s->set_refcount(refblock, block_index, 0);
3605
3606 qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refblock);
3607
3608 qcow2_cache_put(s->refcount_block_cache, &refblock);
3609
3610 if (cluster_index < s->free_cluster_index) {
3611 s->free_cluster_index = cluster_index;
3612 }
3613
3614 refblock = qcow2_cache_is_table_offset(s->refcount_block_cache,
3615 discard_block_offs);
3616 if (refblock) {
3617
3618 qcow2_cache_discard(s->refcount_block_cache, refblock);
3619 }
3620 update_refcount_discard(bs, discard_block_offs, s->cluster_size);
3621
3622 return 0;
3623}
3624
3625int coroutine_fn qcow2_shrink_reftable(BlockDriverState *bs)
3626{
3627 BDRVQcow2State *s = bs->opaque;
3628 uint64_t *reftable_tmp =
3629 g_malloc(s->refcount_table_size * REFTABLE_ENTRY_SIZE);
3630 int i, ret;
3631
3632 for (i = 0; i < s->refcount_table_size; i++) {
3633 int64_t refblock_offs = s->refcount_table[i] & REFT_OFFSET_MASK;
3634 void *refblock;
3635 bool unused_block;
3636
3637 if (refblock_offs == 0) {
3638 reftable_tmp[i] = 0;
3639 continue;
3640 }
3641 ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offs,
3642 &refblock);
3643 if (ret < 0) {
3644 goto out;
3645 }
3646
3647
3648 if (i == offset_to_reftable_index(s, refblock_offs)) {
3649 uint64_t block_index = (refblock_offs >> s->cluster_bits) &
3650 (s->refcount_block_size - 1);
3651 uint64_t refcount = s->get_refcount(refblock, block_index);
3652
3653 s->set_refcount(refblock, block_index, 0);
3654
3655 unused_block = buffer_is_zero(refblock, s->cluster_size);
3656
3657 s->set_refcount(refblock, block_index, refcount);
3658 } else {
3659 unused_block = buffer_is_zero(refblock, s->cluster_size);
3660 }
3661 qcow2_cache_put(s->refcount_block_cache, &refblock);
3662
3663 reftable_tmp[i] = unused_block ? 0 : cpu_to_be64(s->refcount_table[i]);
3664 }
3665
3666 ret = bdrv_co_pwrite_sync(bs->file, s->refcount_table_offset,
3667 s->refcount_table_size * REFTABLE_ENTRY_SIZE,
3668 reftable_tmp, 0);
3669
3670
3671
3672
3673
3674 for (i = 0; i < s->refcount_table_size; i++) {
3675 if (s->refcount_table[i] && !reftable_tmp[i]) {
3676 if (ret == 0) {
3677 ret = qcow2_discard_refcount_block(bs, s->refcount_table[i] &
3678 REFT_OFFSET_MASK);
3679 }
3680 s->refcount_table[i] = 0;
3681 }
3682 }
3683
3684 if (!s->cache_discards) {
3685 qcow2_process_discards(bs, ret);
3686 }
3687
3688out:
3689 g_free(reftable_tmp);
3690 return ret;
3691}
3692
3693int64_t coroutine_fn qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
3694{
3695 BDRVQcow2State *s = bs->opaque;
3696 int64_t i;
3697
3698 for (i = size_to_clusters(s, size) - 1; i >= 0; i--) {
3699 uint64_t refcount;
3700 int ret = qcow2_get_refcount(bs, i, &refcount);
3701 if (ret < 0) {
3702 fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
3703 i, strerror(-ret));
3704 return ret;
3705 }
3706 if (refcount > 0) {
3707 return i;
3708 }
3709 }
3710 qcow2_signal_corruption(bs, true, -1, -1,
3711 "There are no references in the refcount table.");
3712 return -EIO;
3713}
3714
3715int coroutine_fn GRAPH_RDLOCK
3716qcow2_detect_metadata_preallocation(BlockDriverState *bs)
3717{
3718 BDRVQcow2State *s = bs->opaque;
3719 int64_t i, end_cluster, cluster_count = 0, threshold;
3720 int64_t file_length, real_allocation, real_clusters;
3721
3722 qemu_co_mutex_assert_locked(&s->lock);
3723
3724 file_length = bdrv_co_getlength(bs->file->bs);
3725 if (file_length < 0) {
3726 return file_length;
3727 }
3728
3729 real_allocation = bdrv_co_get_allocated_file_size(bs->file->bs);
3730 if (real_allocation < 0) {
3731 return real_allocation;
3732 }
3733
3734 real_clusters = real_allocation / s->cluster_size;
3735 threshold = MAX(real_clusters * 10 / 9, real_clusters + 2);
3736
3737 end_cluster = size_to_clusters(s, file_length);
3738 for (i = 0; i < end_cluster && cluster_count < threshold; i++) {
3739 uint64_t refcount;
3740 int ret = qcow2_get_refcount(bs, i, &refcount);
3741 if (ret < 0) {
3742 return ret;
3743 }
3744 cluster_count += !!refcount;
3745 }
3746
3747 return cluster_count >= threshold;
3748}
3749