1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include "qemu/osdep.h"
26#include "qapi/error.h"
27#include "qcow2.h"
28#include "qemu/range.h"
29#include "qemu/bswap.h"
30#include "qemu/cutils.h"
31#include "qemu/memalign.h"
32#include "trace.h"
33
34static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size,
35 uint64_t max);
36
37G_GNUC_WARN_UNUSED_RESULT
38static int update_refcount(BlockDriverState *bs,
39 int64_t offset, int64_t length, uint64_t addend,
40 bool decrease, enum qcow2_discard_type type);
41
42static uint64_t get_refcount_ro0(const void *refcount_array, uint64_t index);
43static uint64_t get_refcount_ro1(const void *refcount_array, uint64_t index);
44static uint64_t get_refcount_ro2(const void *refcount_array, uint64_t index);
45static uint64_t get_refcount_ro3(const void *refcount_array, uint64_t index);
46static uint64_t get_refcount_ro4(const void *refcount_array, uint64_t index);
47static uint64_t get_refcount_ro5(const void *refcount_array, uint64_t index);
48static uint64_t get_refcount_ro6(const void *refcount_array, uint64_t index);
49
50static void set_refcount_ro0(void *refcount_array, uint64_t index,
51 uint64_t value);
52static void set_refcount_ro1(void *refcount_array, uint64_t index,
53 uint64_t value);
54static void set_refcount_ro2(void *refcount_array, uint64_t index,
55 uint64_t value);
56static void set_refcount_ro3(void *refcount_array, uint64_t index,
57 uint64_t value);
58static void set_refcount_ro4(void *refcount_array, uint64_t index,
59 uint64_t value);
60static void set_refcount_ro5(void *refcount_array, uint64_t index,
61 uint64_t value);
62static void set_refcount_ro6(void *refcount_array, uint64_t index,
63 uint64_t value);
64
65
66static Qcow2GetRefcountFunc *const get_refcount_funcs[] = {
67 &get_refcount_ro0,
68 &get_refcount_ro1,
69 &get_refcount_ro2,
70 &get_refcount_ro3,
71 &get_refcount_ro4,
72 &get_refcount_ro5,
73 &get_refcount_ro6
74};
75
76static Qcow2SetRefcountFunc *const set_refcount_funcs[] = {
77 &set_refcount_ro0,
78 &set_refcount_ro1,
79 &set_refcount_ro2,
80 &set_refcount_ro3,
81 &set_refcount_ro4,
82 &set_refcount_ro5,
83 &set_refcount_ro6
84};
85
86
87
88
89
90static void update_max_refcount_table_index(BDRVQcow2State *s)
91{
92 unsigned i = s->refcount_table_size - 1;
93 while (i > 0 && (s->refcount_table[i] & REFT_OFFSET_MASK) == 0) {
94 i--;
95 }
96
97 s->max_refcount_table_index = i;
98}
99
100int coroutine_fn qcow2_refcount_init(BlockDriverState *bs)
101{
102 BDRVQcow2State *s = bs->opaque;
103 unsigned int refcount_table_size2, i;
104 int ret;
105
106 assert(s->refcount_order >= 0 && s->refcount_order <= 6);
107
108 s->get_refcount = get_refcount_funcs[s->refcount_order];
109 s->set_refcount = set_refcount_funcs[s->refcount_order];
110
111 assert(s->refcount_table_size <= INT_MAX / REFTABLE_ENTRY_SIZE);
112 refcount_table_size2 = s->refcount_table_size * REFTABLE_ENTRY_SIZE;
113 s->refcount_table = g_try_malloc(refcount_table_size2);
114
115 if (s->refcount_table_size > 0) {
116 if (s->refcount_table == NULL) {
117 ret = -ENOMEM;
118 goto fail;
119 }
120 BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
121 ret = bdrv_co_pread(bs->file, s->refcount_table_offset,
122 refcount_table_size2, s->refcount_table, 0);
123 if (ret < 0) {
124 goto fail;
125 }
126 for(i = 0; i < s->refcount_table_size; i++)
127 be64_to_cpus(&s->refcount_table[i]);
128 update_max_refcount_table_index(s);
129 }
130 return 0;
131 fail:
132 return ret;
133}
134
135void qcow2_refcount_close(BlockDriverState *bs)
136{
137 BDRVQcow2State *s = bs->opaque;
138 g_free(s->refcount_table);
139}
140
141
142static uint64_t get_refcount_ro0(const void *refcount_array, uint64_t index)
143{
144 return (((const uint8_t *)refcount_array)[index / 8] >> (index % 8)) & 0x1;
145}
146
147static void set_refcount_ro0(void *refcount_array, uint64_t index,
148 uint64_t value)
149{
150 assert(!(value >> 1));
151 ((uint8_t *)refcount_array)[index / 8] &= ~(0x1 << (index % 8));
152 ((uint8_t *)refcount_array)[index / 8] |= value << (index % 8);
153}
154
155static uint64_t get_refcount_ro1(const void *refcount_array, uint64_t index)
156{
157 return (((const uint8_t *)refcount_array)[index / 4] >> (2 * (index % 4)))
158 & 0x3;
159}
160
161static void set_refcount_ro1(void *refcount_array, uint64_t index,
162 uint64_t value)
163{
164 assert(!(value >> 2));
165 ((uint8_t *)refcount_array)[index / 4] &= ~(0x3 << (2 * (index % 4)));
166 ((uint8_t *)refcount_array)[index / 4] |= value << (2 * (index % 4));
167}
168
169static uint64_t get_refcount_ro2(const void *refcount_array, uint64_t index)
170{
171 return (((const uint8_t *)refcount_array)[index / 2] >> (4 * (index % 2)))
172 & 0xf;
173}
174
175static void set_refcount_ro2(void *refcount_array, uint64_t index,
176 uint64_t value)
177{
178 assert(!(value >> 4));
179 ((uint8_t *)refcount_array)[index / 2] &= ~(0xf << (4 * (index % 2)));
180 ((uint8_t *)refcount_array)[index / 2] |= value << (4 * (index % 2));
181}
182
183static uint64_t get_refcount_ro3(const void *refcount_array, uint64_t index)
184{
185 return ((const uint8_t *)refcount_array)[index];
186}
187
188static void set_refcount_ro3(void *refcount_array, uint64_t index,
189 uint64_t value)
190{
191 assert(!(value >> 8));
192 ((uint8_t *)refcount_array)[index] = value;
193}
194
195static uint64_t get_refcount_ro4(const void *refcount_array, uint64_t index)
196{
197 return be16_to_cpu(((const uint16_t *)refcount_array)[index]);
198}
199
200static void set_refcount_ro4(void *refcount_array, uint64_t index,
201 uint64_t value)
202{
203 assert(!(value >> 16));
204 ((uint16_t *)refcount_array)[index] = cpu_to_be16(value);
205}
206
207static uint64_t get_refcount_ro5(const void *refcount_array, uint64_t index)
208{
209 return be32_to_cpu(((const uint32_t *)refcount_array)[index]);
210}
211
212static void set_refcount_ro5(void *refcount_array, uint64_t index,
213 uint64_t value)
214{
215 assert(!(value >> 32));
216 ((uint32_t *)refcount_array)[index] = cpu_to_be32(value);
217}
218
219static uint64_t get_refcount_ro6(const void *refcount_array, uint64_t index)
220{
221 return be64_to_cpu(((const uint64_t *)refcount_array)[index]);
222}
223
224static void set_refcount_ro6(void *refcount_array, uint64_t index,
225 uint64_t value)
226{
227 ((uint64_t *)refcount_array)[index] = cpu_to_be64(value);
228}
229
230
231static int load_refcount_block(BlockDriverState *bs,
232 int64_t refcount_block_offset,
233 void **refcount_block)
234{
235 BDRVQcow2State *s = bs->opaque;
236
237 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD);
238 return qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
239 refcount_block);
240}
241
242
243
244
245
246int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
247 uint64_t *refcount)
248{
249 BDRVQcow2State *s = bs->opaque;
250 uint64_t refcount_table_index, block_index;
251 int64_t refcount_block_offset;
252 int ret;
253 void *refcount_block;
254
255 refcount_table_index = cluster_index >> s->refcount_block_bits;
256 if (refcount_table_index >= s->refcount_table_size) {
257 *refcount = 0;
258 return 0;
259 }
260 refcount_block_offset =
261 s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK;
262 if (!refcount_block_offset) {
263 *refcount = 0;
264 return 0;
265 }
266
267 if (offset_into_cluster(s, refcount_block_offset)) {
268 qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#" PRIx64
269 " unaligned (reftable index: %#" PRIx64 ")",
270 refcount_block_offset, refcount_table_index);
271 return -EIO;
272 }
273
274 ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
275 &refcount_block);
276 if (ret < 0) {
277 return ret;
278 }
279
280 block_index = cluster_index & (s->refcount_block_size - 1);
281 *refcount = s->get_refcount(refcount_block, block_index);
282
283 qcow2_cache_put(s->refcount_block_cache, &refcount_block);
284
285 return 0;
286}
287
288
289static int in_same_refcount_block(BDRVQcow2State *s, uint64_t offset_a,
290 uint64_t offset_b)
291{
292 uint64_t block_a = offset_a >> (s->cluster_bits + s->refcount_block_bits);
293 uint64_t block_b = offset_b >> (s->cluster_bits + s->refcount_block_bits);
294
295 return (block_a == block_b);
296}
297
298
299
300
301
302
303
304static int alloc_refcount_block(BlockDriverState *bs,
305 int64_t cluster_index, void **refcount_block)
306{
307 BDRVQcow2State *s = bs->opaque;
308 unsigned int refcount_table_index;
309 int64_t ret;
310
311 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);
312
313
314 refcount_table_index = cluster_index >> s->refcount_block_bits;
315
316 if (refcount_table_index < s->refcount_table_size) {
317
318 uint64_t refcount_block_offset =
319 s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK;
320
321
322 if (refcount_block_offset) {
323 if (offset_into_cluster(s, refcount_block_offset)) {
324 qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#"
325 PRIx64 " unaligned (reftable index: "
326 "%#x)", refcount_block_offset,
327 refcount_table_index);
328 return -EIO;
329 }
330
331 return load_refcount_block(bs, refcount_block_offset,
332 refcount_block);
333 }
334 }
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359 *refcount_block = NULL;
360
361
362 ret = qcow2_cache_flush(bs, s->l2_table_cache);
363 if (ret < 0) {
364 return ret;
365 }
366
367
368 int64_t new_block = alloc_clusters_noref(bs, s->cluster_size, INT64_MAX);
369 if (new_block < 0) {
370 return new_block;
371 }
372
373
374 assert((new_block & REFT_OFFSET_MASK) == new_block);
375
376
377 if (new_block == 0) {
378 qcow2_signal_corruption(bs, true, -1, -1, "Preventing invalid "
379 "allocation of refcount block at offset 0");
380 return -EIO;
381 }
382
383#ifdef DEBUG_ALLOC2
384 fprintf(stderr, "qcow2: Allocate refcount block %d for %" PRIx64
385 " at %" PRIx64 "\n",
386 refcount_table_index, cluster_index << s->cluster_bits, new_block);
387#endif
388
389 if (in_same_refcount_block(s, new_block, cluster_index << s->cluster_bits)) {
390
391 ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
392 refcount_block);
393 if (ret < 0) {
394 goto fail;
395 }
396
397 memset(*refcount_block, 0, s->cluster_size);
398
399
400 int block_index = (new_block >> s->cluster_bits) &
401 (s->refcount_block_size - 1);
402 s->set_refcount(*refcount_block, block_index, 1);
403 } else {
404
405
406 ret = update_refcount(bs, new_block, s->cluster_size, 1, false,
407 QCOW2_DISCARD_NEVER);
408 if (ret < 0) {
409 goto fail;
410 }
411
412 ret = qcow2_cache_flush(bs, s->refcount_block_cache);
413 if (ret < 0) {
414 goto fail;
415 }
416
417
418
419 ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
420 refcount_block);
421 if (ret < 0) {
422 goto fail;
423 }
424
425 memset(*refcount_block, 0, s->cluster_size);
426 }
427
428
429 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE);
430 qcow2_cache_entry_mark_dirty(s->refcount_block_cache, *refcount_block);
431 ret = qcow2_cache_flush(bs, s->refcount_block_cache);
432 if (ret < 0) {
433 goto fail;
434 }
435
436
437 if (refcount_table_index < s->refcount_table_size) {
438 uint64_t data64 = cpu_to_be64(new_block);
439 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP);
440 ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset +
441 refcount_table_index * REFTABLE_ENTRY_SIZE,
442 sizeof(data64), &data64, 0);
443 if (ret < 0) {
444 goto fail;
445 }
446
447 s->refcount_table[refcount_table_index] = new_block;
448
449
450 s->max_refcount_table_index =
451 MAX(s->max_refcount_table_index, refcount_table_index);
452
453
454
455 return -EAGAIN;
456 }
457
458 qcow2_cache_put(s->refcount_block_cache, refcount_block);
459
460
461
462
463
464
465
466
467
468
469
470
471 BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_GROW);
472
473
474
475
476
477
478
479
480
481
482
483
484 uint64_t blocks_used = DIV_ROUND_UP(MAX(cluster_index + 1,
485 (new_block >> s->cluster_bits) + 1),
486 s->refcount_block_size);
487
488
489 uint64_t meta_offset = (blocks_used * s->refcount_block_size) *
490 s->cluster_size;
491
492 ret = qcow2_refcount_area(bs, meta_offset, 0, false,
493 refcount_table_index, new_block);
494 if (ret < 0) {
495 return ret;
496 }
497
498 ret = load_refcount_block(bs, new_block, refcount_block);
499 if (ret < 0) {
500 return ret;
501 }
502
503
504
505
506 return -EAGAIN;
507
508fail:
509 if (*refcount_block != NULL) {
510 qcow2_cache_put(s->refcount_block_cache, refcount_block);
511 }
512 return ret;
513}
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t start_offset,
531 uint64_t additional_clusters, bool exact_size,
532 int new_refblock_index,
533 uint64_t new_refblock_offset)
534{
535 BDRVQcow2State *s = bs->opaque;
536 uint64_t total_refblock_count_u64, additional_refblock_count;
537 int total_refblock_count, table_size, area_reftable_index, table_clusters;
538 int i;
539 uint64_t table_offset, block_offset, end_offset;
540 int ret;
541 uint64_t *new_table;
542
543 assert(!(start_offset % s->cluster_size));
544
545 qcow2_refcount_metadata_size(start_offset / s->cluster_size +
546 additional_clusters,
547 s->cluster_size, s->refcount_order,
548 !exact_size, &total_refblock_count_u64);
549 if (total_refblock_count_u64 > QCOW_MAX_REFTABLE_SIZE) {
550 return -EFBIG;
551 }
552 total_refblock_count = total_refblock_count_u64;
553
554
555
556
557
558 area_reftable_index = (start_offset / s->cluster_size) /
559 s->refcount_block_size;
560
561 if (exact_size) {
562 table_size = total_refblock_count;
563 } else {
564 table_size = total_refblock_count +
565 DIV_ROUND_UP(total_refblock_count, 2);
566 }
567
568 table_size = ROUND_UP(table_size, s->cluster_size / REFTABLE_ENTRY_SIZE);
569 table_clusters = (table_size * REFTABLE_ENTRY_SIZE) / s->cluster_size;
570
571 if (table_size > QCOW_MAX_REFTABLE_SIZE) {
572 return -EFBIG;
573 }
574
575 new_table = g_try_new0(uint64_t, table_size);
576
577 assert(table_size > 0);
578 if (new_table == NULL) {
579 ret = -ENOMEM;
580 goto fail;
581 }
582
583
584 if (table_size > s->max_refcount_table_index) {
585
586 memcpy(new_table, s->refcount_table,
587 (s->max_refcount_table_index + 1) * REFTABLE_ENTRY_SIZE);
588 } else {
589
590
591
592
593 memcpy(new_table, s->refcount_table, table_size * REFTABLE_ENTRY_SIZE);
594 }
595
596 if (new_refblock_offset) {
597 assert(new_refblock_index < total_refblock_count);
598 new_table[new_refblock_index] = new_refblock_offset;
599 }
600
601
602 additional_refblock_count = 0;
603 for (i = area_reftable_index; i < total_refblock_count; i++) {
604 if (!new_table[i]) {
605 additional_refblock_count++;
606 }
607 }
608
609 table_offset = start_offset + additional_refblock_count * s->cluster_size;
610 end_offset = table_offset + table_clusters * s->cluster_size;
611
612
613 block_offset = start_offset;
614 for (i = area_reftable_index; i < total_refblock_count; i++) {
615 void *refblock_data;
616 uint64_t first_offset_covered;
617
618
619 if (new_table[i]) {
620 ret = qcow2_cache_get(bs, s->refcount_block_cache, new_table[i],
621 &refblock_data);
622 if (ret < 0) {
623 goto fail;
624 }
625 } else {
626 ret = qcow2_cache_get_empty(bs, s->refcount_block_cache,
627 block_offset, &refblock_data);
628 if (ret < 0) {
629 goto fail;
630 }
631 memset(refblock_data, 0, s->cluster_size);
632 qcow2_cache_entry_mark_dirty(s->refcount_block_cache,
633 refblock_data);
634
635 new_table[i] = block_offset;
636 block_offset += s->cluster_size;
637 }
638
639
640 first_offset_covered = (uint64_t)i * s->refcount_block_size *
641 s->cluster_size;
642 if (first_offset_covered < end_offset) {
643 int j, end_index;
644
645
646
647 if (first_offset_covered < start_offset) {
648 assert(i == area_reftable_index);
649 j = (start_offset - first_offset_covered) / s->cluster_size;
650 assert(j < s->refcount_block_size);
651 } else {
652 j = 0;
653 }
654
655 end_index = MIN((end_offset - first_offset_covered) /
656 s->cluster_size,
657 s->refcount_block_size);
658
659 for (; j < end_index; j++) {
660
661 assert(s->get_refcount(refblock_data, j) == 0);
662 s->set_refcount(refblock_data, j, 1);
663 }
664
665 qcow2_cache_entry_mark_dirty(s->refcount_block_cache,
666 refblock_data);
667 }
668
669 qcow2_cache_put(s->refcount_block_cache, &refblock_data);
670 }
671
672 assert(block_offset == table_offset);
673
674
675 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
676 ret = qcow2_cache_flush(bs, s->refcount_block_cache);
677 if (ret < 0) {
678 goto fail;
679 }
680
681
682 for (i = 0; i < total_refblock_count; i++) {
683 cpu_to_be64s(&new_table[i]);
684 }
685
686 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE);
687 ret = bdrv_pwrite_sync(bs->file, table_offset,
688 table_size * REFTABLE_ENTRY_SIZE, new_table, 0);
689 if (ret < 0) {
690 goto fail;
691 }
692
693 for (i = 0; i < total_refblock_count; i++) {
694 be64_to_cpus(&new_table[i]);
695 }
696
697
698 struct QEMU_PACKED {
699 uint64_t d64;
700 uint32_t d32;
701 } data;
702 data.d64 = cpu_to_be64(table_offset);
703 data.d32 = cpu_to_be32(table_clusters);
704 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE);
705 ret = bdrv_pwrite_sync(bs->file,
706 offsetof(QCowHeader, refcount_table_offset),
707 sizeof(data), &data, 0);
708 if (ret < 0) {
709 goto fail;
710 }
711
712
713 uint64_t old_table_offset = s->refcount_table_offset;
714 uint64_t old_table_size = s->refcount_table_size;
715
716 g_free(s->refcount_table);
717 s->refcount_table = new_table;
718 s->refcount_table_size = table_size;
719 s->refcount_table_offset = table_offset;
720 update_max_refcount_table_index(s);
721
722
723 qcow2_free_clusters(bs, old_table_offset,
724 old_table_size * REFTABLE_ENTRY_SIZE,
725 QCOW2_DISCARD_OTHER);
726
727 return end_offset;
728
729fail:
730 g_free(new_table);
731 return ret;
732}
733
734void qcow2_process_discards(BlockDriverState *bs, int ret)
735{
736 BDRVQcow2State *s = bs->opaque;
737 Qcow2DiscardRegion *d, *next;
738
739 QTAILQ_FOREACH_SAFE(d, &s->discards, next, next) {
740 QTAILQ_REMOVE(&s->discards, d, next);
741
742
743 if (ret >= 0) {
744 int r2 = bdrv_pdiscard(bs->file, d->offset, d->bytes);
745 if (r2 < 0) {
746 trace_qcow2_process_discards_failed_region(d->offset, d->bytes,
747 r2);
748 }
749 }
750
751 g_free(d);
752 }
753}
754
755static void update_refcount_discard(BlockDriverState *bs,
756 uint64_t offset, uint64_t length)
757{
758 BDRVQcow2State *s = bs->opaque;
759 Qcow2DiscardRegion *d, *p, *next;
760
761 QTAILQ_FOREACH(d, &s->discards, next) {
762 uint64_t new_start = MIN(offset, d->offset);
763 uint64_t new_end = MAX(offset + length, d->offset + d->bytes);
764
765 if (new_end - new_start <= length + d->bytes) {
766
767
768
769 assert(d->bytes + length == new_end - new_start);
770 d->offset = new_start;
771 d->bytes = new_end - new_start;
772 goto found;
773 }
774 }
775
776 d = g_malloc(sizeof(*d));
777 *d = (Qcow2DiscardRegion) {
778 .bs = bs,
779 .offset = offset,
780 .bytes = length,
781 };
782 QTAILQ_INSERT_TAIL(&s->discards, d, next);
783
784found:
785
786 QTAILQ_FOREACH_SAFE(p, &s->discards, next, next) {
787 if (p == d
788 || p->offset > d->offset + d->bytes
789 || d->offset > p->offset + p->bytes)
790 {
791 continue;
792 }
793
794
795 assert(p->offset == d->offset + d->bytes
796 || d->offset == p->offset + p->bytes);
797
798 QTAILQ_REMOVE(&s->discards, p, next);
799 d->offset = MIN(d->offset, p->offset);
800 d->bytes += p->bytes;
801 g_free(p);
802 }
803}
804
805
806
807
808static int update_refcount(BlockDriverState *bs,
809 int64_t offset,
810 int64_t length,
811 uint64_t addend,
812 bool decrease,
813 enum qcow2_discard_type type)
814{
815 BDRVQcow2State *s = bs->opaque;
816 int64_t start, last, cluster_offset;
817 void *refcount_block = NULL;
818 int64_t old_table_index = -1;
819 int ret;
820
821#ifdef DEBUG_ALLOC2
822 fprintf(stderr, "update_refcount: offset=%" PRId64 " size=%" PRId64
823 " addend=%s%" PRIu64 "\n", offset, length, decrease ? "-" : "",
824 addend);
825#endif
826 if (length < 0) {
827 return -EINVAL;
828 } else if (length == 0) {
829 return 0;
830 }
831
832 if (decrease) {
833 qcow2_cache_set_dependency(bs, s->refcount_block_cache,
834 s->l2_table_cache);
835 }
836
837 start = start_of_cluster(s, offset);
838 last = start_of_cluster(s, offset + length - 1);
839 for(cluster_offset = start; cluster_offset <= last;
840 cluster_offset += s->cluster_size)
841 {
842 int block_index;
843 uint64_t refcount;
844 int64_t cluster_index = cluster_offset >> s->cluster_bits;
845 int64_t table_index = cluster_index >> s->refcount_block_bits;
846
847
848 if (table_index != old_table_index) {
849 if (refcount_block) {
850 qcow2_cache_put(s->refcount_block_cache, &refcount_block);
851 }
852 ret = alloc_refcount_block(bs, cluster_index, &refcount_block);
853
854
855 if (ret == -EAGAIN) {
856 if (s->free_cluster_index > (start >> s->cluster_bits)) {
857 s->free_cluster_index = (start >> s->cluster_bits);
858 }
859 }
860 if (ret < 0) {
861 goto fail;
862 }
863 }
864 old_table_index = table_index;
865
866 qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refcount_block);
867
868
869 block_index = cluster_index & (s->refcount_block_size - 1);
870
871 refcount = s->get_refcount(refcount_block, block_index);
872 if (decrease ? (refcount - addend > refcount)
873 : (refcount + addend < refcount ||
874 refcount + addend > s->refcount_max))
875 {
876 ret = -EINVAL;
877 goto fail;
878 }
879 if (decrease) {
880 refcount -= addend;
881 } else {
882 refcount += addend;
883 }
884 if (refcount == 0 && cluster_index < s->free_cluster_index) {
885 s->free_cluster_index = cluster_index;
886 }
887 s->set_refcount(refcount_block, block_index, refcount);
888
889 if (refcount == 0) {
890 void *table;
891
892 table = qcow2_cache_is_table_offset(s->refcount_block_cache,
893 offset);
894 if (table != NULL) {
895 qcow2_cache_put(s->refcount_block_cache, &refcount_block);
896 old_table_index = -1;
897 qcow2_cache_discard(s->refcount_block_cache, table);
898 }
899
900 table = qcow2_cache_is_table_offset(s->l2_table_cache, offset);
901 if (table != NULL) {
902 qcow2_cache_discard(s->l2_table_cache, table);
903 }
904
905 if (s->discard_passthrough[type]) {
906 update_refcount_discard(bs, cluster_offset, s->cluster_size);
907 }
908 }
909 }
910
911 ret = 0;
912fail:
913 if (!s->cache_discards) {
914 qcow2_process_discards(bs, ret);
915 }
916
917
918 if (refcount_block) {
919 qcow2_cache_put(s->refcount_block_cache, &refcount_block);
920 }
921
922
923
924
925
926 if (ret < 0) {
927 int dummy;
928 dummy = update_refcount(bs, offset, cluster_offset - offset, addend,
929 !decrease, QCOW2_DISCARD_NEVER);
930 (void)dummy;
931 }
932
933 return ret;
934}
935
936
937
938
939
940
941
942
943
944int qcow2_update_cluster_refcount(BlockDriverState *bs,
945 int64_t cluster_index,
946 uint64_t addend, bool decrease,
947 enum qcow2_discard_type type)
948{
949 BDRVQcow2State *s = bs->opaque;
950 int ret;
951
952 ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend,
953 decrease, type);
954 if (ret < 0) {
955 return ret;
956 }
957
958 return 0;
959}
960
961
962
963
964
965
966
967
968
969static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size,
970 uint64_t max)
971{
972 BDRVQcow2State *s = bs->opaque;
973 uint64_t i, nb_clusters, refcount;
974 int ret;
975
976
977 if (s->cache_discards) {
978 qcow2_process_discards(bs, 0);
979 }
980
981 nb_clusters = size_to_clusters(s, size);
982retry:
983 for(i = 0; i < nb_clusters; i++) {
984 uint64_t next_cluster_index = s->free_cluster_index++;
985 ret = qcow2_get_refcount(bs, next_cluster_index, &refcount);
986
987 if (ret < 0) {
988 return ret;
989 } else if (refcount != 0) {
990 goto retry;
991 }
992 }
993
994
995
996 if (s->free_cluster_index > 0 &&
997 s->free_cluster_index - 1 > (max >> s->cluster_bits))
998 {
999 return -EFBIG;
1000 }
1001
1002#ifdef DEBUG_ALLOC2
1003 fprintf(stderr, "alloc_clusters: size=%" PRId64 " -> %" PRId64 "\n",
1004 size,
1005 (s->free_cluster_index - nb_clusters) << s->cluster_bits);
1006#endif
1007 return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
1008}
1009
1010int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size)
1011{
1012 int64_t offset;
1013 int ret;
1014
1015 BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
1016 do {
1017 offset = alloc_clusters_noref(bs, size, QCOW_MAX_CLUSTER_OFFSET);
1018 if (offset < 0) {
1019 return offset;
1020 }
1021
1022 ret = update_refcount(bs, offset, size, 1, false, QCOW2_DISCARD_NEVER);
1023 } while (ret == -EAGAIN);
1024
1025 if (ret < 0) {
1026 return ret;
1027 }
1028
1029 return offset;
1030}
1031
1032int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
1033 int64_t nb_clusters)
1034{
1035 BDRVQcow2State *s = bs->opaque;
1036 uint64_t cluster_index, refcount;
1037 uint64_t i;
1038 int ret;
1039
1040 assert(nb_clusters >= 0);
1041 if (nb_clusters == 0) {
1042 return 0;
1043 }
1044
1045 do {
1046
1047 cluster_index = offset >> s->cluster_bits;
1048 for(i = 0; i < nb_clusters; i++) {
1049 ret = qcow2_get_refcount(bs, cluster_index++, &refcount);
1050 if (ret < 0) {
1051 return ret;
1052 } else if (refcount != 0) {
1053 break;
1054 }
1055 }
1056
1057
1058 ret = update_refcount(bs, offset, i << s->cluster_bits, 1, false,
1059 QCOW2_DISCARD_NEVER);
1060 } while (ret == -EAGAIN);
1061
1062 if (ret < 0) {
1063 return ret;
1064 }
1065
1066 return i;
1067}
1068
1069
1070
1071int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
1072{
1073 BDRVQcow2State *s = bs->opaque;
1074 int64_t offset;
1075 size_t free_in_cluster;
1076 int ret;
1077
1078 BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES);
1079 assert(size > 0 && size <= s->cluster_size);
1080 assert(!s->free_byte_offset || offset_into_cluster(s, s->free_byte_offset));
1081
1082 offset = s->free_byte_offset;
1083
1084 if (offset) {
1085 uint64_t refcount;
1086 ret = qcow2_get_refcount(bs, offset >> s->cluster_bits, &refcount);
1087 if (ret < 0) {
1088 return ret;
1089 }
1090
1091 if (refcount == s->refcount_max) {
1092 offset = 0;
1093 }
1094 }
1095
1096 free_in_cluster = s->cluster_size - offset_into_cluster(s, offset);
1097 do {
1098 if (!offset || free_in_cluster < size) {
1099 int64_t new_cluster;
1100
1101 new_cluster = alloc_clusters_noref(bs, s->cluster_size,
1102 MIN(s->cluster_offset_mask,
1103 QCOW_MAX_CLUSTER_OFFSET));
1104 if (new_cluster < 0) {
1105 return new_cluster;
1106 }
1107
1108 if (new_cluster == 0) {
1109 qcow2_signal_corruption(bs, true, -1, -1, "Preventing invalid "
1110 "allocation of compressed cluster "
1111 "at offset 0");
1112 return -EIO;
1113 }
1114
1115 if (!offset || ROUND_UP(offset, s->cluster_size) != new_cluster) {
1116 offset = new_cluster;
1117 free_in_cluster = s->cluster_size;
1118 } else {
1119 free_in_cluster += s->cluster_size;
1120 }
1121 }
1122
1123 assert(offset);
1124 ret = update_refcount(bs, offset, size, 1, false, QCOW2_DISCARD_NEVER);
1125 if (ret < 0) {
1126 offset = 0;
1127 }
1128 } while (ret == -EAGAIN);
1129 if (ret < 0) {
1130 return ret;
1131 }
1132
1133
1134
1135 qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);
1136
1137 s->free_byte_offset = offset + size;
1138 if (!offset_into_cluster(s, s->free_byte_offset)) {
1139 s->free_byte_offset = 0;
1140 }
1141
1142 return offset;
1143}
1144
1145void qcow2_free_clusters(BlockDriverState *bs,
1146 int64_t offset, int64_t size,
1147 enum qcow2_discard_type type)
1148{
1149 int ret;
1150
1151 BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_FREE);
1152 ret = update_refcount(bs, offset, size, 1, true, type);
1153 if (ret < 0) {
1154 fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-ret));
1155
1156 }
1157}
1158
1159
1160
1161
1162
1163void qcow2_free_any_cluster(BlockDriverState *bs, uint64_t l2_entry,
1164 enum qcow2_discard_type type)
1165{
1166 BDRVQcow2State *s = bs->opaque;
1167 QCow2ClusterType ctype = qcow2_get_cluster_type(bs, l2_entry);
1168
1169 if (has_data_file(bs)) {
1170 if (s->discard_passthrough[type] &&
1171 (ctype == QCOW2_CLUSTER_NORMAL ||
1172 ctype == QCOW2_CLUSTER_ZERO_ALLOC))
1173 {
1174 bdrv_pdiscard(s->data_file, l2_entry & L2E_OFFSET_MASK,
1175 s->cluster_size);
1176 }
1177 return;
1178 }
1179
1180 switch (ctype) {
1181 case QCOW2_CLUSTER_COMPRESSED:
1182 {
1183 uint64_t coffset;
1184 int csize;
1185
1186 qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize);
1187 qcow2_free_clusters(bs, coffset, csize, type);
1188 }
1189 break;
1190 case QCOW2_CLUSTER_NORMAL:
1191 case QCOW2_CLUSTER_ZERO_ALLOC:
1192 if (offset_into_cluster(s, l2_entry & L2E_OFFSET_MASK)) {
1193 qcow2_signal_corruption(bs, false, -1, -1,
1194 "Cannot free unaligned cluster %#llx",
1195 l2_entry & L2E_OFFSET_MASK);
1196 } else {
1197 qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
1198 s->cluster_size, type);
1199 }
1200 break;
1201 case QCOW2_CLUSTER_ZERO_PLAIN:
1202 case QCOW2_CLUSTER_UNALLOCATED:
1203 break;
1204 default:
1205 abort();
1206 }
1207}
1208
1209int qcow2_write_caches(BlockDriverState *bs)
1210{
1211 BDRVQcow2State *s = bs->opaque;
1212 int ret;
1213
1214 ret = qcow2_cache_write(bs, s->l2_table_cache);
1215 if (ret < 0) {
1216 return ret;
1217 }
1218
1219 if (qcow2_need_accurate_refcounts(s)) {
1220 ret = qcow2_cache_write(bs, s->refcount_block_cache);
1221 if (ret < 0) {
1222 return ret;
1223 }
1224 }
1225
1226 return 0;
1227}
1228
1229int qcow2_flush_caches(BlockDriverState *bs)
1230{
1231 int ret = qcow2_write_caches(bs);
1232 if (ret < 0) {
1233 return ret;
1234 }
1235
1236 return bdrv_flush(bs->file->bs);
1237}
1238
1239
1240
1241
1242
1243
1244
1245int qcow2_update_snapshot_refcount(BlockDriverState *bs,
1246 int64_t l1_table_offset, int l1_size, int addend)
1247{
1248 BDRVQcow2State *s = bs->opaque;
1249 uint64_t *l1_table, *l2_slice, l2_offset, entry, l1_size2, refcount;
1250 bool l1_allocated = false;
1251 int64_t old_entry, old_l2_offset;
1252 unsigned slice, slice_size2, n_slices;
1253 int i, j, l1_modified = 0;
1254 int ret;
1255
1256 assert(addend >= -1 && addend <= 1);
1257
1258 l2_slice = NULL;
1259 l1_table = NULL;
1260 l1_size2 = l1_size * L1E_SIZE;
1261 slice_size2 = s->l2_slice_size * l2_entry_size(s);
1262 n_slices = s->cluster_size / slice_size2;
1263
1264 s->cache_discards = true;
1265
1266
1267
1268
1269 if (l1_table_offset != s->l1_table_offset) {
1270 l1_table = g_try_malloc0(l1_size2);
1271 if (l1_size2 && l1_table == NULL) {
1272 ret = -ENOMEM;
1273 goto fail;
1274 }
1275 l1_allocated = true;
1276
1277 ret = bdrv_pread(bs->file, l1_table_offset, l1_size2, l1_table, 0);
1278 if (ret < 0) {
1279 goto fail;
1280 }
1281
1282 for (i = 0; i < l1_size; i++) {
1283 be64_to_cpus(&l1_table[i]);
1284 }
1285 } else {
1286 assert(l1_size == s->l1_size);
1287 l1_table = s->l1_table;
1288 l1_allocated = false;
1289 }
1290
1291 for (i = 0; i < l1_size; i++) {
1292 l2_offset = l1_table[i];
1293 if (l2_offset) {
1294 old_l2_offset = l2_offset;
1295 l2_offset &= L1E_OFFSET_MASK;
1296
1297 if (offset_into_cluster(s, l2_offset)) {
1298 qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#"
1299 PRIx64 " unaligned (L1 index: %#x)",
1300 l2_offset, i);
1301 ret = -EIO;
1302 goto fail;
1303 }
1304
1305 for (slice = 0; slice < n_slices; slice++) {
1306 ret = qcow2_cache_get(bs, s->l2_table_cache,
1307 l2_offset + slice * slice_size2,
1308 (void **) &l2_slice);
1309 if (ret < 0) {
1310 goto fail;
1311 }
1312
1313 for (j = 0; j < s->l2_slice_size; j++) {
1314 uint64_t cluster_index;
1315 uint64_t offset;
1316
1317 entry = get_l2_entry(s, l2_slice, j);
1318 old_entry = entry;
1319 entry &= ~QCOW_OFLAG_COPIED;
1320 offset = entry & L2E_OFFSET_MASK;
1321
1322 switch (qcow2_get_cluster_type(bs, entry)) {
1323 case QCOW2_CLUSTER_COMPRESSED:
1324 if (addend != 0) {
1325 uint64_t coffset;
1326 int csize;
1327
1328 qcow2_parse_compressed_l2_entry(bs, entry,
1329 &coffset, &csize);
1330 ret = update_refcount(
1331 bs, coffset, csize,
1332 abs(addend), addend < 0,
1333 QCOW2_DISCARD_SNAPSHOT);
1334 if (ret < 0) {
1335 goto fail;
1336 }
1337 }
1338
1339 refcount = 2;
1340 break;
1341
1342 case QCOW2_CLUSTER_NORMAL:
1343 case QCOW2_CLUSTER_ZERO_ALLOC:
1344 if (offset_into_cluster(s, offset)) {
1345
1346 int l2_index = slice * s->l2_slice_size + j;
1347 qcow2_signal_corruption(
1348 bs, true, -1, -1, "Cluster "
1349 "allocation offset %#" PRIx64
1350 " unaligned (L2 offset: %#"
1351 PRIx64 ", L2 index: %#x)",
1352 offset, l2_offset, l2_index);
1353 ret = -EIO;
1354 goto fail;
1355 }
1356
1357 cluster_index = offset >> s->cluster_bits;
1358 assert(cluster_index);
1359 if (addend != 0) {
1360 ret = qcow2_update_cluster_refcount(
1361 bs, cluster_index, abs(addend), addend < 0,
1362 QCOW2_DISCARD_SNAPSHOT);
1363 if (ret < 0) {
1364 goto fail;
1365 }
1366 }
1367
1368 ret = qcow2_get_refcount(bs, cluster_index, &refcount);
1369 if (ret < 0) {
1370 goto fail;
1371 }
1372 break;
1373
1374 case QCOW2_CLUSTER_ZERO_PLAIN:
1375 case QCOW2_CLUSTER_UNALLOCATED:
1376 refcount = 0;
1377 break;
1378
1379 default:
1380 abort();
1381 }
1382
1383 if (refcount == 1) {
1384 entry |= QCOW_OFLAG_COPIED;
1385 }
1386 if (entry != old_entry) {
1387 if (addend > 0) {
1388 qcow2_cache_set_dependency(bs, s->l2_table_cache,
1389 s->refcount_block_cache);
1390 }
1391 set_l2_entry(s, l2_slice, j, entry);
1392 qcow2_cache_entry_mark_dirty(s->l2_table_cache,
1393 l2_slice);
1394 }
1395 }
1396
1397 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
1398 }
1399
1400 if (addend != 0) {
1401 ret = qcow2_update_cluster_refcount(bs, l2_offset >>
1402 s->cluster_bits,
1403 abs(addend), addend < 0,
1404 QCOW2_DISCARD_SNAPSHOT);
1405 if (ret < 0) {
1406 goto fail;
1407 }
1408 }
1409 ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits,
1410 &refcount);
1411 if (ret < 0) {
1412 goto fail;
1413 } else if (refcount == 1) {
1414 l2_offset |= QCOW_OFLAG_COPIED;
1415 }
1416 if (l2_offset != old_l2_offset) {
1417 l1_table[i] = l2_offset;
1418 l1_modified = 1;
1419 }
1420 }
1421 }
1422
1423 ret = bdrv_flush(bs);
1424fail:
1425 if (l2_slice) {
1426 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
1427 }
1428
1429 s->cache_discards = false;
1430 qcow2_process_discards(bs, ret);
1431
1432
1433 if (ret == 0 && addend >= 0 && l1_modified) {
1434 for (i = 0; i < l1_size; i++) {
1435 cpu_to_be64s(&l1_table[i]);
1436 }
1437
1438 ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_size2, l1_table,
1439 0);
1440
1441 for (i = 0; i < l1_size; i++) {
1442 be64_to_cpus(&l1_table[i]);
1443 }
1444 }
1445 if (l1_allocated)
1446 g_free(l1_table);
1447 return ret;
1448}
1449
1450
1451
1452
1453
1454
1455
1456
1457static uint64_t refcount_array_byte_size(BDRVQcow2State *s, uint64_t entries)
1458{
1459
1460
1461
1462
1463 assert(entries < (UINT64_C(1) << (64 - 9)));
1464
1465
1466
1467
1468 return DIV_ROUND_UP(entries << s->refcount_order, 8);
1469}
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480static int realloc_refcount_array(BDRVQcow2State *s, void **array,
1481 int64_t *size, int64_t new_size)
1482{
1483 int64_t old_byte_size, new_byte_size;
1484 void *new_ptr;
1485
1486
1487 old_byte_size = size_to_clusters(s, refcount_array_byte_size(s, *size))
1488 * s->cluster_size;
1489 new_byte_size = size_to_clusters(s, refcount_array_byte_size(s, new_size))
1490 * s->cluster_size;
1491
1492 if (new_byte_size == old_byte_size) {
1493 *size = new_size;
1494 return 0;
1495 }
1496
1497 assert(new_byte_size > 0);
1498
1499 if (new_byte_size > SIZE_MAX) {
1500 return -ENOMEM;
1501 }
1502
1503 new_ptr = g_try_realloc(*array, new_byte_size);
1504 if (!new_ptr) {
1505 return -ENOMEM;
1506 }
1507
1508 if (new_byte_size > old_byte_size) {
1509 memset((char *)new_ptr + old_byte_size, 0,
1510 new_byte_size - old_byte_size);
1511 }
1512
1513 *array = new_ptr;
1514 *size = new_size;
1515
1516 return 0;
1517}
1518
1519
1520
1521
1522
1523
1524
1525
1526int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
1527 void **refcount_table,
1528 int64_t *refcount_table_size,
1529 int64_t offset, int64_t size)
1530{
1531 BDRVQcow2State *s = bs->opaque;
1532 uint64_t start, last, cluster_offset, k, refcount;
1533 int64_t file_len;
1534 int ret;
1535
1536 if (size <= 0) {
1537 return 0;
1538 }
1539
1540 file_len = bdrv_getlength(bs->file->bs);
1541 if (file_len < 0) {
1542 return file_len;
1543 }
1544
1545
1546
1547
1548
1549
1550 if (offset + size - file_len >= s->cluster_size) {
1551 fprintf(stderr, "ERROR: counting reference for region exceeding the "
1552 "end of the file by one cluster or more: offset 0x%" PRIx64
1553 " size 0x%" PRIx64 "\n", offset, size);
1554 res->corruptions++;
1555 return 0;
1556 }
1557
1558 start = start_of_cluster(s, offset);
1559 last = start_of_cluster(s, offset + size - 1);
1560 for(cluster_offset = start; cluster_offset <= last;
1561 cluster_offset += s->cluster_size) {
1562 k = cluster_offset >> s->cluster_bits;
1563 if (k >= *refcount_table_size) {
1564 ret = realloc_refcount_array(s, refcount_table,
1565 refcount_table_size, k + 1);
1566 if (ret < 0) {
1567 res->check_errors++;
1568 return ret;
1569 }
1570 }
1571
1572 refcount = s->get_refcount(*refcount_table, k);
1573 if (refcount == s->refcount_max) {
1574 fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
1575 "\n", cluster_offset);
1576 fprintf(stderr, "Use qemu-img amend to increase the refcount entry "
1577 "width or qemu-img convert to create a clean copy if the "
1578 "image cannot be opened for writing\n");
1579 res->corruptions++;
1580 continue;
1581 }
1582 s->set_refcount(*refcount_table, k, refcount + 1);
1583 }
1584
1585 return 0;
1586}
1587
1588
1589enum {
1590 CHECK_FRAG_INFO = 0x2,
1591};
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602static int fix_l2_entry_by_zero(BlockDriverState *bs, BdrvCheckResult *res,
1603 uint64_t l2_offset,
1604 uint64_t *l2_table, int l2_index, bool active,
1605 bool *metadata_overlap)
1606{
1607 BDRVQcow2State *s = bs->opaque;
1608 int ret;
1609 int idx = l2_index * (l2_entry_size(s) / sizeof(uint64_t));
1610 uint64_t l2e_offset = l2_offset + (uint64_t)l2_index * l2_entry_size(s);
1611 int ign = active ? QCOW2_OL_ACTIVE_L2 : QCOW2_OL_INACTIVE_L2;
1612
1613 if (has_subclusters(s)) {
1614 uint64_t l2_bitmap = get_l2_bitmap(s, l2_table, l2_index);
1615
1616
1617 l2_bitmap |= l2_bitmap << 32;
1618 l2_bitmap &= QCOW_L2_BITMAP_ALL_ZEROES;
1619
1620 set_l2_bitmap(s, l2_table, l2_index, l2_bitmap);
1621 set_l2_entry(s, l2_table, l2_index, 0);
1622 } else {
1623 set_l2_entry(s, l2_table, l2_index, QCOW_OFLAG_ZERO);
1624 }
1625
1626 ret = qcow2_pre_write_overlap_check(bs, ign, l2e_offset, l2_entry_size(s),
1627 false);
1628 if (metadata_overlap) {
1629 *metadata_overlap = ret < 0;
1630 }
1631 if (ret < 0) {
1632 fprintf(stderr, "ERROR: Overlap check failed\n");
1633 goto fail;
1634 }
1635
1636 ret = bdrv_pwrite_sync(bs->file, l2e_offset, l2_entry_size(s),
1637 &l2_table[idx], 0);
1638 if (ret < 0) {
1639 fprintf(stderr, "ERROR: Failed to overwrite L2 "
1640 "table entry: %s\n", strerror(-ret));
1641 goto fail;
1642 }
1643
1644 res->corruptions--;
1645 res->corruptions_fixed++;
1646 return 0;
1647
1648fail:
1649 res->check_errors++;
1650 return ret;
1651}
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
1662 void **refcount_table,
1663 int64_t *refcount_table_size, int64_t l2_offset,
1664 int flags, BdrvCheckMode fix, bool active)
1665{
1666 BDRVQcow2State *s = bs->opaque;
1667 uint64_t l2_entry, l2_bitmap;
1668 uint64_t next_contiguous_offset = 0;
1669 int i, ret;
1670 size_t l2_size_bytes = s->l2_size * l2_entry_size(s);
1671 g_autofree uint64_t *l2_table = g_malloc(l2_size_bytes);
1672 bool metadata_overlap;
1673
1674
1675 ret = bdrv_pread(bs->file, l2_offset, l2_size_bytes, l2_table, 0);
1676 if (ret < 0) {
1677 fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
1678 res->check_errors++;
1679 return ret;
1680 }
1681
1682
1683 for (i = 0; i < s->l2_size; i++) {
1684 uint64_t coffset;
1685 int csize;
1686 QCow2ClusterType type;
1687
1688 l2_entry = get_l2_entry(s, l2_table, i);
1689 l2_bitmap = get_l2_bitmap(s, l2_table, i);
1690 type = qcow2_get_cluster_type(bs, l2_entry);
1691
1692 if (type != QCOW2_CLUSTER_COMPRESSED) {
1693
1694 if (l2_entry & L2E_STD_RESERVED_MASK) {
1695 fprintf(stderr, "ERROR found l2 entry with reserved bits set: "
1696 "%" PRIx64 "\n", l2_entry);
1697 res->corruptions++;
1698 }
1699 }
1700
1701 switch (type) {
1702 case QCOW2_CLUSTER_COMPRESSED:
1703
1704 if (l2_entry & QCOW_OFLAG_COPIED) {
1705 fprintf(stderr, "ERROR: coffset=0x%" PRIx64 ": "
1706 "copied flag must never be set for compressed "
1707 "clusters\n", l2_entry & s->cluster_offset_mask);
1708 l2_entry &= ~QCOW_OFLAG_COPIED;
1709 res->corruptions++;
1710 }
1711
1712 if (has_data_file(bs)) {
1713 fprintf(stderr, "ERROR compressed cluster %d with data file, "
1714 "entry=0x%" PRIx64 "\n", i, l2_entry);
1715 res->corruptions++;
1716 break;
1717 }
1718
1719 if (l2_bitmap) {
1720 fprintf(stderr, "ERROR compressed cluster %d with non-zero "
1721 "subcluster allocation bitmap, entry=0x%" PRIx64 "\n",
1722 i, l2_entry);
1723 res->corruptions++;
1724 break;
1725 }
1726
1727
1728 qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize);
1729 ret = qcow2_inc_refcounts_imrt(
1730 bs, res, refcount_table, refcount_table_size, coffset, csize);
1731 if (ret < 0) {
1732 return ret;
1733 }
1734
1735 if (flags & CHECK_FRAG_INFO) {
1736 res->bfi.allocated_clusters++;
1737 res->bfi.compressed_clusters++;
1738
1739
1740
1741
1742
1743
1744
1745 res->bfi.fragmented_clusters++;
1746 }
1747 break;
1748
1749 case QCOW2_CLUSTER_ZERO_ALLOC:
1750 case QCOW2_CLUSTER_NORMAL:
1751 {
1752 uint64_t offset = l2_entry & L2E_OFFSET_MASK;
1753
1754 if ((l2_bitmap >> 32) & l2_bitmap) {
1755 res->corruptions++;
1756 fprintf(stderr, "ERROR offset=%" PRIx64 ": Allocated "
1757 "cluster has corrupted subcluster allocation bitmap\n",
1758 offset);
1759 }
1760
1761
1762 if (offset_into_cluster(s, offset)) {
1763 bool contains_data;
1764 res->corruptions++;
1765
1766 if (has_subclusters(s)) {
1767 contains_data = (l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC);
1768 } else {
1769 contains_data = !(l2_entry & QCOW_OFLAG_ZERO);
1770 }
1771
1772 if (!contains_data) {
1773 fprintf(stderr, "%s offset=%" PRIx64 ": Preallocated "
1774 "cluster is not properly aligned; L2 entry "
1775 "corrupted.\n",
1776 fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR",
1777 offset);
1778 if (fix & BDRV_FIX_ERRORS) {
1779 ret = fix_l2_entry_by_zero(bs, res, l2_offset,
1780 l2_table, i, active,
1781 &metadata_overlap);
1782 if (metadata_overlap) {
1783
1784
1785
1786
1787 return ret;
1788 }
1789
1790 if (ret == 0) {
1791
1792
1793
1794
1795 continue;
1796 }
1797
1798
1799
1800
1801
1802
1803 }
1804 } else {
1805 fprintf(stderr, "ERROR offset=%" PRIx64 ": Data cluster is "
1806 "not properly aligned; L2 entry corrupted.\n", offset);
1807 }
1808 }
1809
1810 if (flags & CHECK_FRAG_INFO) {
1811 res->bfi.allocated_clusters++;
1812 if (next_contiguous_offset &&
1813 offset != next_contiguous_offset) {
1814 res->bfi.fragmented_clusters++;
1815 }
1816 next_contiguous_offset = offset + s->cluster_size;
1817 }
1818
1819
1820 if (!has_data_file(bs)) {
1821 ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table,
1822 refcount_table_size,
1823 offset, s->cluster_size);
1824 if (ret < 0) {
1825 return ret;
1826 }
1827 }
1828 break;
1829 }
1830
1831 case QCOW2_CLUSTER_ZERO_PLAIN:
1832
1833 assert(!l2_bitmap);
1834 break;
1835
1836 case QCOW2_CLUSTER_UNALLOCATED:
1837 if (l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC) {
1838 res->corruptions++;
1839 fprintf(stderr, "ERROR: Unallocated "
1840 "cluster has non-zero subcluster allocation map\n");
1841 }
1842 break;
1843
1844 default:
1845 abort();
1846 }
1847 }
1848
1849 return 0;
1850}
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860static int check_refcounts_l1(BlockDriverState *bs,
1861 BdrvCheckResult *res,
1862 void **refcount_table,
1863 int64_t *refcount_table_size,
1864 int64_t l1_table_offset, int l1_size,
1865 int flags, BdrvCheckMode fix, bool active)
1866{
1867 BDRVQcow2State *s = bs->opaque;
1868 size_t l1_size_bytes = l1_size * L1E_SIZE;
1869 g_autofree uint64_t *l1_table = NULL;
1870 uint64_t l2_offset;
1871 int i, ret;
1872
1873 if (!l1_size) {
1874 return 0;
1875 }
1876
1877
1878 ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, refcount_table_size,
1879 l1_table_offset, l1_size_bytes);
1880 if (ret < 0) {
1881 return ret;
1882 }
1883
1884 l1_table = g_try_malloc(l1_size_bytes);
1885 if (l1_table == NULL) {
1886 res->check_errors++;
1887 return -ENOMEM;
1888 }
1889
1890
1891 ret = bdrv_pread(bs->file, l1_table_offset, l1_size_bytes, l1_table, 0);
1892 if (ret < 0) {
1893 fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
1894 res->check_errors++;
1895 return ret;
1896 }
1897
1898 for (i = 0; i < l1_size; i++) {
1899 be64_to_cpus(&l1_table[i]);
1900 }
1901
1902
1903 for (i = 0; i < l1_size; i++) {
1904 if (!l1_table[i]) {
1905 continue;
1906 }
1907
1908 if (l1_table[i] & L1E_RESERVED_MASK) {
1909 fprintf(stderr, "ERROR found L1 entry with reserved bits set: "
1910 "%" PRIx64 "\n", l1_table[i]);
1911 res->corruptions++;
1912 }
1913
1914 l2_offset = l1_table[i] & L1E_OFFSET_MASK;
1915
1916
1917 ret = qcow2_inc_refcounts_imrt(bs, res,
1918 refcount_table, refcount_table_size,
1919 l2_offset, s->cluster_size);
1920 if (ret < 0) {
1921 return ret;
1922 }
1923
1924
1925 if (offset_into_cluster(s, l2_offset)) {
1926 fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
1927 "cluster aligned; L1 entry corrupted\n", l2_offset);
1928 res->corruptions++;
1929 }
1930
1931
1932 ret = check_refcounts_l2(bs, res, refcount_table,
1933 refcount_table_size, l2_offset, flags,
1934 fix, active);
1935 if (ret < 0) {
1936 return ret;
1937 }
1938 }
1939
1940 return 0;
1941}
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
1952 BdrvCheckMode fix)
1953{
1954 BDRVQcow2State *s = bs->opaque;
1955 uint64_t *l2_table = qemu_blockalign(bs, s->cluster_size);
1956 int ret;
1957 uint64_t refcount;
1958 int i, j;
1959 bool repair;
1960
1961 if (fix & BDRV_FIX_ERRORS) {
1962
1963 repair = true;
1964 } else if (fix & BDRV_FIX_LEAKS) {
1965
1966
1967
1968 repair = !res->check_errors && !res->corruptions && !res->leaks;
1969 } else {
1970 repair = false;
1971 }
1972
1973 for (i = 0; i < s->l1_size; i++) {
1974 uint64_t l1_entry = s->l1_table[i];
1975 uint64_t l2_offset = l1_entry & L1E_OFFSET_MASK;
1976 int l2_dirty = 0;
1977
1978 if (!l2_offset) {
1979 continue;
1980 }
1981
1982 ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits,
1983 &refcount);
1984 if (ret < 0) {
1985
1986 continue;
1987 }
1988 if ((refcount == 1) != ((l1_entry & QCOW_OFLAG_COPIED) != 0)) {
1989 res->corruptions++;
1990 fprintf(stderr, "%s OFLAG_COPIED L2 cluster: l1_index=%d "
1991 "l1_entry=%" PRIx64 " refcount=%" PRIu64 "\n",
1992 repair ? "Repairing" : "ERROR", i, l1_entry, refcount);
1993 if (repair) {
1994 s->l1_table[i] = refcount == 1
1995 ? l1_entry | QCOW_OFLAG_COPIED
1996 : l1_entry & ~QCOW_OFLAG_COPIED;
1997 ret = qcow2_write_l1_entry(bs, i);
1998 if (ret < 0) {
1999 res->check_errors++;
2000 goto fail;
2001 }
2002 res->corruptions--;
2003 res->corruptions_fixed++;
2004 }
2005 }
2006
2007 ret = bdrv_pread(bs->file, l2_offset, s->l2_size * l2_entry_size(s),
2008 l2_table, 0);
2009 if (ret < 0) {
2010 fprintf(stderr, "ERROR: Could not read L2 table: %s\n",
2011 strerror(-ret));
2012 res->check_errors++;
2013 goto fail;
2014 }
2015
2016 for (j = 0; j < s->l2_size; j++) {
2017 uint64_t l2_entry = get_l2_entry(s, l2_table, j);
2018 uint64_t data_offset = l2_entry & L2E_OFFSET_MASK;
2019 QCow2ClusterType cluster_type = qcow2_get_cluster_type(bs, l2_entry);
2020
2021 if (cluster_type == QCOW2_CLUSTER_NORMAL ||
2022 cluster_type == QCOW2_CLUSTER_ZERO_ALLOC) {
2023 if (has_data_file(bs)) {
2024 refcount = 1;
2025 } else {
2026 ret = qcow2_get_refcount(bs,
2027 data_offset >> s->cluster_bits,
2028 &refcount);
2029 if (ret < 0) {
2030
2031 continue;
2032 }
2033 }
2034 if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) {
2035 res->corruptions++;
2036 fprintf(stderr, "%s OFLAG_COPIED data cluster: "
2037 "l2_entry=%" PRIx64 " refcount=%" PRIu64 "\n",
2038 repair ? "Repairing" : "ERROR", l2_entry, refcount);
2039 if (repair) {
2040 set_l2_entry(s, l2_table, j,
2041 refcount == 1 ?
2042 l2_entry | QCOW_OFLAG_COPIED :
2043 l2_entry & ~QCOW_OFLAG_COPIED);
2044 l2_dirty++;
2045 }
2046 }
2047 }
2048 }
2049
2050 if (l2_dirty > 0) {
2051 ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
2052 l2_offset, s->cluster_size,
2053 false);
2054 if (ret < 0) {
2055 fprintf(stderr, "ERROR: Could not write L2 table; metadata "
2056 "overlap check failed: %s\n", strerror(-ret));
2057 res->check_errors++;
2058 goto fail;
2059 }
2060
2061 ret = bdrv_pwrite(bs->file, l2_offset, s->cluster_size, l2_table,
2062 0);
2063 if (ret < 0) {
2064 fprintf(stderr, "ERROR: Could not write L2 table: %s\n",
2065 strerror(-ret));
2066 res->check_errors++;
2067 goto fail;
2068 }
2069 res->corruptions -= l2_dirty;
2070 res->corruptions_fixed += l2_dirty;
2071 }
2072 }
2073
2074 ret = 0;
2075
2076fail:
2077 qemu_vfree(l2_table);
2078 return ret;
2079}
2080
2081
2082
2083
2084
2085static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
2086 BdrvCheckMode fix, bool *rebuild,
2087 void **refcount_table, int64_t *nb_clusters)
2088{
2089 BDRVQcow2State *s = bs->opaque;
2090 int64_t i, size;
2091 int ret;
2092
2093 for(i = 0; i < s->refcount_table_size; i++) {
2094 uint64_t offset, cluster;
2095 offset = s->refcount_table[i] & REFT_OFFSET_MASK;
2096 cluster = offset >> s->cluster_bits;
2097
2098 if (s->refcount_table[i] & REFT_RESERVED_MASK) {
2099 fprintf(stderr, "ERROR refcount table entry %" PRId64 " has "
2100 "reserved bits set\n", i);
2101 res->corruptions++;
2102 *rebuild = true;
2103 continue;
2104 }
2105
2106
2107 if (offset_into_cluster(s, offset)) {
2108 fprintf(stderr, "ERROR refcount block %" PRId64 " is not "
2109 "cluster aligned; refcount table entry corrupted\n", i);
2110 res->corruptions++;
2111 *rebuild = true;
2112 continue;
2113 }
2114
2115 if (cluster >= *nb_clusters) {
2116 res->corruptions++;
2117 fprintf(stderr, "%s refcount block %" PRId64 " is outside image\n",
2118 fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
2119
2120 if (fix & BDRV_FIX_ERRORS) {
2121 int64_t new_nb_clusters;
2122 Error *local_err = NULL;
2123
2124 if (offset > INT64_MAX - s->cluster_size) {
2125 ret = -EINVAL;
2126 goto resize_fail;
2127 }
2128
2129 ret = bdrv_truncate(bs->file, offset + s->cluster_size, false,
2130 PREALLOC_MODE_OFF, 0, &local_err);
2131 if (ret < 0) {
2132 error_report_err(local_err);
2133 goto resize_fail;
2134 }
2135 size = bdrv_getlength(bs->file->bs);
2136 if (size < 0) {
2137 ret = size;
2138 goto resize_fail;
2139 }
2140
2141 new_nb_clusters = size_to_clusters(s, size);
2142 assert(new_nb_clusters >= *nb_clusters);
2143
2144 ret = realloc_refcount_array(s, refcount_table,
2145 nb_clusters, new_nb_clusters);
2146 if (ret < 0) {
2147 res->check_errors++;
2148 return ret;
2149 }
2150
2151 if (cluster >= *nb_clusters) {
2152 ret = -EINVAL;
2153 goto resize_fail;
2154 }
2155
2156 res->corruptions--;
2157 res->corruptions_fixed++;
2158 ret = qcow2_inc_refcounts_imrt(bs, res,
2159 refcount_table, nb_clusters,
2160 offset, s->cluster_size);
2161 if (ret < 0) {
2162 return ret;
2163 }
2164
2165
2166
2167 continue;
2168
2169resize_fail:
2170 *rebuild = true;
2171 fprintf(stderr, "ERROR could not resize image: %s\n",
2172 strerror(-ret));
2173 }
2174 continue;
2175 }
2176
2177 if (offset != 0) {
2178 ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
2179 offset, s->cluster_size);
2180 if (ret < 0) {
2181 return ret;
2182 }
2183 if (s->get_refcount(*refcount_table, cluster) != 1) {
2184 fprintf(stderr, "ERROR refcount block %" PRId64
2185 " refcount=%" PRIu64 "\n", i,
2186 s->get_refcount(*refcount_table, cluster));
2187 res->corruptions++;
2188 *rebuild = true;
2189 }
2190 }
2191 }
2192
2193 return 0;
2194}
2195
2196
2197
2198
2199static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
2200 BdrvCheckMode fix, bool *rebuild,
2201 void **refcount_table, int64_t *nb_clusters)
2202{
2203 BDRVQcow2State *s = bs->opaque;
2204 int64_t i;
2205 QCowSnapshot *sn;
2206 int ret;
2207
2208 if (!*refcount_table) {
2209 int64_t old_size = 0;
2210 ret = realloc_refcount_array(s, refcount_table,
2211 &old_size, *nb_clusters);
2212 if (ret < 0) {
2213 res->check_errors++;
2214 return ret;
2215 }
2216 }
2217
2218
2219 ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
2220 0, s->cluster_size);
2221 if (ret < 0) {
2222 return ret;
2223 }
2224
2225
2226 ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
2227 s->l1_table_offset, s->l1_size, CHECK_FRAG_INFO,
2228 fix, true);
2229 if (ret < 0) {
2230 return ret;
2231 }
2232
2233
2234 if (has_data_file(bs) && s->nb_snapshots) {
2235 fprintf(stderr, "ERROR %d snapshots in image with data file\n",
2236 s->nb_snapshots);
2237 res->corruptions++;
2238 }
2239
2240 for (i = 0; i < s->nb_snapshots; i++) {
2241 sn = s->snapshots + i;
2242 if (offset_into_cluster(s, sn->l1_table_offset)) {
2243 fprintf(stderr, "ERROR snapshot %s (%s) l1_offset=%#" PRIx64 ": "
2244 "L1 table is not cluster aligned; snapshot table entry "
2245 "corrupted\n", sn->id_str, sn->name, sn->l1_table_offset);
2246 res->corruptions++;
2247 continue;
2248 }
2249 if (sn->l1_size > QCOW_MAX_L1_SIZE / L1E_SIZE) {
2250 fprintf(stderr, "ERROR snapshot %s (%s) l1_size=%#" PRIx32 ": "
2251 "L1 table is too large; snapshot table entry corrupted\n",
2252 sn->id_str, sn->name, sn->l1_size);
2253 res->corruptions++;
2254 continue;
2255 }
2256 ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
2257 sn->l1_table_offset, sn->l1_size, 0, fix,
2258 false);
2259 if (ret < 0) {
2260 return ret;
2261 }
2262 }
2263 ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
2264 s->snapshots_offset, s->snapshots_size);
2265 if (ret < 0) {
2266 return ret;
2267 }
2268
2269
2270 ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
2271 s->refcount_table_offset,
2272 s->refcount_table_size *
2273 REFTABLE_ENTRY_SIZE);
2274 if (ret < 0) {
2275 return ret;
2276 }
2277
2278
2279 if (s->crypto_header.length) {
2280 ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
2281 s->crypto_header.offset,
2282 s->crypto_header.length);
2283 if (ret < 0) {
2284 return ret;
2285 }
2286 }
2287
2288
2289 ret = qcow2_check_bitmaps_refcounts(bs, res, refcount_table, nb_clusters);
2290 if (ret < 0) {
2291 return ret;
2292 }
2293
2294 return check_refblocks(bs, res, fix, rebuild, refcount_table, nb_clusters);
2295}
2296
2297
2298
2299
2300
2301static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
2302 BdrvCheckMode fix, bool *rebuild,
2303 int64_t *highest_cluster,
2304 void *refcount_table, int64_t nb_clusters)
2305{
2306 BDRVQcow2State *s = bs->opaque;
2307 int64_t i;
2308 uint64_t refcount1, refcount2;
2309 int ret;
2310
2311 for (i = 0, *highest_cluster = 0; i < nb_clusters; i++) {
2312 ret = qcow2_get_refcount(bs, i, &refcount1);
2313 if (ret < 0) {
2314 fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
2315 i, strerror(-ret));
2316 res->check_errors++;
2317 continue;
2318 }
2319
2320 refcount2 = s->get_refcount(refcount_table, i);
2321
2322 if (refcount1 > 0 || refcount2 > 0) {
2323 *highest_cluster = i;
2324 }
2325
2326 if (refcount1 != refcount2) {
2327
2328 int *num_fixed = NULL;
2329 if (refcount1 == 0) {
2330 *rebuild = true;
2331 } else if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) {
2332 num_fixed = &res->leaks_fixed;
2333 } else if (refcount1 < refcount2 && (fix & BDRV_FIX_ERRORS)) {
2334 num_fixed = &res->corruptions_fixed;
2335 }
2336
2337 fprintf(stderr, "%s cluster %" PRId64 " refcount=%" PRIu64
2338 " reference=%" PRIu64 "\n",
2339 num_fixed != NULL ? "Repairing" :
2340 refcount1 < refcount2 ? "ERROR" :
2341 "Leaked",
2342 i, refcount1, refcount2);
2343
2344 if (num_fixed) {
2345 ret = update_refcount(bs, i << s->cluster_bits, 1,
2346 refcount_diff(refcount1, refcount2),
2347 refcount1 > refcount2,
2348 QCOW2_DISCARD_ALWAYS);
2349 if (ret >= 0) {
2350 (*num_fixed)++;
2351 continue;
2352 }
2353 }
2354
2355
2356 if (refcount1 < refcount2) {
2357 res->corruptions++;
2358 } else {
2359 res->leaks++;
2360 }
2361 }
2362 }
2363}
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377static int64_t alloc_clusters_imrt(BlockDriverState *bs,
2378 int cluster_count,
2379 void **refcount_table,
2380 int64_t *imrt_nb_clusters,
2381 int64_t *first_free_cluster)
2382{
2383 BDRVQcow2State *s = bs->opaque;
2384 int64_t cluster = *first_free_cluster, i;
2385 bool first_gap = true;
2386 int contiguous_free_clusters;
2387 int ret;
2388
2389
2390
2391 for (contiguous_free_clusters = 0;
2392 cluster < *imrt_nb_clusters &&
2393 contiguous_free_clusters < cluster_count;
2394 cluster++)
2395 {
2396 if (!s->get_refcount(*refcount_table, cluster)) {
2397 contiguous_free_clusters++;
2398 if (first_gap) {
2399
2400
2401 *first_free_cluster = cluster;
2402 first_gap = false;
2403 }
2404 } else if (contiguous_free_clusters) {
2405 contiguous_free_clusters = 0;
2406 }
2407 }
2408
2409
2410
2411
2412
2413
2414
2415
2416 if (contiguous_free_clusters < cluster_count) {
2417
2418
2419
2420
2421
2422
2423 ret = realloc_refcount_array(s, refcount_table, imrt_nb_clusters,
2424 cluster + cluster_count
2425 - contiguous_free_clusters);
2426 if (ret < 0) {
2427 return ret;
2428 }
2429 }
2430
2431
2432 cluster -= contiguous_free_clusters;
2433 for (i = 0; i < cluster_count; i++) {
2434 s->set_refcount(*refcount_table, cluster + i, 1);
2435 }
2436
2437 return cluster << s->cluster_bits;
2438}
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465static int rebuild_refcounts_write_refblocks(
2466 BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters,
2467 int64_t first_cluster, int64_t end_cluster,
2468 uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr,
2469 Error **errp
2470 )
2471{
2472 BDRVQcow2State *s = bs->opaque;
2473 int64_t cluster;
2474 int64_t refblock_offset, refblock_start, refblock_index;
2475 int64_t first_free_cluster = 0;
2476 uint64_t *on_disk_reftable = *on_disk_reftable_ptr;
2477 uint32_t on_disk_reftable_entries = *on_disk_reftable_entries_ptr;
2478 void *on_disk_refblock;
2479 bool reftable_grown = false;
2480 int ret;
2481
2482 for (cluster = first_cluster; cluster < end_cluster; cluster++) {
2483
2484 if (!s->get_refcount(*refcount_table, cluster)) {
2485 continue;
2486 }
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497 refblock_index = cluster >> s->refcount_block_bits;
2498 refblock_start = refblock_index << s->refcount_block_bits;
2499
2500 if (on_disk_reftable_entries > refblock_index &&
2501 on_disk_reftable[refblock_index])
2502 {
2503
2504
2505
2506
2507
2508 refblock_offset = on_disk_reftable[refblock_index];
2509 } else {
2510 int64_t refblock_cluster_index;
2511
2512
2513 if (first_free_cluster < refblock_start) {
2514 first_free_cluster = refblock_start;
2515 }
2516 refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table,
2517 nb_clusters,
2518 &first_free_cluster);
2519 if (refblock_offset < 0) {
2520 error_setg_errno(errp, -refblock_offset,
2521 "ERROR allocating refblock");
2522 return refblock_offset;
2523 }
2524
2525 refblock_cluster_index = refblock_offset / s->cluster_size;
2526 if (refblock_cluster_index >= end_cluster) {
2527
2528
2529
2530
2531 end_cluster = refblock_cluster_index + 1;
2532 }
2533
2534 if (on_disk_reftable_entries <= refblock_index) {
2535 on_disk_reftable_entries =
2536 ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE,
2537 s->cluster_size) / REFTABLE_ENTRY_SIZE;
2538 on_disk_reftable =
2539 g_try_realloc(on_disk_reftable,
2540 on_disk_reftable_entries *
2541 REFTABLE_ENTRY_SIZE);
2542 if (!on_disk_reftable) {
2543 error_setg(errp, "ERROR allocating reftable memory");
2544 return -ENOMEM;
2545 }
2546
2547 memset(on_disk_reftable + *on_disk_reftable_entries_ptr, 0,
2548 (on_disk_reftable_entries -
2549 *on_disk_reftable_entries_ptr) *
2550 REFTABLE_ENTRY_SIZE);
2551
2552 *on_disk_reftable_ptr = on_disk_reftable;
2553 *on_disk_reftable_entries_ptr = on_disk_reftable_entries;
2554
2555 reftable_grown = true;
2556 } else {
2557 assert(on_disk_reftable);
2558 }
2559 on_disk_reftable[refblock_index] = refblock_offset;
2560 }
2561
2562
2563
2564 ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset,
2565 s->cluster_size, false);
2566 if (ret < 0) {
2567 error_setg_errno(errp, -ret, "ERROR writing refblock");
2568 return ret;
2569 }
2570
2571
2572
2573
2574
2575
2576
2577 on_disk_refblock = (void *)((char *) *refcount_table +
2578 refblock_index * s->cluster_size);
2579
2580 ret = bdrv_pwrite(bs->file, refblock_offset, s->cluster_size,
2581 on_disk_refblock, 0);
2582 if (ret < 0) {
2583 error_setg_errno(errp, -ret, "ERROR writing refblock");
2584 return ret;
2585 }
2586
2587
2588 cluster = refblock_start + s->refcount_block_size - 1;
2589 }
2590
2591 return reftable_grown;
2592}
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603static int rebuild_refcount_structure(BlockDriverState *bs,
2604 BdrvCheckResult *res,
2605 void **refcount_table,
2606 int64_t *nb_clusters,
2607 Error **errp)
2608{
2609 BDRVQcow2State *s = bs->opaque;
2610 int64_t reftable_offset = -1;
2611 int64_t reftable_length = 0;
2612 int64_t reftable_clusters;
2613 int64_t refblock_index;
2614 uint32_t on_disk_reftable_entries = 0;
2615 uint64_t *on_disk_reftable = NULL;
2616 int ret = 0;
2617 int reftable_size_changed = 0;
2618 struct {
2619 uint64_t reftable_offset;
2620 uint32_t reftable_clusters;
2621 } QEMU_PACKED reftable_offset_and_clusters;
2622
2623 qcow2_cache_empty(bs, s->refcount_block_cache);
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654 reftable_size_changed =
2655 rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters,
2656 0, *nb_clusters,
2657 &on_disk_reftable,
2658 &on_disk_reftable_entries, errp);
2659 if (reftable_size_changed < 0) {
2660 res->check_errors++;
2661 ret = reftable_size_changed;
2662 goto fail;
2663 }
2664
2665
2666
2667
2668
2669 assert(reftable_size_changed);
2670
2671 do {
2672 int64_t reftable_start_cluster, reftable_end_cluster;
2673 int64_t first_free_cluster = 0;
2674
2675 reftable_length = on_disk_reftable_entries * REFTABLE_ENTRY_SIZE;
2676 reftable_clusters = size_to_clusters(s, reftable_length);
2677
2678 reftable_offset = alloc_clusters_imrt(bs, reftable_clusters,
2679 refcount_table, nb_clusters,
2680 &first_free_cluster);
2681 if (reftable_offset < 0) {
2682 error_setg_errno(errp, -reftable_offset,
2683 "ERROR allocating reftable");
2684 res->check_errors++;
2685 ret = reftable_offset;
2686 goto fail;
2687 }
2688
2689
2690
2691
2692
2693 assert(offset_into_cluster(s, reftable_offset) == 0);
2694 reftable_start_cluster = reftable_offset / s->cluster_size;
2695 reftable_end_cluster = reftable_start_cluster + reftable_clusters;
2696 reftable_size_changed =
2697 rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters,
2698 reftable_start_cluster,
2699 reftable_end_cluster,
2700 &on_disk_reftable,
2701 &on_disk_reftable_entries, errp);
2702 if (reftable_size_changed < 0) {
2703 res->check_errors++;
2704 ret = reftable_size_changed;
2705 goto fail;
2706 }
2707
2708
2709
2710
2711
2712 } while (reftable_size_changed);
2713
2714
2715 assert(reftable_offset >= 0);
2716
2717
2718
2719
2720
2721
2722 for (refblock_index = 0; refblock_index < on_disk_reftable_entries;
2723 refblock_index++)
2724 {
2725 cpu_to_be64s(&on_disk_reftable[refblock_index]);
2726 }
2727
2728 ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length,
2729 false);
2730 if (ret < 0) {
2731 error_setg_errno(errp, -ret, "ERROR writing reftable");
2732 goto fail;
2733 }
2734
2735 assert(reftable_length < INT_MAX);
2736 ret = bdrv_pwrite(bs->file, reftable_offset, reftable_length,
2737 on_disk_reftable, 0);
2738 if (ret < 0) {
2739 error_setg_errno(errp, -ret, "ERROR writing reftable");
2740 goto fail;
2741 }
2742
2743
2744 reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset);
2745 reftable_offset_and_clusters.reftable_clusters =
2746 cpu_to_be32(reftable_clusters);
2747 ret = bdrv_pwrite_sync(bs->file,
2748 offsetof(QCowHeader, refcount_table_offset),
2749 sizeof(reftable_offset_and_clusters),
2750 &reftable_offset_and_clusters, 0);
2751 if (ret < 0) {
2752 error_setg_errno(errp, -ret, "ERROR setting reftable");
2753 goto fail;
2754 }
2755
2756 for (refblock_index = 0; refblock_index < on_disk_reftable_entries;
2757 refblock_index++)
2758 {
2759 be64_to_cpus(&on_disk_reftable[refblock_index]);
2760 }
2761 s->refcount_table = on_disk_reftable;
2762 s->refcount_table_offset = reftable_offset;
2763 s->refcount_table_size = on_disk_reftable_entries;
2764 update_max_refcount_table_index(s);
2765
2766 return 0;
2767
2768fail:
2769 g_free(on_disk_reftable);
2770 return ret;
2771}
2772
2773
2774
2775
2776
2777
2778
2779int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
2780 BdrvCheckMode fix)
2781{
2782 BDRVQcow2State *s = bs->opaque;
2783 BdrvCheckResult pre_compare_res;
2784 int64_t size, highest_cluster, nb_clusters;
2785 void *refcount_table = NULL;
2786 bool rebuild = false;
2787 int ret;
2788
2789 size = bdrv_getlength(bs->file->bs);
2790 if (size < 0) {
2791 res->check_errors++;
2792 return size;
2793 }
2794
2795 nb_clusters = size_to_clusters(s, size);
2796 if (nb_clusters > INT_MAX) {
2797 res->check_errors++;
2798 return -EFBIG;
2799 }
2800
2801 res->bfi.total_clusters =
2802 size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
2803
2804 ret = calculate_refcounts(bs, res, fix, &rebuild, &refcount_table,
2805 &nb_clusters);
2806 if (ret < 0) {
2807 goto fail;
2808 }
2809
2810
2811
2812
2813 pre_compare_res = *res;
2814 compare_refcounts(bs, res, 0, &rebuild, &highest_cluster, refcount_table,
2815 nb_clusters);
2816
2817 if (rebuild && (fix & BDRV_FIX_ERRORS)) {
2818 BdrvCheckResult old_res = *res;
2819 int fresh_leaks = 0;
2820 Error *local_err = NULL;
2821
2822 fprintf(stderr, "Rebuilding refcount structure\n");
2823 ret = rebuild_refcount_structure(bs, res, &refcount_table,
2824 &nb_clusters, &local_err);
2825 if (ret < 0) {
2826 error_report_err(local_err);
2827 goto fail;
2828 }
2829
2830 res->corruptions = 0;
2831 res->leaks = 0;
2832
2833
2834
2835 rebuild = false;
2836 memset(refcount_table, 0, refcount_array_byte_size(s, nb_clusters));
2837 ret = calculate_refcounts(bs, res, 0, &rebuild, &refcount_table,
2838 &nb_clusters);
2839 if (ret < 0) {
2840 goto fail;
2841 }
2842
2843 if (fix & BDRV_FIX_LEAKS) {
2844
2845
2846
2847 BdrvCheckResult saved_res = *res;
2848 *res = (BdrvCheckResult){ 0 };
2849
2850 compare_refcounts(bs, res, BDRV_FIX_LEAKS, &rebuild,
2851 &highest_cluster, refcount_table, nb_clusters);
2852 if (rebuild) {
2853 fprintf(stderr, "ERROR rebuilt refcount structure is still "
2854 "broken\n");
2855 }
2856
2857
2858
2859
2860 fresh_leaks = res->leaks;
2861 *res = saved_res;
2862 }
2863
2864 if (res->corruptions < old_res.corruptions) {
2865 res->corruptions_fixed += old_res.corruptions - res->corruptions;
2866 }
2867 if (res->leaks < old_res.leaks) {
2868 res->leaks_fixed += old_res.leaks - res->leaks;
2869 }
2870 res->leaks += fresh_leaks;
2871 } else if (fix) {
2872 if (rebuild) {
2873 fprintf(stderr, "ERROR need to rebuild refcount structures\n");
2874 res->check_errors++;
2875 ret = -EIO;
2876 goto fail;
2877 }
2878
2879 if (res->leaks || res->corruptions) {
2880 *res = pre_compare_res;
2881 compare_refcounts(bs, res, fix, &rebuild, &highest_cluster,
2882 refcount_table, nb_clusters);
2883 }
2884 }
2885
2886
2887 ret = check_oflag_copied(bs, res, fix);
2888 if (ret < 0) {
2889 goto fail;
2890 }
2891
2892 res->image_end_offset = (highest_cluster + 1) * s->cluster_size;
2893 ret = 0;
2894
2895fail:
2896 g_free(refcount_table);
2897
2898 return ret;
2899}
2900
2901#define overlaps_with(ofs, sz) \
2902 ranges_overlap(offset, size, ofs, sz)
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
2919 int64_t size)
2920{
2921 BDRVQcow2State *s = bs->opaque;
2922 int chk = s->overlap_check & ~ign;
2923 int i, j;
2924
2925 if (!size) {
2926 return 0;
2927 }
2928
2929 if (chk & QCOW2_OL_MAIN_HEADER) {
2930 if (offset < s->cluster_size) {
2931 return QCOW2_OL_MAIN_HEADER;
2932 }
2933 }
2934
2935
2936 size = ROUND_UP(offset_into_cluster(s, offset) + size, s->cluster_size);
2937 offset = start_of_cluster(s, offset);
2938
2939 if ((chk & QCOW2_OL_ACTIVE_L1) && s->l1_size) {
2940 if (overlaps_with(s->l1_table_offset, s->l1_size * L1E_SIZE)) {
2941 return QCOW2_OL_ACTIVE_L1;
2942 }
2943 }
2944
2945 if ((chk & QCOW2_OL_REFCOUNT_TABLE) && s->refcount_table_size) {
2946 if (overlaps_with(s->refcount_table_offset,
2947 s->refcount_table_size * REFTABLE_ENTRY_SIZE)) {
2948 return QCOW2_OL_REFCOUNT_TABLE;
2949 }
2950 }
2951
2952 if ((chk & QCOW2_OL_SNAPSHOT_TABLE) && s->snapshots_size) {
2953 if (overlaps_with(s->snapshots_offset, s->snapshots_size)) {
2954 return QCOW2_OL_SNAPSHOT_TABLE;
2955 }
2956 }
2957
2958 if ((chk & QCOW2_OL_INACTIVE_L1) && s->snapshots) {
2959 for (i = 0; i < s->nb_snapshots; i++) {
2960 if (s->snapshots[i].l1_size &&
2961 overlaps_with(s->snapshots[i].l1_table_offset,
2962 s->snapshots[i].l1_size * L1E_SIZE)) {
2963 return QCOW2_OL_INACTIVE_L1;
2964 }
2965 }
2966 }
2967
2968 if ((chk & QCOW2_OL_ACTIVE_L2) && s->l1_table) {
2969 for (i = 0; i < s->l1_size; i++) {
2970 if ((s->l1_table[i] & L1E_OFFSET_MASK) &&
2971 overlaps_with(s->l1_table[i] & L1E_OFFSET_MASK,
2972 s->cluster_size)) {
2973 return QCOW2_OL_ACTIVE_L2;
2974 }
2975 }
2976 }
2977
2978 if ((chk & QCOW2_OL_REFCOUNT_BLOCK) && s->refcount_table) {
2979 unsigned last_entry = s->max_refcount_table_index;
2980 assert(last_entry < s->refcount_table_size);
2981 assert(last_entry + 1 == s->refcount_table_size ||
2982 (s->refcount_table[last_entry + 1] & REFT_OFFSET_MASK) == 0);
2983 for (i = 0; i <= last_entry; i++) {
2984 if ((s->refcount_table[i] & REFT_OFFSET_MASK) &&
2985 overlaps_with(s->refcount_table[i] & REFT_OFFSET_MASK,
2986 s->cluster_size)) {
2987 return QCOW2_OL_REFCOUNT_BLOCK;
2988 }
2989 }
2990 }
2991
2992 if ((chk & QCOW2_OL_INACTIVE_L2) && s->snapshots) {
2993 for (i = 0; i < s->nb_snapshots; i++) {
2994 uint64_t l1_ofs = s->snapshots[i].l1_table_offset;
2995 uint32_t l1_sz = s->snapshots[i].l1_size;
2996 uint64_t l1_sz2 = l1_sz * L1E_SIZE;
2997 uint64_t *l1;
2998 int ret;
2999
3000 ret = qcow2_validate_table(bs, l1_ofs, l1_sz, L1E_SIZE,
3001 QCOW_MAX_L1_SIZE, "", NULL);
3002 if (ret < 0) {
3003 return ret;
3004 }
3005
3006 l1 = g_try_malloc(l1_sz2);
3007
3008 if (l1_sz2 && l1 == NULL) {
3009 return -ENOMEM;
3010 }
3011
3012 ret = bdrv_pread(bs->file, l1_ofs, l1_sz2, l1, 0);
3013 if (ret < 0) {
3014 g_free(l1);
3015 return ret;
3016 }
3017
3018 for (j = 0; j < l1_sz; j++) {
3019 uint64_t l2_ofs = be64_to_cpu(l1[j]) & L1E_OFFSET_MASK;
3020 if (l2_ofs && overlaps_with(l2_ofs, s->cluster_size)) {
3021 g_free(l1);
3022 return QCOW2_OL_INACTIVE_L2;
3023 }
3024 }
3025
3026 g_free(l1);
3027 }
3028 }
3029
3030 if ((chk & QCOW2_OL_BITMAP_DIRECTORY) &&
3031 (s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS))
3032 {
3033 if (overlaps_with(s->bitmap_directory_offset,
3034 s->bitmap_directory_size))
3035 {
3036 return QCOW2_OL_BITMAP_DIRECTORY;
3037 }
3038 }
3039
3040 return 0;
3041}
3042
3043static const char *metadata_ol_names[] = {
3044 [QCOW2_OL_MAIN_HEADER_BITNR] = "qcow2_header",
3045 [QCOW2_OL_ACTIVE_L1_BITNR] = "active L1 table",
3046 [QCOW2_OL_ACTIVE_L2_BITNR] = "active L2 table",
3047 [QCOW2_OL_REFCOUNT_TABLE_BITNR] = "refcount table",
3048 [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = "refcount block",
3049 [QCOW2_OL_SNAPSHOT_TABLE_BITNR] = "snapshot table",
3050 [QCOW2_OL_INACTIVE_L1_BITNR] = "inactive L1 table",
3051 [QCOW2_OL_INACTIVE_L2_BITNR] = "inactive L2 table",
3052 [QCOW2_OL_BITMAP_DIRECTORY_BITNR] = "bitmap directory",
3053};
3054QEMU_BUILD_BUG_ON(QCOW2_OL_MAX_BITNR != ARRAY_SIZE(metadata_ol_names));
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
3067 int64_t size, bool data_file)
3068{
3069 int ret;
3070
3071 if (data_file && has_data_file(bs)) {
3072 return 0;
3073 }
3074
3075 ret = qcow2_check_metadata_overlap(bs, ign, offset, size);
3076 if (ret < 0) {
3077 return ret;
3078 } else if (ret > 0) {
3079 int metadata_ol_bitnr = ctz32(ret);
3080 assert(metadata_ol_bitnr < QCOW2_OL_MAX_BITNR);
3081
3082 qcow2_signal_corruption(bs, true, offset, size, "Preventing invalid "
3083 "write on metadata (overlaps with %s)",
3084 metadata_ol_names[metadata_ol_bitnr]);
3085 return -EIO;
3086 }
3087
3088 return 0;
3089}
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101typedef int (RefblockFinishOp)(BlockDriverState *bs, uint64_t **reftable,
3102 uint64_t reftable_index, uint64_t *reftable_size,
3103 void *refblock, bool refblock_empty,
3104 bool *allocated, Error **errp);
3105
3106
3107
3108
3109
3110
3111static int alloc_refblock(BlockDriverState *bs, uint64_t **reftable,
3112 uint64_t reftable_index, uint64_t *reftable_size,
3113 void *refblock, bool refblock_empty, bool *allocated,
3114 Error **errp)
3115{
3116 BDRVQcow2State *s = bs->opaque;
3117 int64_t offset;
3118
3119 if (!refblock_empty && reftable_index >= *reftable_size) {
3120 uint64_t *new_reftable;
3121 uint64_t new_reftable_size;
3122
3123 new_reftable_size = ROUND_UP(reftable_index + 1,
3124 s->cluster_size / REFTABLE_ENTRY_SIZE);
3125 if (new_reftable_size > QCOW_MAX_REFTABLE_SIZE / REFTABLE_ENTRY_SIZE) {
3126 error_setg(errp,
3127 "This operation would make the refcount table grow "
3128 "beyond the maximum size supported by QEMU, aborting");
3129 return -ENOTSUP;
3130 }
3131
3132 new_reftable = g_try_realloc(*reftable, new_reftable_size *
3133 REFTABLE_ENTRY_SIZE);
3134 if (!new_reftable) {
3135 error_setg(errp, "Failed to increase reftable buffer size");
3136 return -ENOMEM;
3137 }
3138
3139 memset(new_reftable + *reftable_size, 0,
3140 (new_reftable_size - *reftable_size) * REFTABLE_ENTRY_SIZE);
3141
3142 *reftable = new_reftable;
3143 *reftable_size = new_reftable_size;
3144 }
3145
3146 if (!refblock_empty && !(*reftable)[reftable_index]) {
3147 offset = qcow2_alloc_clusters(bs, s->cluster_size);
3148 if (offset < 0) {
3149 error_setg_errno(errp, -offset, "Failed to allocate refblock");
3150 return offset;
3151 }
3152 (*reftable)[reftable_index] = offset;
3153 *allocated = true;
3154 }
3155
3156 return 0;
3157}
3158
3159
3160
3161
3162
3163
3164static int flush_refblock(BlockDriverState *bs, uint64_t **reftable,
3165 uint64_t reftable_index, uint64_t *reftable_size,
3166 void *refblock, bool refblock_empty, bool *allocated,
3167 Error **errp)
3168{
3169 BDRVQcow2State *s = bs->opaque;
3170 int64_t offset;
3171 int ret;
3172
3173 if (reftable_index < *reftable_size && (*reftable)[reftable_index]) {
3174 offset = (*reftable)[reftable_index];
3175
3176 ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size,
3177 false);
3178 if (ret < 0) {
3179 error_setg_errno(errp, -ret, "Overlap check failed");
3180 return ret;
3181 }
3182
3183 ret = bdrv_pwrite(bs->file, offset, s->cluster_size, refblock, 0);
3184 if (ret < 0) {
3185 error_setg_errno(errp, -ret, "Failed to write refblock");
3186 return ret;
3187 }
3188 } else {
3189 assert(refblock_empty);
3190 }
3191
3192 return 0;
3193}
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable,
3209 uint64_t *new_reftable_index,
3210 uint64_t *new_reftable_size,
3211 void *new_refblock, int new_refblock_size,
3212 int new_refcount_bits,
3213 RefblockFinishOp *operation, bool *allocated,
3214 Qcow2SetRefcountFunc *new_set_refcount,
3215 BlockDriverAmendStatusCB *status_cb,
3216 void *cb_opaque, int index, int total,
3217 Error **errp)
3218{
3219 BDRVQcow2State *s = bs->opaque;
3220 uint64_t reftable_index;
3221 bool new_refblock_empty = true;
3222 int refblock_index;
3223 int new_refblock_index = 0;
3224 int ret;
3225
3226 for (reftable_index = 0; reftable_index < s->refcount_table_size;
3227 reftable_index++)
3228 {
3229 uint64_t refblock_offset = s->refcount_table[reftable_index]
3230 & REFT_OFFSET_MASK;
3231
3232 status_cb(bs, (uint64_t)index * s->refcount_table_size + reftable_index,
3233 (uint64_t)total * s->refcount_table_size, cb_opaque);
3234
3235 if (refblock_offset) {
3236 void *refblock;
3237
3238 if (offset_into_cluster(s, refblock_offset)) {
3239 qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#"
3240 PRIx64 " unaligned (reftable index: %#"
3241 PRIx64 ")", refblock_offset,
3242 reftable_index);
3243 error_setg(errp,
3244 "Image is corrupt (unaligned refblock offset)");
3245 return -EIO;
3246 }
3247
3248 ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offset,
3249 &refblock);
3250 if (ret < 0) {
3251 error_setg_errno(errp, -ret, "Failed to retrieve refblock");
3252 return ret;
3253 }
3254
3255 for (refblock_index = 0; refblock_index < s->refcount_block_size;
3256 refblock_index++)
3257 {
3258 uint64_t refcount;
3259
3260 if (new_refblock_index >= new_refblock_size) {
3261
3262 ret = operation(bs, new_reftable, *new_reftable_index,
3263 new_reftable_size, new_refblock,
3264 new_refblock_empty, allocated, errp);
3265 if (ret < 0) {
3266 qcow2_cache_put(s->refcount_block_cache, &refblock);
3267 return ret;
3268 }
3269
3270 (*new_reftable_index)++;
3271 new_refblock_index = 0;
3272 new_refblock_empty = true;
3273 }
3274
3275 refcount = s->get_refcount(refblock, refblock_index);
3276 if (new_refcount_bits < 64 && refcount >> new_refcount_bits) {
3277 uint64_t offset;
3278
3279 qcow2_cache_put(s->refcount_block_cache, &refblock);
3280
3281 offset = ((reftable_index << s->refcount_block_bits)
3282 + refblock_index) << s->cluster_bits;
3283
3284 error_setg(errp, "Cannot decrease refcount entry width to "
3285 "%i bits: Cluster at offset %#" PRIx64 " has a "
3286 "refcount of %" PRIu64, new_refcount_bits,
3287 offset, refcount);
3288 return -EINVAL;
3289 }
3290
3291 if (new_set_refcount) {
3292 new_set_refcount(new_refblock, new_refblock_index++,
3293 refcount);
3294 } else {
3295 new_refblock_index++;
3296 }
3297 new_refblock_empty = new_refblock_empty && refcount == 0;
3298 }
3299
3300 qcow2_cache_put(s->refcount_block_cache, &refblock);
3301 } else {
3302
3303 for (refblock_index = 0; refblock_index < s->refcount_block_size;
3304 refblock_index++)
3305 {
3306 if (new_refblock_index >= new_refblock_size) {
3307
3308 ret = operation(bs, new_reftable, *new_reftable_index,
3309 new_reftable_size, new_refblock,
3310 new_refblock_empty, allocated, errp);
3311 if (ret < 0) {
3312 return ret;
3313 }
3314
3315 (*new_reftable_index)++;
3316 new_refblock_index = 0;
3317 new_refblock_empty = true;
3318 }
3319
3320 if (new_set_refcount) {
3321 new_set_refcount(new_refblock, new_refblock_index++, 0);
3322 } else {
3323 new_refblock_index++;
3324 }
3325 }
3326 }
3327 }
3328
3329 if (new_refblock_index > 0) {
3330
3331 if (new_set_refcount) {
3332 for (; new_refblock_index < new_refblock_size;
3333 new_refblock_index++)
3334 {
3335 new_set_refcount(new_refblock, new_refblock_index, 0);
3336 }
3337 }
3338
3339 ret = operation(bs, new_reftable, *new_reftable_index,
3340 new_reftable_size, new_refblock, new_refblock_empty,
3341 allocated, errp);
3342 if (ret < 0) {
3343 return ret;
3344 }
3345
3346 (*new_reftable_index)++;
3347 }
3348
3349 status_cb(bs, (uint64_t)(index + 1) * s->refcount_table_size,
3350 (uint64_t)total * s->refcount_table_size, cb_opaque);
3351
3352 return 0;
3353}
3354
3355int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
3356 BlockDriverAmendStatusCB *status_cb,
3357 void *cb_opaque, Error **errp)
3358{
3359 BDRVQcow2State *s = bs->opaque;
3360 Qcow2GetRefcountFunc *new_get_refcount;
3361 Qcow2SetRefcountFunc *new_set_refcount;
3362 void *new_refblock = qemu_blockalign(bs->file->bs, s->cluster_size);
3363 uint64_t *new_reftable = NULL, new_reftable_size = 0;
3364 uint64_t *old_reftable, old_reftable_size, old_reftable_offset;
3365 uint64_t new_reftable_index = 0;
3366 uint64_t i;
3367 int64_t new_reftable_offset = 0, allocated_reftable_size = 0;
3368 int new_refblock_size, new_refcount_bits = 1 << refcount_order;
3369 int old_refcount_order;
3370 int walk_index = 0;
3371 int ret;
3372 bool new_allocation;
3373
3374 assert(s->qcow_version >= 3);
3375 assert(refcount_order >= 0 && refcount_order <= 6);
3376
3377
3378 new_refblock_size = 1 << (s->cluster_bits - (refcount_order - 3));
3379
3380 new_get_refcount = get_refcount_funcs[refcount_order];
3381 new_set_refcount = set_refcount_funcs[refcount_order];
3382
3383
3384 do {
3385 int total_walks;
3386
3387 new_allocation = false;
3388
3389
3390
3391
3392
3393
3394 total_walks = MAX(walk_index + 2, 3);
3395
3396
3397
3398 ret = walk_over_reftable(bs, &new_reftable, &new_reftable_index,
3399 &new_reftable_size, NULL, new_refblock_size,
3400 new_refcount_bits, &alloc_refblock,
3401 &new_allocation, NULL, status_cb, cb_opaque,
3402 walk_index++, total_walks, errp);
3403 if (ret < 0) {
3404 goto done;
3405 }
3406
3407 new_reftable_index = 0;
3408
3409 if (new_allocation) {
3410 if (new_reftable_offset) {
3411 qcow2_free_clusters(
3412 bs, new_reftable_offset,
3413 allocated_reftable_size * REFTABLE_ENTRY_SIZE,
3414 QCOW2_DISCARD_NEVER);
3415 }
3416
3417 new_reftable_offset = qcow2_alloc_clusters(bs, new_reftable_size *
3418 REFTABLE_ENTRY_SIZE);
3419 if (new_reftable_offset < 0) {
3420 error_setg_errno(errp, -new_reftable_offset,
3421 "Failed to allocate the new reftable");
3422 ret = new_reftable_offset;
3423 goto done;
3424 }
3425 allocated_reftable_size = new_reftable_size;
3426 }
3427 } while (new_allocation);
3428
3429
3430 ret = walk_over_reftable(bs, &new_reftable, &new_reftable_index,
3431 &new_reftable_size, new_refblock,
3432 new_refblock_size, new_refcount_bits,
3433 &flush_refblock, &new_allocation, new_set_refcount,
3434 status_cb, cb_opaque, walk_index, walk_index + 1,
3435 errp);
3436 if (ret < 0) {
3437 goto done;
3438 }
3439 assert(!new_allocation);
3440
3441
3442
3443 ret = qcow2_pre_write_overlap_check(bs, 0, new_reftable_offset,
3444 new_reftable_size * REFTABLE_ENTRY_SIZE,
3445 false);
3446 if (ret < 0) {
3447 error_setg_errno(errp, -ret, "Overlap check failed");
3448 goto done;
3449 }
3450
3451 for (i = 0; i < new_reftable_size; i++) {
3452 cpu_to_be64s(&new_reftable[i]);
3453 }
3454
3455 ret = bdrv_pwrite(bs->file, new_reftable_offset,
3456 new_reftable_size * REFTABLE_ENTRY_SIZE, new_reftable,
3457 0);
3458
3459 for (i = 0; i < new_reftable_size; i++) {
3460 be64_to_cpus(&new_reftable[i]);
3461 }
3462
3463 if (ret < 0) {
3464 error_setg_errno(errp, -ret, "Failed to write the new reftable");
3465 goto done;
3466 }
3467
3468
3469
3470 ret = qcow2_cache_flush(bs, s->refcount_block_cache);
3471 if (ret < 0) {
3472 error_setg_errno(errp, -ret, "Failed to flush the refblock cache");
3473 goto done;
3474 }
3475
3476
3477
3478
3479
3480 old_refcount_order = s->refcount_order;
3481 old_reftable_size = s->refcount_table_size;
3482 old_reftable_offset = s->refcount_table_offset;
3483
3484 s->refcount_order = refcount_order;
3485 s->refcount_table_size = new_reftable_size;
3486 s->refcount_table_offset = new_reftable_offset;
3487
3488 ret = qcow2_update_header(bs);
3489 if (ret < 0) {
3490 s->refcount_order = old_refcount_order;
3491 s->refcount_table_size = old_reftable_size;
3492 s->refcount_table_offset = old_reftable_offset;
3493 error_setg_errno(errp, -ret, "Failed to update the qcow2 header");
3494 goto done;
3495 }
3496
3497
3498 old_reftable = s->refcount_table;
3499 s->refcount_table = new_reftable;
3500 update_max_refcount_table_index(s);
3501
3502 s->refcount_bits = 1 << refcount_order;
3503 s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
3504 s->refcount_max += s->refcount_max - 1;
3505
3506 s->refcount_block_bits = s->cluster_bits - (refcount_order - 3);
3507 s->refcount_block_size = 1 << s->refcount_block_bits;
3508
3509 s->get_refcount = new_get_refcount;
3510 s->set_refcount = new_set_refcount;
3511
3512
3513
3514 new_reftable = old_reftable;
3515 new_reftable_size = old_reftable_size;
3516 new_reftable_offset = old_reftable_offset;
3517
3518done:
3519 if (new_reftable) {
3520
3521
3522
3523 for (i = 0; i < new_reftable_size; i++) {
3524 uint64_t offset = new_reftable[i] & REFT_OFFSET_MASK;
3525 if (offset) {
3526 qcow2_free_clusters(bs, offset, s->cluster_size,
3527 QCOW2_DISCARD_OTHER);
3528 }
3529 }
3530 g_free(new_reftable);
3531
3532 if (new_reftable_offset > 0) {
3533 qcow2_free_clusters(bs, new_reftable_offset,
3534 new_reftable_size * REFTABLE_ENTRY_SIZE,
3535 QCOW2_DISCARD_OTHER);
3536 }
3537 }
3538
3539 qemu_vfree(new_refblock);
3540 return ret;
3541}
3542
3543static int64_t get_refblock_offset(BlockDriverState *bs, uint64_t offset)
3544{
3545 BDRVQcow2State *s = bs->opaque;
3546 uint32_t index = offset_to_reftable_index(s, offset);
3547 int64_t covering_refblock_offset = 0;
3548
3549 if (index < s->refcount_table_size) {
3550 covering_refblock_offset = s->refcount_table[index] & REFT_OFFSET_MASK;
3551 }
3552 if (!covering_refblock_offset) {
3553 qcow2_signal_corruption(bs, true, -1, -1, "Refblock at %#" PRIx64 " is "
3554 "not covered by the refcount structures",
3555 offset);
3556 return -EIO;
3557 }
3558
3559 return covering_refblock_offset;
3560}
3561
3562static int coroutine_fn
3563qcow2_discard_refcount_block(BlockDriverState *bs, uint64_t discard_block_offs)
3564{
3565 BDRVQcow2State *s = bs->opaque;
3566 int64_t refblock_offs;
3567 uint64_t cluster_index = discard_block_offs >> s->cluster_bits;
3568 uint32_t block_index = cluster_index & (s->refcount_block_size - 1);
3569 void *refblock;
3570 int ret;
3571
3572 refblock_offs = get_refblock_offset(bs, discard_block_offs);
3573 if (refblock_offs < 0) {
3574 return refblock_offs;
3575 }
3576
3577 assert(discard_block_offs != 0);
3578
3579 ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offs,
3580 &refblock);
3581 if (ret < 0) {
3582 return ret;
3583 }
3584
3585 if (s->get_refcount(refblock, block_index) != 1) {
3586 qcow2_signal_corruption(bs, true, -1, -1, "Invalid refcount:"
3587 " refblock offset %#" PRIx64
3588 ", reftable index %u"
3589 ", block offset %#" PRIx64
3590 ", refcount %#" PRIx64,
3591 refblock_offs,
3592 offset_to_reftable_index(s, discard_block_offs),
3593 discard_block_offs,
3594 s->get_refcount(refblock, block_index));
3595 qcow2_cache_put(s->refcount_block_cache, &refblock);
3596 return -EINVAL;
3597 }
3598 s->set_refcount(refblock, block_index, 0);
3599
3600 qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refblock);
3601
3602 qcow2_cache_put(s->refcount_block_cache, &refblock);
3603
3604 if (cluster_index < s->free_cluster_index) {
3605 s->free_cluster_index = cluster_index;
3606 }
3607
3608 refblock = qcow2_cache_is_table_offset(s->refcount_block_cache,
3609 discard_block_offs);
3610 if (refblock) {
3611
3612 qcow2_cache_discard(s->refcount_block_cache, refblock);
3613 }
3614 update_refcount_discard(bs, discard_block_offs, s->cluster_size);
3615
3616 return 0;
3617}
3618
3619int coroutine_fn qcow2_shrink_reftable(BlockDriverState *bs)
3620{
3621 BDRVQcow2State *s = bs->opaque;
3622 uint64_t *reftable_tmp =
3623 g_malloc(s->refcount_table_size * REFTABLE_ENTRY_SIZE);
3624 int i, ret;
3625
3626 for (i = 0; i < s->refcount_table_size; i++) {
3627 int64_t refblock_offs = s->refcount_table[i] & REFT_OFFSET_MASK;
3628 void *refblock;
3629 bool unused_block;
3630
3631 if (refblock_offs == 0) {
3632 reftable_tmp[i] = 0;
3633 continue;
3634 }
3635 ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offs,
3636 &refblock);
3637 if (ret < 0) {
3638 goto out;
3639 }
3640
3641
3642 if (i == offset_to_reftable_index(s, refblock_offs)) {
3643 uint64_t block_index = (refblock_offs >> s->cluster_bits) &
3644 (s->refcount_block_size - 1);
3645 uint64_t refcount = s->get_refcount(refblock, block_index);
3646
3647 s->set_refcount(refblock, block_index, 0);
3648
3649 unused_block = buffer_is_zero(refblock, s->cluster_size);
3650
3651 s->set_refcount(refblock, block_index, refcount);
3652 } else {
3653 unused_block = buffer_is_zero(refblock, s->cluster_size);
3654 }
3655 qcow2_cache_put(s->refcount_block_cache, &refblock);
3656
3657 reftable_tmp[i] = unused_block ? 0 : cpu_to_be64(s->refcount_table[i]);
3658 }
3659
3660 ret = bdrv_co_pwrite_sync(bs->file, s->refcount_table_offset,
3661 s->refcount_table_size * REFTABLE_ENTRY_SIZE,
3662 reftable_tmp, 0);
3663
3664
3665
3666
3667
3668 for (i = 0; i < s->refcount_table_size; i++) {
3669 if (s->refcount_table[i] && !reftable_tmp[i]) {
3670 if (ret == 0) {
3671 ret = qcow2_discard_refcount_block(bs, s->refcount_table[i] &
3672 REFT_OFFSET_MASK);
3673 }
3674 s->refcount_table[i] = 0;
3675 }
3676 }
3677
3678 if (!s->cache_discards) {
3679 qcow2_process_discards(bs, ret);
3680 }
3681
3682out:
3683 g_free(reftable_tmp);
3684 return ret;
3685}
3686
3687int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
3688{
3689 BDRVQcow2State *s = bs->opaque;
3690 int64_t i;
3691
3692 for (i = size_to_clusters(s, size) - 1; i >= 0; i--) {
3693 uint64_t refcount;
3694 int ret = qcow2_get_refcount(bs, i, &refcount);
3695 if (ret < 0) {
3696 fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
3697 i, strerror(-ret));
3698 return ret;
3699 }
3700 if (refcount > 0) {
3701 return i;
3702 }
3703 }
3704 qcow2_signal_corruption(bs, true, -1, -1,
3705 "There are no references in the refcount table.");
3706 return -EIO;
3707}
3708
3709int coroutine_fn qcow2_detect_metadata_preallocation(BlockDriverState *bs)
3710{
3711 BDRVQcow2State *s = bs->opaque;
3712 int64_t i, end_cluster, cluster_count = 0, threshold;
3713 int64_t file_length, real_allocation, real_clusters;
3714
3715 qemu_co_mutex_assert_locked(&s->lock);
3716
3717 file_length = bdrv_getlength(bs->file->bs);
3718 if (file_length < 0) {
3719 return file_length;
3720 }
3721
3722 real_allocation = bdrv_get_allocated_file_size(bs->file->bs);
3723 if (real_allocation < 0) {
3724 return real_allocation;
3725 }
3726
3727 real_clusters = real_allocation / s->cluster_size;
3728 threshold = MAX(real_clusters * 10 / 9, real_clusters + 2);
3729
3730 end_cluster = size_to_clusters(s, file_length);
3731 for (i = 0; i < end_cluster && cluster_count < threshold; i++) {
3732 uint64_t refcount;
3733 int ret = qcow2_get_refcount(bs, i, &refcount);
3734 if (ret < 0) {
3735 return ret;
3736 }
3737 cluster_count += !!refcount;
3738 }
3739
3740 return cluster_count >= threshold;
3741}
3742