1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include "qemu/osdep.h"
26#include "qapi/error.h"
27#include "qcow2.h"
28#include "qemu/range.h"
29#include "qemu/bswap.h"
30#include "qemu/cutils.h"
31#include "qemu/memalign.h"
32#include "trace.h"
33
34static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size,
35 uint64_t max);
36
37G_GNUC_WARN_UNUSED_RESULT
38static int update_refcount(BlockDriverState *bs,
39 int64_t offset, int64_t length, uint64_t addend,
40 bool decrease, enum qcow2_discard_type type);
41
42static uint64_t get_refcount_ro0(const void *refcount_array, uint64_t index);
43static uint64_t get_refcount_ro1(const void *refcount_array, uint64_t index);
44static uint64_t get_refcount_ro2(const void *refcount_array, uint64_t index);
45static uint64_t get_refcount_ro3(const void *refcount_array, uint64_t index);
46static uint64_t get_refcount_ro4(const void *refcount_array, uint64_t index);
47static uint64_t get_refcount_ro5(const void *refcount_array, uint64_t index);
48static uint64_t get_refcount_ro6(const void *refcount_array, uint64_t index);
49
50static void set_refcount_ro0(void *refcount_array, uint64_t index,
51 uint64_t value);
52static void set_refcount_ro1(void *refcount_array, uint64_t index,
53 uint64_t value);
54static void set_refcount_ro2(void *refcount_array, uint64_t index,
55 uint64_t value);
56static void set_refcount_ro3(void *refcount_array, uint64_t index,
57 uint64_t value);
58static void set_refcount_ro4(void *refcount_array, uint64_t index,
59 uint64_t value);
60static void set_refcount_ro5(void *refcount_array, uint64_t index,
61 uint64_t value);
62static void set_refcount_ro6(void *refcount_array, uint64_t index,
63 uint64_t value);
64
65
/* Refcount readers, indexed by s->refcount_order (entry width is
 * 2^order bits, so order 0..6 covers 1-bit through 64-bit refcounts). */
static Qcow2GetRefcountFunc *const get_refcount_funcs[] = {
    &get_refcount_ro0,
    &get_refcount_ro1,
    &get_refcount_ro2,
    &get_refcount_ro3,
    &get_refcount_ro4,
    &get_refcount_ro5,
    &get_refcount_ro6
};
75
/* Refcount writers, indexed by s->refcount_order; must stay parallel to
 * get_refcount_funcs above. */
static Qcow2SetRefcountFunc *const set_refcount_funcs[] = {
    &set_refcount_ro0,
    &set_refcount_ro1,
    &set_refcount_ro2,
    &set_refcount_ro3,
    &set_refcount_ro4,
    &set_refcount_ro5,
    &set_refcount_ro6
};
85
86
87
88
89
90static void update_max_refcount_table_index(BDRVQcow2State *s)
91{
92 unsigned i = s->refcount_table_size - 1;
93 while (i > 0 && (s->refcount_table[i] & REFT_OFFSET_MASK) == 0) {
94 i--;
95 }
96
97 s->max_refcount_table_index = i;
98}
99
100int qcow2_refcount_init(BlockDriverState *bs)
101{
102 BDRVQcow2State *s = bs->opaque;
103 unsigned int refcount_table_size2, i;
104 int ret;
105
106 assert(s->refcount_order >= 0 && s->refcount_order <= 6);
107
108 s->get_refcount = get_refcount_funcs[s->refcount_order];
109 s->set_refcount = set_refcount_funcs[s->refcount_order];
110
111 assert(s->refcount_table_size <= INT_MAX / REFTABLE_ENTRY_SIZE);
112 refcount_table_size2 = s->refcount_table_size * REFTABLE_ENTRY_SIZE;
113 s->refcount_table = g_try_malloc(refcount_table_size2);
114
115 if (s->refcount_table_size > 0) {
116 if (s->refcount_table == NULL) {
117 ret = -ENOMEM;
118 goto fail;
119 }
120 BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
121 ret = bdrv_pread(bs->file, s->refcount_table_offset,
122 s->refcount_table, refcount_table_size2);
123 if (ret < 0) {
124 goto fail;
125 }
126 for(i = 0; i < s->refcount_table_size; i++)
127 be64_to_cpus(&s->refcount_table[i]);
128 update_max_refcount_table_index(s);
129 }
130 return 0;
131 fail:
132 return ret;
133}
134
135void qcow2_refcount_close(BlockDriverState *bs)
136{
137 BDRVQcow2State *s = bs->opaque;
138 g_free(s->refcount_table);
139}
140
141
/* refcount_order 0: one bit per entry, eight entries packed per byte. */
static uint64_t get_refcount_ro0(const void *refcount_array, uint64_t index)
{
    const uint8_t *bytes = refcount_array;

    return (bytes[index / 8] >> (index % 8)) & 0x1;
}
146
/* refcount_order 0: store a 1-bit refcount; value must fit in one bit. */
static void set_refcount_ro0(void *refcount_array, uint64_t index,
                             uint64_t value)
{
    uint8_t *bytes = refcount_array;
    unsigned shift = index % 8;

    assert(!(value >> 1));
    bytes[index / 8] = (uint8_t)((bytes[index / 8] & ~(0x1 << shift)) |
                                 (value << shift));
}
154
/* refcount_order 1: two bits per entry, four entries packed per byte. */
static uint64_t get_refcount_ro1(const void *refcount_array, uint64_t index)
{
    const uint8_t *bytes = refcount_array;
    unsigned shift = 2 * (index % 4);

    return (bytes[index / 4] >> shift) & 0x3;
}
160
/* refcount_order 1: store a 2-bit refcount; value must fit in two bits. */
static void set_refcount_ro1(void *refcount_array, uint64_t index,
                             uint64_t value)
{
    uint8_t *bytes = refcount_array;
    unsigned shift = 2 * (index % 4);

    assert(!(value >> 2));
    bytes[index / 4] = (uint8_t)((bytes[index / 4] & ~(0x3 << shift)) |
                                 (value << shift));
}
168
/* refcount_order 2: four bits per entry, two entries packed per byte. */
static uint64_t get_refcount_ro2(const void *refcount_array, uint64_t index)
{
    const uint8_t *bytes = refcount_array;
    unsigned shift = 4 * (index % 2);

    return (bytes[index / 2] >> shift) & 0xf;
}
174
/* refcount_order 2: store a 4-bit refcount; value must fit in a nibble. */
static void set_refcount_ro2(void *refcount_array, uint64_t index,
                             uint64_t value)
{
    uint8_t *bytes = refcount_array;
    unsigned shift = 4 * (index % 2);

    assert(!(value >> 4));
    bytes[index / 2] = (uint8_t)((bytes[index / 2] & ~(0xf << shift)) |
                                 (value << shift));
}
182
/* refcount_order 3: one byte per entry, no packing or byte swapping. */
static uint64_t get_refcount_ro3(const void *refcount_array, uint64_t index)
{
    const uint8_t *entries = refcount_array;

    return entries[index];
}
187
/* refcount_order 3: store an 8-bit refcount; value must fit in one byte. */
static void set_refcount_ro3(void *refcount_array, uint64_t index,
                             uint64_t value)
{
    uint8_t *entries = refcount_array;

    assert(!(value >> 8));
    entries[index] = (uint8_t)value;
}
194
/* refcount_order 4: 16-bit big-endian entries (the qcow2 default width). */
static uint64_t get_refcount_ro4(const void *refcount_array, uint64_t index)
{
    const uint16_t *entries = refcount_array;

    return be16_to_cpu(entries[index]);
}
199
/* refcount_order 4: store a 16-bit refcount in big-endian on-disk order. */
static void set_refcount_ro4(void *refcount_array, uint64_t index,
                             uint64_t value)
{
    uint16_t *entries = refcount_array;

    assert(!(value >> 16));
    entries[index] = cpu_to_be16(value);
}
206
/* refcount_order 5: 32-bit big-endian entries. */
static uint64_t get_refcount_ro5(const void *refcount_array, uint64_t index)
{
    const uint32_t *entries = refcount_array;

    return be32_to_cpu(entries[index]);
}
211
/* refcount_order 5: store a 32-bit refcount in big-endian on-disk order. */
static void set_refcount_ro5(void *refcount_array, uint64_t index,
                             uint64_t value)
{
    uint32_t *entries = refcount_array;

    assert(!(value >> 32));
    entries[index] = cpu_to_be32(value);
}
218
/* refcount_order 6: 64-bit big-endian entries. */
static uint64_t get_refcount_ro6(const void *refcount_array, uint64_t index)
{
    const uint64_t *entries = refcount_array;

    return be64_to_cpu(entries[index]);
}
223
/* refcount_order 6: store a 64-bit refcount; any value fits, no assert. */
static void set_refcount_ro6(void *refcount_array, uint64_t index,
                             uint64_t value)
{
    uint64_t *entries = refcount_array;

    entries[index] = cpu_to_be64(value);
}
229
230
231static int load_refcount_block(BlockDriverState *bs,
232 int64_t refcount_block_offset,
233 void **refcount_block)
234{
235 BDRVQcow2State *s = bs->opaque;
236
237 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD);
238 return qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
239 refcount_block);
240}
241
242
243
244
245
/*
 * Retrieves the refcount of the cluster given by its index and stores it in
 * *refcount. Returns 0 on success and -errno on failure.
 */
int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
                       uint64_t *refcount)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t refcount_table_index, block_index;
    int64_t refcount_block_offset;
    int ret;
    void *refcount_block;

    /* Clusters past the end of the reftable are simply unallocated */
    refcount_table_index = cluster_index >> s->refcount_block_bits;
    if (refcount_table_index >= s->refcount_table_size) {
        *refcount = 0;
        return 0;
    }
    refcount_block_offset =
        s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK;
    /* A zero reftable entry means no refblock allocated => refcount 0 */
    if (!refcount_block_offset) {
        *refcount = 0;
        return 0;
    }

    /* Refblocks must be cluster aligned; anything else is corruption */
    if (offset_into_cluster(s, refcount_block_offset)) {
        qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#" PRIx64
                                " unaligned (reftable index: %#" PRIx64 ")",
                                refcount_block_offset, refcount_table_index);
        return -EIO;
    }

    ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
                          &refcount_block);
    if (ret < 0) {
        return ret;
    }

    block_index = cluster_index & (s->refcount_block_size - 1);
    *refcount = s->get_refcount(refcount_block, block_index);

    /* Drop the cache reference taken above */
    qcow2_cache_put(s->refcount_block_cache, &refcount_block);

    return 0;
}
287
288
289static int in_same_refcount_block(BDRVQcow2State *s, uint64_t offset_a,
290 uint64_t offset_b)
291{
292 uint64_t block_a = offset_a >> (s->cluster_bits + s->refcount_block_bits);
293 uint64_t block_b = offset_b >> (s->cluster_bits + s->refcount_block_bits);
294
295 return (block_a == block_b);
296}
297
298
299
300
301
302
303
/*
 * Loads a refcount block. If it doesn't exist yet, it is allocated first
 * (including growing the refcount table if needed).
 *
 * Returns 0 on success or -errno in error case.
 * Returns -EAGAIN after successfully allocating a new refcount block and
 * hooking it up, because the allocation may have invalidated cached free
 * cluster information; the caller is expected to restart its operation.
 */
static int alloc_refcount_block(BlockDriverState *bs,
                                int64_t cluster_index, void **refcount_block)
{
    BDRVQcow2State *s = bs->opaque;
    unsigned int refcount_table_index;
    int64_t ret;

    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);

    /* Find the refcount block for the given cluster */
    refcount_table_index = cluster_index >> s->refcount_block_bits;

    if (refcount_table_index < s->refcount_table_size) {

        uint64_t refcount_block_offset =
            s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK;

        /* If it's already there, we're done */
        if (refcount_block_offset) {
            if (offset_into_cluster(s, refcount_block_offset)) {
                qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#"
                                        PRIx64 " unaligned (reftable index: "
                                        "%#x)", refcount_block_offset,
                                        refcount_table_index);
                return -EIO;
            }

            return load_refcount_block(bs, refcount_block_offset,
                                       refcount_block);
        }
    }

    /*
     * If we came here, we need to allocate something. Something is at least
     * a cluster for the new refcount block. It may also include a new
     * refcount table if the old one was too small.
     *
     * Note that allocating clusters here needs some special care:
     *
     * - We must not overwrite the refcount block that we're about to create
     *   with the cluster allocation's own refcount update (chicken-and-egg:
     *   the new refblock may have to describe itself).
     *
     * - We must be safe against failures: metadata on disk must never refer
     *   to a refcount block that has not been written yet, so the new block
     *   is flushed before the reftable entry pointing to it.
     */

    *refcount_block = NULL;

    /* We write to the refcount table, so we might depend on L2 tables */
    ret = qcow2_cache_flush(bs, s->l2_table_cache);
    if (ret < 0) {
        return ret;
    }

    /* Allocate the refcount block itself (without updating any refcounts) */
    int64_t new_block = alloc_clusters_noref(bs, s->cluster_size, INT64_MAX);
    if (new_block < 0) {
        return new_block;
    }

    /* The offset must fit in the stored reftable entry */
    assert((new_block & REFT_OFFSET_MASK) == new_block);

    /* Offset 0 is the image header; a refblock there means corruption */
    if (new_block == 0) {
        qcow2_signal_corruption(bs, true, -1, -1, "Preventing invalid "
                                "allocation of refcount block at offset 0");
        return -EIO;
    }

#ifdef DEBUG_ALLOC2
    fprintf(stderr, "qcow2: Allocate refcount block %d for %" PRIx64
            " at %" PRIx64 "\n",
            refcount_table_index, cluster_index << s->cluster_bits, new_block);
#endif

    if (in_same_refcount_block(s, new_block, cluster_index << s->cluster_bits)) {
        /* The block describes itself; initialize it in memory with its own
         * refcount already set to 1, avoiding recursion into this function */
        ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
                                    refcount_block);
        if (ret < 0) {
            goto fail;
        }

        memset(*refcount_block, 0, s->cluster_size);

        /* The block describes itself, need to update the refcount */
        int block_index = (new_block >> s->cluster_bits) &
            (s->refcount_block_size - 1);
        s->set_refcount(*refcount_block, block_index, 1);
    } else {
        /* Described by an already existing refcount block: update it the
         * normal way (this cannot recurse because the block exists) */
        ret = update_refcount(bs, new_block, s->cluster_size, 1, false,
                              QCOW2_DISCARD_NEVER);
        if (ret < 0) {
            goto fail;
        }

        ret = qcow2_cache_flush(bs, s->refcount_block_cache);
        if (ret < 0) {
            goto fail;
        }

        /* Initialize the new refcount block only after updating its refcount,
         * update_refcount uses the refcount cache itself */
        ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
                                    refcount_block);
        if (ret < 0) {
            goto fail;
        }

        memset(*refcount_block, 0, s->cluster_size);
    }

    /* Now the new refcount block needs to be written to disk */
    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE);
    qcow2_cache_entry_mark_dirty(s->refcount_block_cache, *refcount_block);
    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
    if (ret < 0) {
        goto fail;
    }

    /* If the refcount table is big enough, just hook the block up there */
    if (refcount_table_index < s->refcount_table_size) {
        uint64_t data64 = cpu_to_be64(new_block);
        BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP);
        ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset +
                               refcount_table_index * REFTABLE_ENTRY_SIZE,
                               &data64, sizeof(data64));
        if (ret < 0) {
            goto fail;
        }

        s->refcount_table[refcount_table_index] = new_block;
        /* If there's a hole in s->refcount_table then it can happen
         * that refcount_table_index < s->max_refcount_table_index */
        s->max_refcount_table_index =
            MAX(s->max_refcount_table_index, refcount_table_index);

        /* The new refcount block may be where the caller intended to put its
         * data, so let it restart the search. */
        return -EAGAIN;
    }

    qcow2_cache_put(s->refcount_block_cache, refcount_block);

    /*
     * If we come here, we need to grow the refcount table. Again, a new
     * refcount table needs some refcounts for itself, and the refblocks
     * covering it may not exist either — qcow2_refcount_area() builds a
     * completely self-describing area of refcount structures past the end
     * of the currently covered space.
     */
    BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_GROW);

    /* Number of refblocks needed to cover everything allocated so far,
     * including the new block and the cluster the caller asked about */
    uint64_t blocks_used = DIV_ROUND_UP(MAX(cluster_index + 1,
                                            (new_block >> s->cluster_bits) + 1),
                                        s->refcount_block_size);

    /* First byte past the space those refblocks can describe; the new
     * self-describing refcount area starts there */
    uint64_t meta_offset = (blocks_used * s->refcount_block_size) *
                           s->cluster_size;

    ret = qcow2_refcount_area(bs, meta_offset, 0, false,
                              refcount_table_index, new_block);
    if (ret < 0) {
        return ret;
    }

    ret = load_refcount_block(bs, new_block, refcount_block);
    if (ret < 0) {
        return ret;
    }

    /* If we want to search for free clusters, the cluster range described by
     * the new refcount structures may be where we want to allocate; restart */
    return -EAGAIN;

fail:
    if (*refcount_block != NULL) {
        qcow2_cache_put(s->refcount_block_cache, refcount_block);
    }
    return ret;
}
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
/*
 * Starting at @start_offset, this function creates new self-covering refcount
 * structures: A new refcount table and refcount blocks which cover all of
 * themselves, and a number of @additional_clusters beyond their end.
 * @start_offset must be at the end of the image file, so the new structures
 * can simply be appended there.
 *
 * If @exact_size is false, the refcount table will have 50 % more entries
 * than necessary so it will not need to grow again soon.
 *
 * If @new_refblock_offset is not zero, it contains the offset of a refcount
 * block that should be entered into the new refcount table at index
 * @new_refblock_index.
 *
 * Returns: The offset after the new refcount structures (i.e. where the
 *          @additional_clusters may be placed) on success, -errno on error.
 */
int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t start_offset,
                            uint64_t additional_clusters, bool exact_size,
                            int new_refblock_index,
                            uint64_t new_refblock_offset)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t total_refblock_count_u64, additional_refblock_count;
    int total_refblock_count, table_size, area_reftable_index, table_clusters;
    int i;
    uint64_t table_offset, block_offset, end_offset;
    int ret;
    uint64_t *new_table;

    assert(!(start_offset % s->cluster_size));

    /* How many refblocks are needed to cover everything up to and including
     * the new area (the computation accounts for the area covering itself) */
    qcow2_refcount_metadata_size(start_offset / s->cluster_size +
                                 additional_clusters,
                                 s->cluster_size, s->refcount_order,
                                 !exact_size, &total_refblock_count_u64);
    if (total_refblock_count_u64 > QCOW_MAX_REFTABLE_SIZE) {
        return -EFBIG;
    }
    total_refblock_count = total_refblock_count_u64;

    /* Index in the refcount table of the first refblock to cover the area
     * of refcount structures we are about to create; we know that
     * @total_refblock_count can cover all of these clusters, so this
     * index must be below @total_refblock_count */
    area_reftable_index = (start_offset / s->cluster_size) /
                          s->refcount_block_size;

    if (exact_size) {
        table_size = total_refblock_count;
    } else {
        /* Add 50 % slack so the table does not need to grow again soon */
        table_size = total_refblock_count +
                     DIV_ROUND_UP(total_refblock_count, 2);
    }

    /* The qcow2 file can only store the reftable size in number of clusters */
    table_size = ROUND_UP(table_size, s->cluster_size / REFTABLE_ENTRY_SIZE);
    table_clusters = (table_size * REFTABLE_ENTRY_SIZE) / s->cluster_size;

    if (table_size > QCOW_MAX_REFTABLE_SIZE) {
        return -EFBIG;
    }

    new_table = g_try_new0(uint64_t, table_size);

    assert(table_size > 0);
    if (new_table == NULL) {
        ret = -ENOMEM;
        goto fail;
    }

    /* Fill the new refcount table with the current table's contents */
    if (table_size > s->max_refcount_table_index) {
        /* We're actually growing the reftable */
        memcpy(new_table, s->refcount_table,
               (s->max_refcount_table_index + 1) * REFTABLE_ENTRY_SIZE);
    } else {
        /* Improbable case: We're shrinking the reftable. However, the caller
         * has assured us that there is only empty space beyond @start_offset,
         * so we can simply drop all of the refblocks that won't fit into the
         * new reftable. */
        memcpy(new_table, s->refcount_table, table_size * REFTABLE_ENTRY_SIZE);
    }

    if (new_refblock_offset) {
        assert(new_refblock_index < total_refblock_count);
        new_table[new_refblock_index] = new_refblock_offset;
    }

    /* Count how many new refblocks we have to create (the entries that are
     * still zero inside the area) */
    additional_refblock_count = 0;
    for (i = area_reftable_index; i < total_refblock_count; i++) {
        if (!new_table[i]) {
            additional_refblock_count++;
        }
    }

    /* Layout: new refblocks first, then the new reftable */
    table_offset = start_offset + additional_refblock_count * s->cluster_size;
    end_offset = table_offset + table_clusters * s->cluster_size;

    /* Fill the refcount blocks, allocating the missing ones on the fly */
    block_offset = start_offset;
    for (i = area_reftable_index; i < total_refblock_count; i++) {
        void *refblock_data;
        uint64_t first_offset_covered;

        /* Reuse an existing refblock or create a new one */
        if (new_table[i]) {
            ret = qcow2_cache_get(bs, s->refcount_block_cache, new_table[i],
                                  &refblock_data);
            if (ret < 0) {
                goto fail;
            }
        } else {
            ret = qcow2_cache_get_empty(bs, s->refcount_block_cache,
                                        block_offset, &refblock_data);
            if (ret < 0) {
                goto fail;
            }
            memset(refblock_data, 0, s->cluster_size);
            qcow2_cache_entry_mark_dirty(s->refcount_block_cache,
                                         refblock_data);

            new_table[i] = block_offset;
            block_offset += s->cluster_size;
        }

        /* First host offset covered by this refblock */
        first_offset_covered = (uint64_t)i * s->refcount_block_size *
                               s->cluster_size;
        if (first_offset_covered < end_offset) {
            int j, end_index;

            /* Set the refcounts of the new area's own clusters (everything
             * in [start_offset, end_offset)) to 1 */
            if (first_offset_covered < start_offset) {
                assert(i == area_reftable_index);
                j = (start_offset - first_offset_covered) / s->cluster_size;
                assert(j < s->refcount_block_size);
            } else {
                j = 0;
            }

            end_index = MIN((end_offset - first_offset_covered) /
                            s->cluster_size,
                            s->refcount_block_size);

            for (; j < end_index; j++) {
                /* The caller guaranteed the area past start_offset is empty */
                assert(s->get_refcount(refblock_data, j) == 0);
                s->set_refcount(refblock_data, j, 1);
            }

            qcow2_cache_entry_mark_dirty(s->refcount_block_cache,
                                         refblock_data);
        }

        qcow2_cache_put(s->refcount_block_cache, &refblock_data);
    }

    /* All new refblocks allocated => next free offset is the reftable's */
    assert(block_offset == table_offset);

    /* Write refcount blocks to disk before the reftable refers to them */
    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
    if (ret < 0) {
        goto fail;
    }

    /* Write the reftable in big-endian on-disk order */
    for (i = 0; i < total_refblock_count; i++) {
        cpu_to_be64s(&new_table[i]);
    }

    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE);
    ret = bdrv_pwrite_sync(bs->file, table_offset, new_table,
                           table_size * REFTABLE_ENTRY_SIZE);
    if (ret < 0) {
        goto fail;
    }

    /* Convert back for the in-memory copy */
    for (i = 0; i < total_refblock_count; i++) {
        be64_to_cpus(&new_table[i]);
    }

    /* Hook up the new refcount table in the image header (single atomic
     * update of offset + size) */
    struct QEMU_PACKED {
        uint64_t d64;
        uint32_t d32;
    } data;
    data.d64 = cpu_to_be64(table_offset);
    data.d32 = cpu_to_be32(table_clusters);
    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE);
    ret = bdrv_pwrite_sync(bs->file,
                           offsetof(QCowHeader, refcount_table_offset),
                           &data, sizeof(data));
    if (ret < 0) {
        goto fail;
    }

    /* And switch it in memory */
    uint64_t old_table_offset = s->refcount_table_offset;
    uint64_t old_table_size = s->refcount_table_size;

    g_free(s->refcount_table);
    s->refcount_table = new_table;
    s->refcount_table_size = table_size;
    s->refcount_table_offset = table_offset;
    update_max_refcount_table_index(s);

    /* Free old table. */
    qcow2_free_clusters(bs, old_table_offset,
                        old_table_size * REFTABLE_ENTRY_SIZE,
                        QCOW2_DISCARD_OTHER);

    return end_offset;

fail:
    g_free(new_table);
    return ret;
}
733
734void qcow2_process_discards(BlockDriverState *bs, int ret)
735{
736 BDRVQcow2State *s = bs->opaque;
737 Qcow2DiscardRegion *d, *next;
738
739 QTAILQ_FOREACH_SAFE(d, &s->discards, next, next) {
740 QTAILQ_REMOVE(&s->discards, d, next);
741
742
743 if (ret >= 0) {
744 int r2 = bdrv_pdiscard(bs->file, d->offset, d->bytes);
745 if (r2 < 0) {
746 trace_qcow2_process_discards_failed_region(d->offset, d->bytes,
747 r2);
748 }
749 }
750
751 g_free(d);
752 }
753}
754
/*
 * Queue [offset, offset + length) for a later discard, merging it with any
 * adjacent or overlapping region already in s->discards so the list stays
 * a set of maximal, pairwise non-adjacent regions.
 */
static void update_refcount_discard(BlockDriverState *bs,
                                    uint64_t offset, uint64_t length)
{
    BDRVQcow2State *s = bs->opaque;
    Qcow2DiscardRegion *d, *p, *next;

    QTAILQ_FOREACH(d, &s->discards, next) {
        uint64_t new_start = MIN(offset, d->offset);
        uint64_t new_end = MAX(offset + length, d->offset + d->bytes);

        /* span of the union <= sum of lengths <=> regions touch/overlap */
        if (new_end - new_start <= length + d->bytes) {
            /* Two overlapping regions will be split into three, so only
             * adjacent regions can actually be merged here (discarded
             * clusters cannot be queued twice) */
            assert(d->bytes + length == new_end - new_start);
            d->offset = new_start;
            d->bytes = new_end - new_start;
            goto found;
        }
    }

    /* No mergeable neighbour found: queue a fresh region */
    d = g_malloc(sizeof(*d));
    *d = (Qcow2DiscardRegion) {
        .bs = bs,
        .offset = offset,
        .bytes = length,
    };
    QTAILQ_INSERT_TAIL(&s->discards, d, next);

found:
    /* Merge d with any region that has now become adjacent to it */
    QTAILQ_FOREACH_SAFE(p, &s->discards, next, next) {
        if (p == d
            || p->offset > d->offset + d->bytes
            || d->offset > p->offset + p->bytes)
        {
            continue;
        }

        /* Still no overlap possible, only exact adjacency */
        assert(p->offset == d->offset + d->bytes
            || d->offset == p->offset + p->bytes);

        QTAILQ_REMOVE(&s->discards, p, next);
        d->offset = MIN(d->offset, p->offset);
        d->bytes += p->bytes;
        g_free(p);
    }
}
804
805
806
807
/*
 * Adjust the refcount of every cluster in [offset, offset + length).
 * @addend is the absolute value of the change; if @decrease is set it is
 * subtracted from the current refcounts, otherwise added.
 *
 * On error the already applied changes are rolled back (best effort) and
 * -errno is returned; on success 0.
 */
static int update_refcount(BlockDriverState *bs,
                           int64_t offset,
                           int64_t length,
                           uint64_t addend,
                           bool decrease,
                           enum qcow2_discard_type type)
{
    BDRVQcow2State *s = bs->opaque;
    int64_t start, last, cluster_offset;
    void *refcount_block = NULL;
    int64_t old_table_index = -1;
    int ret;

#ifdef DEBUG_ALLOC2
    fprintf(stderr, "update_refcount: offset=%" PRId64 " size=%" PRId64
            " addend=%s%" PRIu64 "\n", offset, length, decrease ? "-" : "",
            addend);
#endif
    if (length < 0) {
        return -EINVAL;
    } else if (length == 0) {
        return 0;
    }

    if (decrease) {
        /* Before a cluster can be reused, all L2 entries referring to it
         * must be on disk; flush L2 before refcount blocks */
        qcow2_cache_set_dependency(bs, s->refcount_block_cache,
                                   s->l2_table_cache);
    }

    start = start_of_cluster(s, offset);
    last = start_of_cluster(s, offset + length - 1);
    for(cluster_offset = start; cluster_offset <= last;
        cluster_offset += s->cluster_size)
    {
        int block_index;
        uint64_t refcount;
        int64_t cluster_index = cluster_offset >> s->cluster_bits;
        int64_t table_index = cluster_index >> s->refcount_block_bits;

        /* Load the refcount block (and allocate it if needed) whenever we
         * cross into a new one */
        if (table_index != old_table_index) {
            if (refcount_block) {
                qcow2_cache_put(s->refcount_block_cache, &refcount_block);
            }
            ret = alloc_refcount_block(bs, cluster_index, &refcount_block);
            /* If the caller needs to restart the search for free clusters,
             * try the same ones first to reduce fragmentation */
            if (ret == -EAGAIN) {
                if (s->free_cluster_index > (start >> s->cluster_bits)) {
                    s->free_cluster_index = (start >> s->cluster_bits);
                }
            }
            if (ret < 0) {
                goto fail;
            }
        }
        old_table_index = table_index;

        qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refcount_block);

        /* we can update the count and save it */
        block_index = cluster_index & (s->refcount_block_size - 1);

        refcount = s->get_refcount(refcount_block, block_index);
        /* Reject under- and overflow of the refcount */
        if (decrease ? (refcount - addend > refcount)
                     : (refcount + addend < refcount ||
                        refcount + addend > s->refcount_max))
        {
            ret = -EINVAL;
            goto fail;
        }
        if (decrease) {
            refcount -= addend;
        } else {
            refcount += addend;
        }
        if (refcount == 0 && cluster_index < s->free_cluster_index) {
            s->free_cluster_index = cluster_index;
        }
        s->set_refcount(refcount_block, block_index, refcount);

        if (refcount == 0) {
            void *table;

            /* A freed cluster may still be cached as metadata; drop such
             * cache entries so they are not written back later */
            table = qcow2_cache_is_table_offset(s->refcount_block_cache,
                                                offset);
            if (table != NULL) {
                qcow2_cache_put(s->refcount_block_cache, &refcount_block);
                old_table_index = -1;
                qcow2_cache_discard(s->refcount_block_cache, table);
            }

            table = qcow2_cache_is_table_offset(s->l2_table_cache, offset);
            if (table != NULL) {
                qcow2_cache_discard(s->l2_table_cache, table);
            }

            if (s->discard_passthrough[type]) {
                update_refcount_discard(bs, cluster_offset, s->cluster_size);
            }
        }
    }

    ret = 0;
fail:
    if (!s->cache_discards) {
        qcow2_process_discards(bs, ret);
    }

    /* Write last changed block to disk */
    if (refcount_block) {
        qcow2_cache_put(s->refcount_block_cache, &refcount_block);
    }

    /*
     * Try to undo any updates if an error is returned (This may succeed in
     * some cases like ENOSPC for allocating a new refcount block)
     */
    if (ret < 0) {
        int dummy;
        dummy = update_refcount(bs, offset, cluster_offset - offset, addend,
                                !decrease, QCOW2_DISCARD_NEVER);
        (void)dummy;
    }

    return ret;
}
935
936
937
938
939
940
941
942
943
944int qcow2_update_cluster_refcount(BlockDriverState *bs,
945 int64_t cluster_index,
946 uint64_t addend, bool decrease,
947 enum qcow2_discard_type type)
948{
949 BDRVQcow2State *s = bs->opaque;
950 int ret;
951
952 ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend,
953 decrease, type);
954 if (ret < 0) {
955 return ret;
956 }
957
958 return 0;
959}
960
961
962
963
964
965
966
967
968
/*
 * Find a run of contiguous clusters whose refcount is currently zero,
 * WITHOUT updating any refcounts. The resulting offsets must not exceed
 * @max. Returns the host offset of the first cluster, or -errno.
 */
static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size,
                                    uint64_t max)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t i, nb_clusters, refcount;
    int ret;

    /* We can't allocate clusters if they may still be queued for discard */
    if (s->cache_discards) {
        qcow2_process_discards(bs, 0);
    }

    nb_clusters = size_to_clusters(s, size);
retry:
    /* Advance free_cluster_index until nb_clusters consecutive free
     * clusters are found; any used cluster restarts the run */
    for(i = 0; i < nb_clusters; i++) {
        uint64_t next_cluster_index = s->free_cluster_index++;
        ret = qcow2_get_refcount(bs, next_cluster_index, &refcount);

        if (ret < 0) {
            return ret;
        } else if (refcount != 0) {
            goto retry;
        }
    }

    /* Make sure that all offsets in the "allocated" range are representable
     * in the requested max */
    if (s->free_cluster_index > 0 &&
        s->free_cluster_index - 1 > (max >> s->cluster_bits))
    {
        return -EFBIG;
    }

#ifdef DEBUG_ALLOC2
    fprintf(stderr, "alloc_clusters: size=%" PRId64 " -> %" PRId64 "\n",
            size,
            (s->free_cluster_index - nb_clusters) << s->cluster_bits);
#endif
    return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
}
1009
1010int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size)
1011{
1012 int64_t offset;
1013 int ret;
1014
1015 BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
1016 do {
1017 offset = alloc_clusters_noref(bs, size, QCOW_MAX_CLUSTER_OFFSET);
1018 if (offset < 0) {
1019 return offset;
1020 }
1021
1022 ret = update_refcount(bs, offset, size, 1, false, QCOW2_DISCARD_NEVER);
1023 } while (ret == -EAGAIN);
1024
1025 if (ret < 0) {
1026 return ret;
1027 }
1028
1029 return offset;
1030}
1031
/*
 * Try to allocate up to @nb_clusters contiguous clusters at the fixed host
 * offset @offset. Stops early at the first cluster that is already in use.
 * Returns the number of clusters actually allocated (possibly 0), or
 * -errno on failure.
 */
int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
                                int64_t nb_clusters)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t cluster_index, refcount;
    uint64_t i;
    int ret;

    assert(nb_clusters >= 0);
    if (nb_clusters == 0) {
        return 0;
    }

    do {
        /* Check how many clusters there are free */
        cluster_index = offset >> s->cluster_bits;
        for(i = 0; i < nb_clusters; i++) {
            ret = qcow2_get_refcount(bs, cluster_index++, &refcount);
            if (ret < 0) {
                return ret;
            } else if (refcount != 0) {
                break;
            }
        }

        /* And then allocate them (retry on -EAGAIN: refcount metadata was
         * reallocated and the free range must be re-checked) */
        ret = update_refcount(bs, offset, i << s->cluster_bits, 1, false,
                              QCOW2_DISCARD_NEVER);
    } while (ret == -EAGAIN);

    if (ret < 0) {
        return ret;
    }

    return i;
}
1068
1069
1070
/*
 * Allocate @size bytes (at most one cluster) for compressed cluster data.
 * Sub-cluster allocations are packed back to back: s->free_byte_offset
 * points into a partially used cluster whose refcount is incremented once
 * per allocation that touches it. Returns the host offset, or -errno.
 *
 * only used to allocate compressed sectors. We try to allocate
 * contiguous sectors. size must be <= cluster_size
 */
int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
{
    BDRVQcow2State *s = bs->opaque;
    int64_t offset;
    size_t free_in_cluster;
    int ret;

    BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES);
    assert(size > 0 && size <= s->cluster_size);
    assert(!s->free_byte_offset || offset_into_cluster(s, s->free_byte_offset));

    offset = s->free_byte_offset;

    if (offset) {
        uint64_t refcount;
        ret = qcow2_get_refcount(bs, offset >> s->cluster_bits, &refcount);
        if (ret < 0) {
            return ret;
        }

        /* Cannot bump the refcount any further; start a fresh cluster */
        if (refcount == s->refcount_max) {
            offset = 0;
        }
    }

    free_in_cluster = s->cluster_size - offset_into_cluster(s, offset);
    do {
        /* Not enough room left in the current cluster: grab a new one */
        if (!offset || free_in_cluster < size) {
            int64_t new_cluster;

            /* Compressed L2 entries can only store limited offsets */
            new_cluster = alloc_clusters_noref(bs, s->cluster_size,
                                               MIN(s->cluster_offset_mask,
                                                   QCOW_MAX_CLUSTER_OFFSET));
            if (new_cluster < 0) {
                return new_cluster;
            }

            if (new_cluster == 0) {
                qcow2_signal_corruption(bs, true, -1, -1, "Preventing invalid "
                                        "allocation of compressed cluster "
                                        "at offset 0");
                return -EIO;
            }

            if (!offset || ROUND_UP(offset, s->cluster_size) != new_cluster) {
                offset = new_cluster;
                free_in_cluster = s->cluster_size;
            } else {
                /* The new cluster directly follows the old partial one, so
                 * the allocation may span both */
                free_in_cluster += s->cluster_size;
            }
        }

        assert(offset);
        ret = update_refcount(bs, offset, size, 1, false, QCOW2_DISCARD_NEVER);
        if (ret < 0) {
            offset = 0;
        }
    } while (ret == -EAGAIN);
    if (ret < 0) {
        return ret;
    }

    /* The cluster refcount was incremented; refcount blocks must be flushed
     * before the caller's L2 table updates */
    qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);

    s->free_byte_offset = offset + size;
    if (!offset_into_cluster(s, s->free_byte_offset)) {
        s->free_byte_offset = 0;
    }

    return offset;
}
1144
1145void qcow2_free_clusters(BlockDriverState *bs,
1146 int64_t offset, int64_t size,
1147 enum qcow2_discard_type type)
1148{
1149 int ret;
1150
1151 BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_FREE);
1152 ret = update_refcount(bs, offset, size, 1, true, type);
1153 if (ret < 0) {
1154 fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-ret));
1155
1156 }
1157}
1158
1159
1160
1161
1162
/*
 * Free the cluster (or compressed data) referenced by the L2 entry
 * @l2_entry, dispatching on the entry's cluster type. Entries that do not
 * reference host storage (zero-plain, unallocated) are a no-op.
 */
void qcow2_free_any_cluster(BlockDriverState *bs, uint64_t l2_entry,
                            enum qcow2_discard_type type)
{
    BDRVQcow2State *s = bs->opaque;
    QCow2ClusterType ctype = qcow2_get_cluster_type(bs, l2_entry);

    if (has_data_file(bs)) {
        /* With an external data file refcounts do not apply; at most pass
         * the discard through to the data file */
        if (s->discard_passthrough[type] &&
            (ctype == QCOW2_CLUSTER_NORMAL ||
             ctype == QCOW2_CLUSTER_ZERO_ALLOC))
        {
            bdrv_pdiscard(s->data_file, l2_entry & L2E_OFFSET_MASK,
                          s->cluster_size);
        }
        return;
    }

    switch (ctype) {
    case QCOW2_CLUSTER_COMPRESSED:
        {
            uint64_t coffset;
            int csize;

            /* Compressed data may span partial clusters; free exactly the
             * byte range encoded in the L2 entry */
            qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize);
            qcow2_free_clusters(bs, coffset, csize, type);
        }
        break;
    case QCOW2_CLUSTER_NORMAL:
    case QCOW2_CLUSTER_ZERO_ALLOC:
        if (offset_into_cluster(s, l2_entry & L2E_OFFSET_MASK)) {
            qcow2_signal_corruption(bs, false, -1, -1,
                                    "Cannot free unaligned cluster %#llx",
                                    l2_entry & L2E_OFFSET_MASK);
        } else {
            qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
                                s->cluster_size, type);
        }
        break;
    case QCOW2_CLUSTER_ZERO_PLAIN:
    case QCOW2_CLUSTER_UNALLOCATED:
        /* No host clusters backing these entry types */
        break;
    default:
        abort();
    }
}
1208
1209int coroutine_fn qcow2_write_caches(BlockDriverState *bs)
1210{
1211 BDRVQcow2State *s = bs->opaque;
1212 int ret;
1213
1214 ret = qcow2_cache_write(bs, s->l2_table_cache);
1215 if (ret < 0) {
1216 return ret;
1217 }
1218
1219 if (qcow2_need_accurate_refcounts(s)) {
1220 ret = qcow2_cache_write(bs, s->refcount_block_cache);
1221 if (ret < 0) {
1222 return ret;
1223 }
1224 }
1225
1226 return 0;
1227}
1228
1229int coroutine_fn qcow2_flush_caches(BlockDriverState *bs)
1230{
1231 int ret = qcow2_write_caches(bs);
1232 if (ret < 0) {
1233 return ret;
1234 }
1235
1236 return bdrv_flush(bs->file->bs);
1237}
1238
1239
1240
1241
1242
1243
1244
1245int qcow2_update_snapshot_refcount(BlockDriverState *bs,
1246 int64_t l1_table_offset, int l1_size, int addend)
1247{
1248 BDRVQcow2State *s = bs->opaque;
1249 uint64_t *l1_table, *l2_slice, l2_offset, entry, l1_size2, refcount;
1250 bool l1_allocated = false;
1251 int64_t old_entry, old_l2_offset;
1252 unsigned slice, slice_size2, n_slices;
1253 int i, j, l1_modified = 0;
1254 int ret;
1255
1256 assert(addend >= -1 && addend <= 1);
1257
1258 l2_slice = NULL;
1259 l1_table = NULL;
1260 l1_size2 = l1_size * L1E_SIZE;
1261 slice_size2 = s->l2_slice_size * l2_entry_size(s);
1262 n_slices = s->cluster_size / slice_size2;
1263
1264 s->cache_discards = true;
1265
1266
1267
1268
1269 if (l1_table_offset != s->l1_table_offset) {
1270 l1_table = g_try_malloc0(l1_size2);
1271 if (l1_size2 && l1_table == NULL) {
1272 ret = -ENOMEM;
1273 goto fail;
1274 }
1275 l1_allocated = true;
1276
1277 ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
1278 if (ret < 0) {
1279 goto fail;
1280 }
1281
1282 for (i = 0; i < l1_size; i++) {
1283 be64_to_cpus(&l1_table[i]);
1284 }
1285 } else {
1286 assert(l1_size == s->l1_size);
1287 l1_table = s->l1_table;
1288 l1_allocated = false;
1289 }
1290
1291 for (i = 0; i < l1_size; i++) {
1292 l2_offset = l1_table[i];
1293 if (l2_offset) {
1294 old_l2_offset = l2_offset;
1295 l2_offset &= L1E_OFFSET_MASK;
1296
1297 if (offset_into_cluster(s, l2_offset)) {
1298 qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#"
1299 PRIx64 " unaligned (L1 index: %#x)",
1300 l2_offset, i);
1301 ret = -EIO;
1302 goto fail;
1303 }
1304
1305 for (slice = 0; slice < n_slices; slice++) {
1306 ret = qcow2_cache_get(bs, s->l2_table_cache,
1307 l2_offset + slice * slice_size2,
1308 (void **) &l2_slice);
1309 if (ret < 0) {
1310 goto fail;
1311 }
1312
1313 for (j = 0; j < s->l2_slice_size; j++) {
1314 uint64_t cluster_index;
1315 uint64_t offset;
1316
1317 entry = get_l2_entry(s, l2_slice, j);
1318 old_entry = entry;
1319 entry &= ~QCOW_OFLAG_COPIED;
1320 offset = entry & L2E_OFFSET_MASK;
1321
1322 switch (qcow2_get_cluster_type(bs, entry)) {
1323 case QCOW2_CLUSTER_COMPRESSED:
1324 if (addend != 0) {
1325 uint64_t coffset;
1326 int csize;
1327
1328 qcow2_parse_compressed_l2_entry(bs, entry,
1329 &coffset, &csize);
1330 ret = update_refcount(
1331 bs, coffset, csize,
1332 abs(addend), addend < 0,
1333 QCOW2_DISCARD_SNAPSHOT);
1334 if (ret < 0) {
1335 goto fail;
1336 }
1337 }
1338
1339 refcount = 2;
1340 break;
1341
1342 case QCOW2_CLUSTER_NORMAL:
1343 case QCOW2_CLUSTER_ZERO_ALLOC:
1344 if (offset_into_cluster(s, offset)) {
1345
1346 int l2_index = slice * s->l2_slice_size + j;
1347 qcow2_signal_corruption(
1348 bs, true, -1, -1, "Cluster "
1349 "allocation offset %#" PRIx64
1350 " unaligned (L2 offset: %#"
1351 PRIx64 ", L2 index: %#x)",
1352 offset, l2_offset, l2_index);
1353 ret = -EIO;
1354 goto fail;
1355 }
1356
1357 cluster_index = offset >> s->cluster_bits;
1358 assert(cluster_index);
1359 if (addend != 0) {
1360 ret = qcow2_update_cluster_refcount(
1361 bs, cluster_index, abs(addend), addend < 0,
1362 QCOW2_DISCARD_SNAPSHOT);
1363 if (ret < 0) {
1364 goto fail;
1365 }
1366 }
1367
1368 ret = qcow2_get_refcount(bs, cluster_index, &refcount);
1369 if (ret < 0) {
1370 goto fail;
1371 }
1372 break;
1373
1374 case QCOW2_CLUSTER_ZERO_PLAIN:
1375 case QCOW2_CLUSTER_UNALLOCATED:
1376 refcount = 0;
1377 break;
1378
1379 default:
1380 abort();
1381 }
1382
1383 if (refcount == 1) {
1384 entry |= QCOW_OFLAG_COPIED;
1385 }
1386 if (entry != old_entry) {
1387 if (addend > 0) {
1388 qcow2_cache_set_dependency(bs, s->l2_table_cache,
1389 s->refcount_block_cache);
1390 }
1391 set_l2_entry(s, l2_slice, j, entry);
1392 qcow2_cache_entry_mark_dirty(s->l2_table_cache,
1393 l2_slice);
1394 }
1395 }
1396
1397 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
1398 }
1399
1400 if (addend != 0) {
1401 ret = qcow2_update_cluster_refcount(bs, l2_offset >>
1402 s->cluster_bits,
1403 abs(addend), addend < 0,
1404 QCOW2_DISCARD_SNAPSHOT);
1405 if (ret < 0) {
1406 goto fail;
1407 }
1408 }
1409 ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits,
1410 &refcount);
1411 if (ret < 0) {
1412 goto fail;
1413 } else if (refcount == 1) {
1414 l2_offset |= QCOW_OFLAG_COPIED;
1415 }
1416 if (l2_offset != old_l2_offset) {
1417 l1_table[i] = l2_offset;
1418 l1_modified = 1;
1419 }
1420 }
1421 }
1422
1423 ret = bdrv_flush(bs);
1424fail:
1425 if (l2_slice) {
1426 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
1427 }
1428
1429 s->cache_discards = false;
1430 qcow2_process_discards(bs, ret);
1431
1432
1433 if (ret == 0 && addend >= 0 && l1_modified) {
1434 for (i = 0; i < l1_size; i++) {
1435 cpu_to_be64s(&l1_table[i]);
1436 }
1437
1438 ret = bdrv_pwrite_sync(bs->file, l1_table_offset,
1439 l1_table, l1_size2);
1440
1441 for (i = 0; i < l1_size; i++) {
1442 be64_to_cpus(&l1_table[i]);
1443 }
1444 }
1445 if (l1_allocated)
1446 g_free(l1_table);
1447 return ret;
1448}
1449
1450
1451
1452
1453
1454
1455
1456
1457static uint64_t refcount_array_byte_size(BDRVQcow2State *s, uint64_t entries)
1458{
1459
1460
1461
1462
1463 assert(entries < (UINT64_C(1) << (64 - 9)));
1464
1465
1466
1467
1468 return DIV_ROUND_UP(entries << s->refcount_order, 8);
1469}
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480static int realloc_refcount_array(BDRVQcow2State *s, void **array,
1481 int64_t *size, int64_t new_size)
1482{
1483 int64_t old_byte_size, new_byte_size;
1484 void *new_ptr;
1485
1486
1487 old_byte_size = size_to_clusters(s, refcount_array_byte_size(s, *size))
1488 * s->cluster_size;
1489 new_byte_size = size_to_clusters(s, refcount_array_byte_size(s, new_size))
1490 * s->cluster_size;
1491
1492 if (new_byte_size == old_byte_size) {
1493 *size = new_size;
1494 return 0;
1495 }
1496
1497 assert(new_byte_size > 0);
1498
1499 if (new_byte_size > SIZE_MAX) {
1500 return -ENOMEM;
1501 }
1502
1503 new_ptr = g_try_realloc(*array, new_byte_size);
1504 if (!new_ptr) {
1505 return -ENOMEM;
1506 }
1507
1508 if (new_byte_size > old_byte_size) {
1509 memset((char *)new_ptr + old_byte_size, 0,
1510 new_byte_size - old_byte_size);
1511 }
1512
1513 *array = new_ptr;
1514 *size = new_size;
1515
1516 return 0;
1517}
1518
1519
1520
1521
1522
1523
1524
1525
1526int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
1527 void **refcount_table,
1528 int64_t *refcount_table_size,
1529 int64_t offset, int64_t size)
1530{
1531 BDRVQcow2State *s = bs->opaque;
1532 uint64_t start, last, cluster_offset, k, refcount;
1533 int64_t file_len;
1534 int ret;
1535
1536 if (size <= 0) {
1537 return 0;
1538 }
1539
1540 file_len = bdrv_getlength(bs->file->bs);
1541 if (file_len < 0) {
1542 return file_len;
1543 }
1544
1545
1546
1547
1548
1549
1550 if (offset + size - file_len >= s->cluster_size) {
1551 fprintf(stderr, "ERROR: counting reference for region exceeding the "
1552 "end of the file by one cluster or more: offset 0x%" PRIx64
1553 " size 0x%" PRIx64 "\n", offset, size);
1554 res->corruptions++;
1555 return 0;
1556 }
1557
1558 start = start_of_cluster(s, offset);
1559 last = start_of_cluster(s, offset + size - 1);
1560 for(cluster_offset = start; cluster_offset <= last;
1561 cluster_offset += s->cluster_size) {
1562 k = cluster_offset >> s->cluster_bits;
1563 if (k >= *refcount_table_size) {
1564 ret = realloc_refcount_array(s, refcount_table,
1565 refcount_table_size, k + 1);
1566 if (ret < 0) {
1567 res->check_errors++;
1568 return ret;
1569 }
1570 }
1571
1572 refcount = s->get_refcount(*refcount_table, k);
1573 if (refcount == s->refcount_max) {
1574 fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
1575 "\n", cluster_offset);
1576 fprintf(stderr, "Use qemu-img amend to increase the refcount entry "
1577 "width or qemu-img convert to create a clean copy if the "
1578 "image cannot be opened for writing\n");
1579 res->corruptions++;
1580 continue;
1581 }
1582 s->set_refcount(*refcount_table, k, refcount + 1);
1583 }
1584
1585 return 0;
1586}
1587
1588
/* Flag bits for check_refcounts_l1()/check_refcounts_l2() */
enum {
    CHECK_FRAG_INFO = 0x2, /* collect fragmentation statistics in res->bfi */
};
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
/*
 * Repair a corrupted L2 entry by rewriting it on disk as a zero cluster.
 *
 * With subclusters, the entry's offset is cleared and every allocated
 * subcluster is turned into a zero subcluster; without subclusters the
 * entry becomes a plain zero cluster (QCOW_OFLAG_ZERO).
 *
 * @active selects which metadata overlap check to skip for the write
 * (active vs. inactive L2).  If the overlap check itself fails,
 * *metadata_overlap is set to true (when non-NULL) and nothing is
 * written.
 *
 * On success, res->corruptions is decremented and corruptions_fixed
 * incremented; on failure, check_errors is incremented and a negative
 * errno returned.
 */
static int fix_l2_entry_by_zero(BlockDriverState *bs, BdrvCheckResult *res,
                                uint64_t l2_offset,
                                uint64_t *l2_table, int l2_index, bool active,
                                bool *metadata_overlap)
{
    BDRVQcow2State *s = bs->opaque;
    int ret;
    /* Index of the entry's first 64-bit word within l2_table */
    int idx = l2_index * (l2_entry_size(s) / sizeof(uint64_t));
    /* On-disk byte position of the entry to rewrite */
    uint64_t l2e_offset = l2_offset + (uint64_t)l2_index * l2_entry_size(s);
    int ign = active ? QCOW2_OL_ACTIVE_L2 : QCOW2_OL_INACTIVE_L2;

    if (has_subclusters(s)) {
        uint64_t l2_bitmap = get_l2_bitmap(s, l2_table, l2_index);

        /* Allocated subclusters become zero subclusters */
        l2_bitmap |= l2_bitmap << 32;
        l2_bitmap &= QCOW_L2_BITMAP_ALL_ZEROES;

        set_l2_bitmap(s, l2_table, l2_index, l2_bitmap);
        set_l2_entry(s, l2_table, l2_index, 0);
    } else {
        set_l2_entry(s, l2_table, l2_index, QCOW_OFLAG_ZERO);
    }

    ret = qcow2_pre_write_overlap_check(bs, ign, l2e_offset, l2_entry_size(s),
                                        false);
    /* Report overlap-check failure separately so the caller can abort */
    if (metadata_overlap) {
        *metadata_overlap = ret < 0;
    }
    if (ret < 0) {
        fprintf(stderr, "ERROR: Overlap check failed\n");
        goto fail;
    }

    /* Write only this one entry back to disk, synchronously */
    ret = bdrv_pwrite_sync(bs->file, l2e_offset, &l2_table[idx],
                           l2_entry_size(s));
    if (ret < 0) {
        fprintf(stderr, "ERROR: Failed to overwrite L2 "
                "table entry: %s\n", strerror(-ret));
        goto fail;
    }

    res->corruptions--;
    res->corruptions_fixed++;
    return 0;

fail:
    res->check_errors++;
    return ret;
}
1652
1653
1654
1655
1656
1657
1658
1659
1660
/*
 * Validate every entry of the L2 table at @l2_offset and account, in the
 * in-memory refcount table (IMRT), one reference for each host cluster
 * the table maps (data and compressed clusters).
 *
 * Corruption findings are counted in @res and do not fail the call.
 * With BDRV_FIX_ERRORS in @fix, a misaligned preallocated cluster that
 * carries no data is repaired in place via fix_l2_entry_by_zero().
 * With CHECK_FRAG_INFO in @flags, fragmentation statistics are collected
 * in res->bfi.
 *
 * @active: whether this L2 table belongs to the active L1 chain (selects
 * the overlap check used during repair writes).
 *
 * Returns 0 if the table could be processed, a negative errno on fatal
 * error (I/O failure, failed repair, OOM while growing the IMRT).
 */
static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
                              void **refcount_table,
                              int64_t *refcount_table_size, int64_t l2_offset,
                              int flags, BdrvCheckMode fix, bool active)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t l2_entry, l2_bitmap;
    uint64_t next_contiguous_offset = 0;
    int i, ret;
    size_t l2_size_bytes = s->l2_size * l2_entry_size(s);
    g_autofree uint64_t *l2_table = g_malloc(l2_size_bytes);
    bool metadata_overlap;

    /* Read the whole L2 table from disk */
    ret = bdrv_pread(bs->file, l2_offset, l2_table, l2_size_bytes);
    if (ret < 0) {
        fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
        res->check_errors++;
        return ret;
    }

    /* Check all entries */
    for (i = 0; i < s->l2_size; i++) {
        uint64_t coffset;
        int csize;
        QCow2ClusterType type;

        l2_entry = get_l2_entry(s, l2_table, i);
        l2_bitmap = get_l2_bitmap(s, l2_table, i);
        type = qcow2_get_cluster_type(bs, l2_entry);

        if (type != QCOW2_CLUSTER_COMPRESSED) {
            /* Reserved bits only apply to standard cluster descriptors */
            if (l2_entry & L2E_STD_RESERVED_MASK) {
                fprintf(stderr, "ERROR found l2 entry with reserved bits set: "
                        "%" PRIx64 "\n", l2_entry);
                res->corruptions++;
            }
        }

        switch (type) {
        case QCOW2_CLUSTER_COMPRESSED:
            /* Compressed clusters must never carry QCOW_OFLAG_COPIED */
            if (l2_entry & QCOW_OFLAG_COPIED) {
                fprintf(stderr, "ERROR: coffset=0x%" PRIx64 ": "
                        "copied flag must never be set for compressed "
                        "clusters\n", l2_entry & s->cluster_offset_mask);
                l2_entry &= ~QCOW_OFLAG_COPIED;
                res->corruptions++;
            }

            if (has_data_file(bs)) {
                fprintf(stderr, "ERROR compressed cluster %d with data file, "
                        "entry=0x%" PRIx64 "\n", i, l2_entry);
                res->corruptions++;
                break;
            }

            if (l2_bitmap) {
                fprintf(stderr, "ERROR compressed cluster %d with non-zero "
                        "subcluster allocation bitmap, entry=0x%" PRIx64 "\n",
                        i, l2_entry);
                res->corruptions++;
                break;
            }

            /* Mark the compressed data range as used */
            qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize);
            ret = qcow2_inc_refcounts_imrt(
                bs, res, refcount_table, refcount_table_size, coffset, csize);
            if (ret < 0) {
                return ret;
            }

            if (flags & CHECK_FRAG_INFO) {
                res->bfi.allocated_clusters++;
                res->bfi.compressed_clusters++;

                /*
                 * Every compressed cluster is counted as fragmented,
                 * unconditionally (no contiguity tracking is attempted
                 * for sub-cluster-sized compressed data).
                 */
                res->bfi.fragmented_clusters++;
            }
            break;

        case QCOW2_CLUSTER_ZERO_ALLOC:
        case QCOW2_CLUSTER_NORMAL:
        {
            uint64_t offset = l2_entry & L2E_OFFSET_MASK;

            /* No subcluster may be both allocated (low 32 bits) and
             * zero (high 32 bits) at the same time */
            if ((l2_bitmap >> 32) & l2_bitmap) {
                res->corruptions++;
                fprintf(stderr, "ERROR offset=%" PRIx64 ": Allocated "
                        "cluster has corrupted subcluster allocation bitmap\n",
                        offset);
            }

            /* Correct offsets are cluster aligned */
            if (offset_into_cluster(s, offset)) {
                bool contains_data;
                res->corruptions++;

                if (has_subclusters(s)) {
                    contains_data = (l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC);
                } else {
                    contains_data = !(l2_entry & QCOW_OFLAG_ZERO);
                }

                if (!contains_data) {
                    fprintf(stderr, "%s offset=%" PRIx64 ": Preallocated "
                            "cluster is not properly aligned; L2 entry "
                            "corrupted.\n",
                            fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR",
                            offset);
                    if (fix & BDRV_FIX_ERRORS) {
                        ret = fix_l2_entry_by_zero(bs, res, l2_offset,
                                                   l2_table, i, active,
                                                   &metadata_overlap);
                        if (metadata_overlap) {
                            /*
                             * The overlap check refused the write, so
                             * nothing was repaired; stop processing this
                             * L2 table immediately.
                             */
                            return ret;
                        }

                        if (ret == 0) {
                            /*
                             * The entry is now a zero cluster and maps no
                             * host cluster, so do not count a reference
                             * for it.
                             */
                            continue;
                        }

                        /*
                         * Repair failed for some other reason; fall
                         * through and keep checking the rest of this L2
                         * table, counting the cluster as-is.
                         */
                    }
                } else {
                    fprintf(stderr, "ERROR offset=%" PRIx64 ": Data cluster is "
                            "not properly aligned; L2 entry corrupted.\n", offset);
                }
            }

            if (flags & CHECK_FRAG_INFO) {
                res->bfi.allocated_clusters++;
                /* A gap between consecutive mapped clusters counts as
                 * fragmentation */
                if (next_contiguous_offset &&
                    offset != next_contiguous_offset) {
                    res->bfi.fragmented_clusters++;
                }
                next_contiguous_offset = offset + s->cluster_size;
            }

            /* Mark cluster as used (external data files carry their own
             * data, so no refcount is kept for them) */
            if (!has_data_file(bs)) {
                ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table,
                                               refcount_table_size,
                                               offset, s->cluster_size);
                if (ret < 0) {
                    return ret;
                }
            }
            break;
        }

        case QCOW2_CLUSTER_ZERO_PLAIN:
            /* Plain zero clusters carry no subcluster bitmap */
            assert(!l2_bitmap);
            break;

        case QCOW2_CLUSTER_UNALLOCATED:
            if (l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC) {
                res->corruptions++;
                fprintf(stderr, "ERROR: Unallocated "
                        "cluster has non-zero subcluster allocation map\n");
            }
            break;

        default:
            abort();
        }
    }

    return 0;
}
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860static int check_refcounts_l1(BlockDriverState *bs,
1861 BdrvCheckResult *res,
1862 void **refcount_table,
1863 int64_t *refcount_table_size,
1864 int64_t l1_table_offset, int l1_size,
1865 int flags, BdrvCheckMode fix, bool active)
1866{
1867 BDRVQcow2State *s = bs->opaque;
1868 size_t l1_size_bytes = l1_size * L1E_SIZE;
1869 g_autofree uint64_t *l1_table = NULL;
1870 uint64_t l2_offset;
1871 int i, ret;
1872
1873 if (!l1_size) {
1874 return 0;
1875 }
1876
1877
1878 ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, refcount_table_size,
1879 l1_table_offset, l1_size_bytes);
1880 if (ret < 0) {
1881 return ret;
1882 }
1883
1884 l1_table = g_try_malloc(l1_size_bytes);
1885 if (l1_table == NULL) {
1886 res->check_errors++;
1887 return -ENOMEM;
1888 }
1889
1890
1891 ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size_bytes);
1892 if (ret < 0) {
1893 fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
1894 res->check_errors++;
1895 return ret;
1896 }
1897
1898 for (i = 0; i < l1_size; i++) {
1899 be64_to_cpus(&l1_table[i]);
1900 }
1901
1902
1903 for (i = 0; i < l1_size; i++) {
1904 if (!l1_table[i]) {
1905 continue;
1906 }
1907
1908 if (l1_table[i] & L1E_RESERVED_MASK) {
1909 fprintf(stderr, "ERROR found L1 entry with reserved bits set: "
1910 "%" PRIx64 "\n", l1_table[i]);
1911 res->corruptions++;
1912 }
1913
1914 l2_offset = l1_table[i] & L1E_OFFSET_MASK;
1915
1916
1917 ret = qcow2_inc_refcounts_imrt(bs, res,
1918 refcount_table, refcount_table_size,
1919 l2_offset, s->cluster_size);
1920 if (ret < 0) {
1921 return ret;
1922 }
1923
1924
1925 if (offset_into_cluster(s, l2_offset)) {
1926 fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
1927 "cluster aligned; L1 entry corrupted\n", l2_offset);
1928 res->corruptions++;
1929 }
1930
1931
1932 ret = check_refcounts_l2(bs, res, refcount_table,
1933 refcount_table_size, l2_offset, flags,
1934 fix, active);
1935 if (ret < 0) {
1936 return ret;
1937 }
1938 }
1939
1940 return 0;
1941}
1942
1943
1944
1945
1946
1947
1948
1949
1950
/*
 * Verify that the QCOW_OFLAG_COPIED flag in L1 and active L2 entries is
 * consistent with the entry's refcount: the flag must be set iff the
 * refcount equals 1.  Mismatches are counted as corruptions and,
 * depending on @fix, repaired by rewriting the affected L1 entry or L2
 * table on disk.
 *
 * NOTE(review): this walks only the active L1 table (s->l1_table);
 * snapshot tables appear to be handled elsewhere — confirm with callers.
 *
 * Returns 0 on success; a negative errno if reading or (re)writing a
 * table fails (recorded in res->check_errors).
 */
static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
                              BdrvCheckMode fix)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t *l2_table = qemu_blockalign(bs, s->cluster_size);
    int ret;
    uint64_t refcount;
    int i, j;
    bool repair;

    if (fix & BDRV_FIX_ERRORS) {
        /* Always repair */
        repair = true;
    } else if (fix & BDRV_FIX_LEAKS) {
        /*
         * Only repair if the image is otherwise clean: with errors,
         * corruptions or leaks still pending, the refcounts cannot be
         * trusted as the source of truth for OFLAG_COPIED.
         */
        repair = !res->check_errors && !res->corruptions && !res->leaks;
    } else {
        repair = false;
    }

    for (i = 0; i < s->l1_size; i++) {
        uint64_t l1_entry = s->l1_table[i];
        uint64_t l2_offset = l1_entry & L1E_OFFSET_MASK;
        int l2_dirty = 0;   /* number of repaired entries in this L2 table */

        if (!l2_offset) {
            continue;
        }

        ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits,
                                 &refcount);
        if (ret < 0) {
            /* don't print message nor increment check_errors */
            continue;
        }
        /* OFLAG_COPIED must be set exactly when the refcount is 1 */
        if ((refcount == 1) != ((l1_entry & QCOW_OFLAG_COPIED) != 0)) {
            res->corruptions++;
            fprintf(stderr, "%s OFLAG_COPIED L2 cluster: l1_index=%d "
                    "l1_entry=%" PRIx64 " refcount=%" PRIu64 "\n",
                    repair ? "Repairing" : "ERROR", i, l1_entry, refcount);
            if (repair) {
                s->l1_table[i] = refcount == 1
                               ? l1_entry |  QCOW_OFLAG_COPIED
                               : l1_entry & ~QCOW_OFLAG_COPIED;
                ret = qcow2_write_l1_entry(bs, i);
                if (ret < 0) {
                    res->check_errors++;
                    goto fail;
                }
                res->corruptions--;
                res->corruptions_fixed++;
            }
        }

        ret = bdrv_pread(bs->file, l2_offset, l2_table,
                         s->l2_size * l2_entry_size(s));
        if (ret < 0) {
            fprintf(stderr, "ERROR: Could not read L2 table: %s\n",
                    strerror(-ret));
            res->check_errors++;
            goto fail;
        }

        for (j = 0; j < s->l2_size; j++) {
            uint64_t l2_entry = get_l2_entry(s, l2_table, j);
            uint64_t data_offset = l2_entry & L2E_OFFSET_MASK;
            QCow2ClusterType cluster_type = qcow2_get_cluster_type(bs, l2_entry);

            if (cluster_type == QCOW2_CLUSTER_NORMAL ||
                cluster_type == QCOW2_CLUSTER_ZERO_ALLOC) {
                if (has_data_file(bs)) {
                    /* external data clusters are always "refcount 1" */
                    refcount = 1;
                } else {
                    ret = qcow2_get_refcount(bs,
                                             data_offset >> s->cluster_bits,
                                             &refcount);
                    if (ret < 0) {
                        /* don't print message nor increment check_errors */
                        continue;
                    }
                }
                if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) {
                    res->corruptions++;
                    fprintf(stderr, "%s OFLAG_COPIED data cluster: "
                            "l2_entry=%" PRIx64 " refcount=%" PRIu64 "\n",
                            repair ? "Repairing" : "ERROR", l2_entry, refcount);
                    if (repair) {
                        /* Fix only in memory for now; written back below */
                        set_l2_entry(s, l2_table, j,
                                     refcount == 1 ?
                                     l2_entry |  QCOW_OFLAG_COPIED :
                                     l2_entry & ~QCOW_OFLAG_COPIED);
                        l2_dirty++;
                    }
                }
            }
        }

        if (l2_dirty > 0) {
            ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
                                                l2_offset, s->cluster_size,
                                                false);
            if (ret < 0) {
                fprintf(stderr, "ERROR: Could not write L2 table; metadata "
                        "overlap check failed: %s\n", strerror(-ret));
                res->check_errors++;
                goto fail;
            }

            /* Write the whole repaired L2 table back at once */
            ret = bdrv_pwrite(bs->file, l2_offset, l2_table,
                              s->cluster_size);
            if (ret < 0) {
                fprintf(stderr, "ERROR: Could not write L2 table: %s\n",
                        strerror(-ret));
                res->check_errors++;
                goto fail;
            }
            res->corruptions -= l2_dirty;
            res->corruptions_fixed += l2_dirty;
        }
    }

    ret = 0;

fail:
    qemu_vfree(l2_table);
    return ret;
}
2080
2081
2082
2083
2084
/*
 * Walk the on-disk refcount table and account, in the IMRT, the clusters
 * occupied by every refcount block.  Detects reserved bits, misaligned
 * refblocks, refblocks outside the image (optionally repaired by growing
 * the file when BDRV_FIX_ERRORS is set), and refblocks referenced more
 * than once.  *rebuild is set when the refcount structure needs a full
 * rebuild.
 *
 * Returns 0 on success, a negative errno on hard failure (OOM or I/O).
 */
static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
                           BdrvCheckMode fix, bool *rebuild,
                           void **refcount_table, int64_t *nb_clusters)
{
    BDRVQcow2State *s = bs->opaque;
    int64_t i, size;
    int ret;

    for(i = 0; i < s->refcount_table_size; i++) {
        uint64_t offset, cluster;
        offset = s->refcount_table[i] & REFT_OFFSET_MASK;
        cluster = offset >> s->cluster_bits;

        if (s->refcount_table[i] & REFT_RESERVED_MASK) {
            fprintf(stderr, "ERROR refcount table entry %" PRId64 " has "
                    "reserved bits set\n", i);
            res->corruptions++;
            *rebuild = true;
            continue;
        }

        /* Refcount blocks are cluster aligned */
        if (offset_into_cluster(s, offset)) {
            fprintf(stderr, "ERROR refcount block %" PRId64 " is not "
                    "cluster aligned; refcount table entry corrupted\n", i);
            res->corruptions++;
            *rebuild = true;
            continue;
        }

        if (cluster >= *nb_clusters) {
            res->corruptions++;
            fprintf(stderr, "%s refcount block %" PRId64 " is outside image\n",
                    fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);

            if (fix & BDRV_FIX_ERRORS) {
                int64_t new_nb_clusters;
                Error *local_err = NULL;

                /* Guard the truncate target against int64 overflow */
                if (offset > INT64_MAX - s->cluster_size) {
                    ret = -EINVAL;
                    goto resize_fail;
                }

                /* Grow the file so the refblock lies inside the image */
                ret = bdrv_truncate(bs->file, offset + s->cluster_size, false,
                                    PREALLOC_MODE_OFF, 0, &local_err);
                if (ret < 0) {
                    error_report_err(local_err);
                    goto resize_fail;
                }
                size = bdrv_getlength(bs->file->bs);
                if (size < 0) {
                    ret = size;
                    goto resize_fail;
                }

                new_nb_clusters = size_to_clusters(s, size);
                assert(new_nb_clusters >= *nb_clusters);

                /* Grow the IMRT to cover the enlarged image */
                ret = realloc_refcount_array(s, refcount_table,
                                             nb_clusters, new_nb_clusters);
                if (ret < 0) {
                    res->check_errors++;
                    return ret;
                }

                /* The resize must have brought the refblock in range */
                if (cluster >= *nb_clusters) {
                    ret = -EINVAL;
                    goto resize_fail;
                }

                res->corruptions--;
                res->corruptions_fixed++;
                ret = qcow2_inc_refcounts_imrt(bs, res,
                                               refcount_table, nb_clusters,
                                               offset, s->cluster_size);
                if (ret < 0) {
                    return ret;
                }
                /*
                 * No need to check the refcount of the freshly counted
                 * refblock here: it was just written, so its refcount
                 * is checked below once processing resumes.
                 */
                continue;

resize_fail:
                /* Could not repair; a rebuild is required instead */
                *rebuild = true;
                fprintf(stderr, "ERROR could not resize image: %s\n",
                        strerror(-ret));
            }
            continue;
        }

        if (offset != 0) {
            /* Count the refblock cluster itself and verify it is only
             * referenced once */
            ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
                                           offset, s->cluster_size);
            if (ret < 0) {
                return ret;
            }
            if (s->get_refcount(*refcount_table, cluster) != 1) {
                fprintf(stderr, "ERROR refcount block %" PRId64
                        " refcount=%" PRIu64 "\n", i,
                        s->get_refcount(*refcount_table, cluster));
                res->corruptions++;
                *rebuild = true;
            }
        }
    }

    return 0;
}
2195
2196
2197
2198
/*
 * Build, from scratch, the in-memory refcount table (IMRT) describing
 * what the image's refcounts *should* be: header, active L1/L2 chain,
 * snapshots and snapshot table, refcount structures, encryption header
 * and bitmap directory.  Sets *rebuild when the on-disk refcount
 * structure is found to need rebuilding (via check_refblocks()).
 *
 * Returns 0 on success, a negative errno on hard failure; ordinary
 * corruption findings are accumulated in @res.
 */
static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
                               BdrvCheckMode fix, bool *rebuild,
                               void **refcount_table, int64_t *nb_clusters)
{
    BDRVQcow2State *s = bs->opaque;
    int64_t i;
    QCowSnapshot *sn;
    int ret;

    if (!*refcount_table) {
        /* Allocate a zeroed IMRT covering the whole image */
        int64_t old_size = 0;
        ret = realloc_refcount_array(s, refcount_table,
                                     &old_size, *nb_clusters);
        if (ret < 0) {
            res->check_errors++;
            return ret;
        }
    }

    /* header */
    ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
                                   0, s->cluster_size);
    if (ret < 0) {
        return ret;
    }

    /* current L1 table */
    ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
                             s->l1_table_offset, s->l1_size, CHECK_FRAG_INFO,
                             fix, true);
    if (ret < 0) {
        return ret;
    }

    /* snapshots */
    if (has_data_file(bs) && s->nb_snapshots) {
        /* Images with external data files must not contain snapshots */
        fprintf(stderr, "ERROR %d snapshots in image with data file\n",
                s->nb_snapshots);
        res->corruptions++;
    }

    for (i = 0; i < s->nb_snapshots; i++) {
        sn = s->snapshots + i;
        if (offset_into_cluster(s, sn->l1_table_offset)) {
            fprintf(stderr, "ERROR snapshot %s (%s) l1_offset=%#" PRIx64 ": "
                    "L1 table is not cluster aligned; snapshot table entry "
                    "corrupted\n", sn->id_str, sn->name, sn->l1_table_offset);
            res->corruptions++;
            continue;
        }
        if (sn->l1_size > QCOW_MAX_L1_SIZE / L1E_SIZE) {
            fprintf(stderr, "ERROR snapshot %s (%s) l1_size=%#" PRIx32 ": "
                    "L1 table is too large; snapshot table entry corrupted\n",
                    sn->id_str, sn->name, sn->l1_size);
            res->corruptions++;
            continue;
        }
        ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
                                 sn->l1_table_offset, sn->l1_size, 0, fix,
                                 false);
        if (ret < 0) {
            return ret;
        }
    }
    /* snapshot table */
    ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
                                   s->snapshots_offset, s->snapshots_size);
    if (ret < 0) {
        return ret;
    }

    /* refcount data */
    ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
                                   s->refcount_table_offset,
                                   s->refcount_table_size *
                                   REFTABLE_ENTRY_SIZE);
    if (ret < 0) {
        return ret;
    }

    /* encryption */
    if (s->crypto_header.length) {
        ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
                                       s->crypto_header.offset,
                                       s->crypto_header.length);
        if (ret < 0) {
            return ret;
        }
    }

    /* bitmaps */
    ret = qcow2_check_bitmaps_refcounts(bs, res, refcount_table, nb_clusters);
    if (ret < 0) {
        return ret;
    }

    return check_refblocks(bs, res, fix, rebuild, refcount_table, nb_clusters);
}
2296
2297
2298
2299
2300
/*
 * Compare the on-disk refcounts against the computed IMRT and report or
 * fix mismatches.
 *
 * refcount1 is the on-disk value, refcount2 the expected one.  A stored
 * refcount that is too low relative to actual references (refcount1 <
 * refcount2) is a corruption; one that is too high is a leak.  A stored
 * refcount of 0 for a referenced cluster forces *rebuild.  With the
 * appropriate bit in @fix, mismatches are corrected in place via
 * update_refcount().
 *
 * *highest_cluster is set to the highest cluster index with a nonzero
 * refcount on either side.  Errors reading refcounts only bump
 * res->check_errors; this function returns nothing.
 */
static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
                              BdrvCheckMode fix, bool *rebuild,
                              int64_t *highest_cluster,
                              void *refcount_table, int64_t nb_clusters)
{
    BDRVQcow2State *s = bs->opaque;
    int64_t i;
    uint64_t refcount1, refcount2;
    int ret;

    for (i = 0, *highest_cluster = 0; i < nb_clusters; i++) {
        ret = qcow2_get_refcount(bs, i, &refcount1);
        if (ret < 0) {
            fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
                    i, strerror(-ret));
            res->check_errors++;
            continue;
        }

        refcount2 = s->get_refcount(refcount_table, i);

        if (refcount1 > 0 || refcount2 > 0) {
            *highest_cluster = i;
        }

        if (refcount1 != refcount2) {
            /* Check if we're allowed to fix the mismatch */
            int *num_fixed = NULL;
            if (refcount1 == 0) {
                /* A used cluster with stored refcount 0 cannot be fixed
                 * incrementally; the whole structure must be rebuilt */
                *rebuild = true;
            } else if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) {
                num_fixed = &res->leaks_fixed;
            } else if (refcount1 < refcount2 && (fix & BDRV_FIX_ERRORS)) {
                num_fixed = &res->corruptions_fixed;
            }

            fprintf(stderr, "%s cluster %" PRId64 " refcount=%" PRIu64
                    " reference=%" PRIu64 "\n",
                    num_fixed != NULL ? "Repairing" :
                    refcount1 < refcount2 ? "ERROR" :
                    "Leaked",
                    i, refcount1, refcount2);

            if (num_fixed) {
                /* Adjust the stored refcount toward the expected value */
                ret = update_refcount(bs, i << s->cluster_bits, 1,
                                      refcount_diff(refcount1, refcount2),
                                      refcount1 > refcount2,
                                      QCOW2_DISCARD_ALWAYS);
                if (ret >= 0) {
                    (*num_fixed)++;
                    continue;
                }
            }

            /* And if we couldn't fix it, count the problem */
            if (refcount1 < refcount2) {
                res->corruptions++;
            } else {
                res->leaks++;
            }
        }
    }
}
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
/*
 * Allocate @cluster_count contiguous clusters in the in-memory refcount
 * table (IMRT), starting the search at *first_free_cluster.
 *
 * If no sufficiently large free run exists inside the current IMRT, the
 * IMRT is grown so the run can extend past its former end (any partial
 * free run at the end is reused).  *first_free_cluster is advanced to
 * the first free cluster found, to speed up subsequent searches.
 *
 * Returns the byte offset of the allocated run, or a negative errno if
 * growing the IMRT failed.
 */
static int64_t alloc_clusters_imrt(BlockDriverState *bs,
                                   int cluster_count,
                                   void **refcount_table,
                                   int64_t *imrt_nb_clusters,
                                   int64_t *first_free_cluster)
{
    BDRVQcow2State *s = bs->opaque;
    int64_t cluster = *first_free_cluster, i;
    bool first_gap = true;
    int contiguous_free_clusters;
    int ret;

    /* Starting at *first_free_cluster, find a run of at least
     * cluster_count free clusters */
    for (contiguous_free_clusters = 0;
         cluster < *imrt_nb_clusters &&
         contiguous_free_clusters < cluster_count;
         cluster++)
    {
        if (!s->get_refcount(*refcount_table, cluster)) {
            contiguous_free_clusters++;
            if (first_gap) {
                /* If this is the first free cluster found, update
                 * *first_free_cluster accordingly */
                *first_free_cluster = cluster;
                first_gap = false;
            }
        } else if (contiguous_free_clusters) {
            /* The run was interrupted by a used cluster; start over */
            contiguous_free_clusters = 0;
        }
    }

    /*
     * If contiguous_free_clusters is still short of cluster_count here,
     * the loop ran off the end of the IMRT: cluster == *imrt_nb_clusters
     * and the last contiguous_free_clusters clusters of the IMRT are
     * free (or contiguous_free_clusters is 0).
     */
    if (contiguous_free_clusters < cluster_count) {
        /*
         * Grow the IMRT just enough that the free run ending at the old
         * IMRT end can be extended to cluster_count clusters.  The new
         * tail is zero-initialized (i.e. free) by
         * realloc_refcount_array().
         */
        ret = realloc_refcount_array(s, refcount_table, imrt_nb_clusters,
                                     cluster + cluster_count
                                     - contiguous_free_clusters);
        if (ret < 0) {
            return ret;
        }
    }

    /* Step back to the start of the allocated run and mark it used */
    cluster -= contiguous_free_clusters;
    for (i = 0; i < cluster_count; i++) {
        s->set_refcount(*refcount_table, cluster + i, 1);
    }

    return cluster << s->cluster_bits;
}
2439
2440
2441
2442
2443
2444
2445
2446
2447
/*
 * Rebuild the entire refcount structure from the IMRT: allocate and
 * write new refcount blocks and a new refcount table, then point the
 * image header at the new table.
 *
 * The IMRT doubles as the source data for the refblocks: refblock
 * clusters (and finally the reftable) are allocated *inside* the IMRT
 * via alloc_clusters_imrt(), so the written refblocks automatically
 * describe themselves.  If the reftable was allocated before all
 * refblocks were known (and the IMRT then grew), allocation restarts
 * via the write_refblocks label with reftable_offset reset to -1.
 *
 * On success, s->refcount_table and friends are switched over to the
 * new structure and 0 is returned; on failure a negative errno.
 */
static int rebuild_refcount_structure(BlockDriverState *bs,
                                      BdrvCheckResult *res,
                                      void **refcount_table,
                                      int64_t *nb_clusters)
{
    BDRVQcow2State *s = bs->opaque;
    int64_t first_free_cluster = 0, reftable_offset = -1, cluster = 0;
    int64_t refblock_offset, refblock_start, refblock_index;
    uint32_t reftable_size = 0;
    uint64_t *on_disk_reftable = NULL;
    void *on_disk_refblock;
    int ret = 0;
    /* Header fields to update, packed to match the on-disk layout */
    struct {
        uint64_t reftable_offset;
        uint32_t reftable_clusters;
    } QEMU_PACKED reftable_offset_and_clusters;

    /* Drop any cached refblocks; they describe the old structure */
    qcow2_cache_empty(bs, s->refcount_block_cache);

write_refblocks:
    for (; cluster < *nb_clusters; cluster++) {
        /* Only used clusters need a refblock covering them */
        if (!s->get_refcount(*refcount_table, cluster)) {
            continue;
        }

        refblock_index = cluster >> s->refcount_block_bits;
        refblock_start = refblock_index << s->refcount_block_bits;

        /* Don't allocate a cluster in a refblock already written to disk */
        if (first_free_cluster < refblock_start) {
            first_free_cluster = refblock_start;
        }
        refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table,
                                              nb_clusters, &first_free_cluster);
        if (refblock_offset < 0) {
            fprintf(stderr, "ERROR allocating refblock: %s\n",
                    strerror(-refblock_offset));
            res->check_errors++;
            ret = refblock_offset;
            goto fail;
        }

        if (reftable_size <= refblock_index) {
            uint32_t old_reftable_size = reftable_size;
            uint64_t *new_on_disk_reftable;

            /* Grow the reftable to a whole number of clusters */
            reftable_size = ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE,
                                     s->cluster_size) / REFTABLE_ENTRY_SIZE;
            new_on_disk_reftable = g_try_realloc(on_disk_reftable,
                                                 reftable_size *
                                                 REFTABLE_ENTRY_SIZE);
            if (!new_on_disk_reftable) {
                res->check_errors++;
                ret = -ENOMEM;
                goto fail;
            }
            on_disk_reftable = new_on_disk_reftable;

            memset(on_disk_reftable + old_reftable_size, 0,
                   (reftable_size - old_reftable_size) * REFTABLE_ENTRY_SIZE);

            /*
             * The reftable grew, so any cluster range allocated for it
             * earlier may now be too small; force a re-allocation.
             */
            reftable_offset = -1;
        } else {
            assert(on_disk_reftable);
        }
        on_disk_reftable[refblock_index] = refblock_offset;

        /* If this is apparently the last refblock (for now), try to
         * squeeze the reftable in right away */
        if (refblock_index == (*nb_clusters - 1) >> s->refcount_block_bits &&
            reftable_offset < 0)
        {
            uint64_t reftable_clusters = size_to_clusters(s, reftable_size *
                                                          REFTABLE_ENTRY_SIZE);
            reftable_offset = alloc_clusters_imrt(bs, reftable_clusters,
                                                  refcount_table, nb_clusters,
                                                  &first_free_cluster);
            if (reftable_offset < 0) {
                fprintf(stderr, "ERROR allocating reftable: %s\n",
                        strerror(-reftable_offset));
                res->check_errors++;
                ret = reftable_offset;
                goto fail;
            }
        }

        ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset,
                                            s->cluster_size, false);
        if (ret < 0) {
            fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
            goto fail;
        }

        /*
         * The refblock's contents are taken directly from the IMRT.
         * NOTE(review): this assumes one IMRT cluster maps exactly onto
         * one on-disk refblock (same entry width) — confirm against the
         * caller's setup of *refcount_table.
         */
        on_disk_refblock = (void *)((char *) *refcount_table +
                                    refblock_index * s->cluster_size);

        ret = bdrv_pwrite(bs->file, refblock_offset, on_disk_refblock,
                          s->cluster_size);
        if (ret < 0) {
            fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
            goto fail;
        }

        /* This refblock is done; skip to the end of its range */
        cluster = refblock_start + s->refcount_block_size - 1;
    }

    if (reftable_offset < 0) {
        /* The reftable could not be squeezed in above; allocate it past
         * the range covered by the written refblocks and redo the loop
         * so a refblock covering it gets written too */
        uint64_t post_refblock_start, reftable_clusters;

        post_refblock_start = ROUND_UP(*nb_clusters, s->refcount_block_size);
        reftable_clusters =
            size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE);

        /* Not pretty but simple */
        if (first_free_cluster < post_refblock_start) {
            first_free_cluster = post_refblock_start;
        }
        reftable_offset = alloc_clusters_imrt(bs, reftable_clusters,
                                              refcount_table, nb_clusters,
                                              &first_free_cluster);
        if (reftable_offset < 0) {
            fprintf(stderr, "ERROR allocating reftable: %s\n",
                    strerror(-reftable_offset));
            res->check_errors++;
            ret = reftable_offset;
            goto fail;
        }

        goto write_refblocks;
    }

    /* Convert the reftable to big-endian for the on-disk write */
    for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) {
        cpu_to_be64s(&on_disk_reftable[refblock_index]);
    }

    ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset,
                                        reftable_size * REFTABLE_ENTRY_SIZE,
                                        false);
    if (ret < 0) {
        fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
        goto fail;
    }

    assert(reftable_size < INT_MAX / REFTABLE_ENTRY_SIZE);
    ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable,
                      reftable_size * REFTABLE_ENTRY_SIZE);
    if (ret < 0) {
        fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
        goto fail;
    }

    /* Enter new reftable into the image header (synchronously, so the
     * switch-over is atomic with respect to crashes) */
    reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset);
    reftable_offset_and_clusters.reftable_clusters =
        cpu_to_be32(size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE));
    ret = bdrv_pwrite_sync(bs->file,
                           offsetof(QCowHeader, refcount_table_offset),
                           &reftable_offset_and_clusters,
                           sizeof(reftable_offset_and_clusters));
    if (ret < 0) {
        fprintf(stderr, "ERROR setting reftable: %s\n", strerror(-ret));
        goto fail;
    }

    /* Back to host byte order for the in-memory copy we keep */
    for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) {
        be64_to_cpus(&on_disk_reftable[refblock_index]);
    }
    s->refcount_table = on_disk_reftable;
    s->refcount_table_offset = reftable_offset;
    s->refcount_table_size = reftable_size;
    update_max_refcount_table_index(s);

    return 0;

fail:
    g_free(on_disk_reftable);
    return ret;
}
2631
2632
2633
2634
2635
2636
2637
/*
 * Checks the image for refcount consistency.
 *
 * Detected (and, depending on @fix, repaired) corruptions and leaks are
 * reported through *res.  Returns 0 when the check itself could be carried
 * out (regardless of whether errors were found in the image), and -errno
 * when the check could not be performed.
 */
int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
                          BdrvCheckMode fix)
{
    BDRVQcow2State *s = bs->opaque;
    BdrvCheckResult pre_compare_res;
    int64_t size, highest_cluster, nb_clusters;
    void *refcount_table = NULL;
    bool rebuild = false;
    int ret;

    size = bdrv_getlength(bs->file->bs);
    if (size < 0) {
        res->check_errors++;
        return size;
    }

    nb_clusters = size_to_clusters(s, size);
    if (nb_clusters > INT_MAX) {
        res->check_errors++;
        return -EFBIG;
    }

    res->bfi.total_clusters =
        size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);

    /* Build the expected refcount array from the image's metadata into
     * refcount_table */
    ret = calculate_refcounts(bs, res, fix, &rebuild, &refcount_table,
                              &nb_clusters);
    if (ret < 0) {
        goto fail;
    }

    /* Compare the expected refcounts against the on-disk ones.  Save the
     * pre-comparison state first: if fixing is requested and no rebuild is
     * necessary, the comparison is redone below with @fix set, and this
     * first pass's numbers must not be counted twice. */
    pre_compare_res = *res;
    compare_refcounts(bs, res, 0, &rebuild, &highest_cluster, refcount_table,
                      nb_clusters);

    if (rebuild && (fix & BDRV_FIX_ERRORS)) {
        BdrvCheckResult old_res = *res;
        int fresh_leaks = 0;

        fprintf(stderr, "Rebuilding refcount structure\n");
        ret = rebuild_refcount_structure(bs, res, &refcount_table,
                                         &nb_clusters);
        if (ret < 0) {
            goto fail;
        }

        res->corruptions = 0;
        res->leaks = 0;

        /* The rebuild created entirely new refcount structures; recount
         * everything against them from scratch */
        rebuild = false;
        memset(refcount_table, 0, refcount_array_byte_size(s, nb_clusters));
        ret = calculate_refcounts(bs, res, 0, &rebuild, &refcount_table,
                                  &nb_clusters);
        if (ret < 0) {
            goto fail;
        }

        if (fix & BDRV_FIX_LEAKS) {
            /* Fix the leaks left behind by the rebuild (the old refcount
             * structures), using a scratch result so this repair pass's
             * numbers do not pollute *res */
            BdrvCheckResult saved_res = *res;
            *res = (BdrvCheckResult){ 0 };

            compare_refcounts(bs, res, BDRV_FIX_LEAKS, &rebuild,
                              &highest_cluster, refcount_table, nb_clusters);
            if (rebuild) {
                fprintf(stderr, "ERROR rebuilt refcount structure is still "
                        "broken\n");
            }

            /* Remember how many leaks this cleanup pass found (they were
             * freshly produced by the rebuild), then restore the real
             * result */
            fresh_leaks = res->leaks;
            *res = saved_res;
        }

        if (res->corruptions < old_res.corruptions) {
            res->corruptions_fixed += old_res.corruptions - res->corruptions;
        }
        if (res->leaks < old_res.leaks) {
            res->leaks_fixed += old_res.leaks - res->leaks;
        }
        res->leaks += fresh_leaks;
    } else if (fix) {
        if (rebuild) {
            fprintf(stderr, "ERROR need to rebuild refcount structures\n");
            res->check_errors++;
            ret = -EIO;
            goto fail;
        }

        if (res->leaks || res->corruptions) {
            /* Redo the comparison, this time actually repairing leaks
             * and/or corruptions as requested by @fix */
            *res = pre_compare_res;
            compare_refcounts(bs, res, fix, &rebuild, &highest_cluster,
                              refcount_table, nb_clusters);
        }
    }

    /* Check (and, with @fix, repair) the QCOW_OFLAG_COPIED flags */
    ret = check_oflag_copied(bs, res, fix);
    if (ret < 0) {
        goto fail;
    }

    res->image_end_offset = (highest_cluster + 1) * s->cluster_size;
    ret = 0;

fail:
    g_free(refcount_table);

    return ret;
}
2757
/* Does [offset, offset + size) intersect [ofs, ofs + sz)?  NB: implicitly
 * uses the local variables `offset` and `size` of the enclosing function. */
#define overlaps_with(ofs, sz) \
    ranges_overlap(offset, size, ofs, sz)
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
/*
 * Checks whether the range [offset, offset + size) in the image file overlaps
 * any metadata structure whose QCOW2_OL_* bit is set in s->overlap_check and
 * not in @ign.
 *
 * Returns 0 if there is no overlap, otherwise the QCOW2_OL_* value of the
 * first overlapping metadata type found, or -errno if the check itself failed
 * (e.g. reading a snapshot's L1 table from disk).
 */
int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
                                 int64_t size)
{
    BDRVQcow2State *s = bs->opaque;
    int chk = s->overlap_check & ~ign;
    int i, j;

    /* An empty range overlaps nothing */
    if (!size) {
        return 0;
    }

    /* The image header occupies the start of the first cluster */
    if (chk & QCOW2_OL_MAIN_HEADER) {
        if (offset < s->cluster_size) {
            return QCOW2_OL_MAIN_HEADER;
        }
    }

    /* Round the range to cluster boundaries; cluster granularity suffices
     * for the remaining checks */
    size = ROUND_UP(offset_into_cluster(s, offset) + size, s->cluster_size);
    offset = start_of_cluster(s, offset);

    if ((chk & QCOW2_OL_ACTIVE_L1) && s->l1_size) {
        if (overlaps_with(s->l1_table_offset, s->l1_size * L1E_SIZE)) {
            return QCOW2_OL_ACTIVE_L1;
        }
    }

    if ((chk & QCOW2_OL_REFCOUNT_TABLE) && s->refcount_table_size) {
        if (overlaps_with(s->refcount_table_offset,
            s->refcount_table_size * REFTABLE_ENTRY_SIZE)) {
            return QCOW2_OL_REFCOUNT_TABLE;
        }
    }

    if ((chk & QCOW2_OL_SNAPSHOT_TABLE) && s->snapshots_size) {
        if (overlaps_with(s->snapshots_offset, s->snapshots_size)) {
            return QCOW2_OL_SNAPSHOT_TABLE;
        }
    }

    /* L1 tables of inactive snapshots */
    if ((chk & QCOW2_OL_INACTIVE_L1) && s->snapshots) {
        for (i = 0; i < s->nb_snapshots; i++) {
            if (s->snapshots[i].l1_size &&
                overlaps_with(s->snapshots[i].l1_table_offset,
                s->snapshots[i].l1_size * L1E_SIZE)) {
                return QCOW2_OL_INACTIVE_L1;
            }
        }
    }

    /* Every L2 table referenced by the active L1 table */
    if ((chk & QCOW2_OL_ACTIVE_L2) && s->l1_table) {
        for (i = 0; i < s->l1_size; i++) {
            if ((s->l1_table[i] & L1E_OFFSET_MASK) &&
                overlaps_with(s->l1_table[i] & L1E_OFFSET_MASK,
                s->cluster_size)) {
                return QCOW2_OL_ACTIVE_L2;
            }
        }
    }

    if ((chk & QCOW2_OL_REFCOUNT_BLOCK) && s->refcount_table) {
        /* Only entries up to max_refcount_table_index can be non-zero; the
         * asserts check that invariant so the scan may stop there */
        unsigned last_entry = s->max_refcount_table_index;
        assert(last_entry < s->refcount_table_size);
        assert(last_entry + 1 == s->refcount_table_size ||
               (s->refcount_table[last_entry + 1] & REFT_OFFSET_MASK) == 0);
        for (i = 0; i <= last_entry; i++) {
            if ((s->refcount_table[i] & REFT_OFFSET_MASK) &&
                overlaps_with(s->refcount_table[i] & REFT_OFFSET_MASK,
                s->cluster_size)) {
                return QCOW2_OL_REFCOUNT_BLOCK;
            }
        }
    }

    /* L2 tables of inactive snapshots; their L1 tables must be read from
     * disk for this */
    if ((chk & QCOW2_OL_INACTIVE_L2) && s->snapshots) {
        for (i = 0; i < s->nb_snapshots; i++) {
            uint64_t l1_ofs = s->snapshots[i].l1_table_offset;
            uint32_t l1_sz = s->snapshots[i].l1_size;
            uint64_t l1_sz2 = l1_sz * L1E_SIZE;
            uint64_t *l1;
            int ret;

            /* Validate the L1 parameters before reading the table;
             * presumably this also bounds l1_sz via QCOW_MAX_L1_SIZE so
             * that the l1_sz2 multiplication above cannot overflow --
             * TODO confirm against qcow2_validate_table() */
            ret = qcow2_validate_table(bs, l1_ofs, l1_sz, L1E_SIZE,
                                       QCOW_MAX_L1_SIZE, "", NULL);
            if (ret < 0) {
                return ret;
            }

            l1 = g_try_malloc(l1_sz2);

            if (l1_sz2 && l1 == NULL) {
                return -ENOMEM;
            }

            ret = bdrv_pread(bs->file, l1_ofs, l1, l1_sz2);
            if (ret < 0) {
                g_free(l1);
                return ret;
            }

            for (j = 0; j < l1_sz; j++) {
                uint64_t l2_ofs = be64_to_cpu(l1[j]) & L1E_OFFSET_MASK;
                if (l2_ofs && overlaps_with(l2_ofs, s->cluster_size)) {
                    g_free(l1);
                    return QCOW2_OL_INACTIVE_L2;
                }
            }

            g_free(l1);
        }
    }

    if ((chk & QCOW2_OL_BITMAP_DIRECTORY) &&
        (s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS))
    {
        if (overlaps_with(s->bitmap_directory_offset,
                          s->bitmap_directory_size))
        {
            return QCOW2_OL_BITMAP_DIRECTORY;
        }
    }

    return 0;
}
2899
/* Human-readable names for the QCOW2_OL_*_BITNR metadata types, used in the
 * corruption message emitted by qcow2_pre_write_overlap_check() */
static const char *metadata_ol_names[] = {
    [QCOW2_OL_MAIN_HEADER_BITNR]        = "qcow2_header",
    [QCOW2_OL_ACTIVE_L1_BITNR]          = "active L1 table",
    [QCOW2_OL_ACTIVE_L2_BITNR]          = "active L2 table",
    [QCOW2_OL_REFCOUNT_TABLE_BITNR]     = "refcount table",
    [QCOW2_OL_REFCOUNT_BLOCK_BITNR]     = "refcount block",
    [QCOW2_OL_SNAPSHOT_TABLE_BITNR]     = "snapshot table",
    [QCOW2_OL_INACTIVE_L1_BITNR]        = "inactive L1 table",
    [QCOW2_OL_INACTIVE_L2_BITNR]        = "inactive L2 table",
    [QCOW2_OL_BITMAP_DIRECTORY_BITNR]   = "bitmap directory",
};
/* This table must cover exactly one name per QCOW2_OL_*_BITNR value */
QEMU_BUILD_BUG_ON(QCOW2_OL_MAX_BITNR != ARRAY_SIZE(metadata_ol_names));
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
2924 int64_t size, bool data_file)
2925{
2926 int ret;
2927
2928 if (data_file && has_data_file(bs)) {
2929 return 0;
2930 }
2931
2932 ret = qcow2_check_metadata_overlap(bs, ign, offset, size);
2933 if (ret < 0) {
2934 return ret;
2935 } else if (ret > 0) {
2936 int metadata_ol_bitnr = ctz32(ret);
2937 assert(metadata_ol_bitnr < QCOW2_OL_MAX_BITNR);
2938
2939 qcow2_signal_corruption(bs, true, offset, size, "Preventing invalid "
2940 "write on metadata (overlaps with %s)",
2941 metadata_ol_names[metadata_ol_bitnr]);
2942 return -EIO;
2943 }
2944
2945 return 0;
2946}
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
/*
 * Callback type for walk_over_reftable(): invoked for every completed
 * refblock of the new refcount structure.  @reftable and *@reftable_size
 * describe the new reftable being built and may be updated (e.g. grown) by
 * the callback; @refblock holds the refblock data (may be NULL during the
 * allocation pass), @refblock_empty tells whether all of its refcounts are
 * zero, and *@allocated must be set to true if the callback allocated
 * clusters.  Returns 0 on success, -errno otherwise (setting @errp).
 */
typedef int (RefblockFinishOp)(BlockDriverState *bs, uint64_t **reftable,
                               uint64_t reftable_index, uint64_t *reftable_size,
                               void *refblock, bool refblock_empty,
                               bool *allocated, Error **errp);
2962
2963
2964
2965
2966
2967
2968static int alloc_refblock(BlockDriverState *bs, uint64_t **reftable,
2969 uint64_t reftable_index, uint64_t *reftable_size,
2970 void *refblock, bool refblock_empty, bool *allocated,
2971 Error **errp)
2972{
2973 BDRVQcow2State *s = bs->opaque;
2974 int64_t offset;
2975
2976 if (!refblock_empty && reftable_index >= *reftable_size) {
2977 uint64_t *new_reftable;
2978 uint64_t new_reftable_size;
2979
2980 new_reftable_size = ROUND_UP(reftable_index + 1,
2981 s->cluster_size / REFTABLE_ENTRY_SIZE);
2982 if (new_reftable_size > QCOW_MAX_REFTABLE_SIZE / REFTABLE_ENTRY_SIZE) {
2983 error_setg(errp,
2984 "This operation would make the refcount table grow "
2985 "beyond the maximum size supported by QEMU, aborting");
2986 return -ENOTSUP;
2987 }
2988
2989 new_reftable = g_try_realloc(*reftable, new_reftable_size *
2990 REFTABLE_ENTRY_SIZE);
2991 if (!new_reftable) {
2992 error_setg(errp, "Failed to increase reftable buffer size");
2993 return -ENOMEM;
2994 }
2995
2996 memset(new_reftable + *reftable_size, 0,
2997 (new_reftable_size - *reftable_size) * REFTABLE_ENTRY_SIZE);
2998
2999 *reftable = new_reftable;
3000 *reftable_size = new_reftable_size;
3001 }
3002
3003 if (!refblock_empty && !(*reftable)[reftable_index]) {
3004 offset = qcow2_alloc_clusters(bs, s->cluster_size);
3005 if (offset < 0) {
3006 error_setg_errno(errp, -offset, "Failed to allocate refblock");
3007 return offset;
3008 }
3009 (*reftable)[reftable_index] = offset;
3010 *allocated = true;
3011 }
3012
3013 return 0;
3014}
3015
3016
3017
3018
3019
3020
3021static int flush_refblock(BlockDriverState *bs, uint64_t **reftable,
3022 uint64_t reftable_index, uint64_t *reftable_size,
3023 void *refblock, bool refblock_empty, bool *allocated,
3024 Error **errp)
3025{
3026 BDRVQcow2State *s = bs->opaque;
3027 int64_t offset;
3028 int ret;
3029
3030 if (reftable_index < *reftable_size && (*reftable)[reftable_index]) {
3031 offset = (*reftable)[reftable_index];
3032
3033 ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size,
3034 false);
3035 if (ret < 0) {
3036 error_setg_errno(errp, -ret, "Overlap check failed");
3037 return ret;
3038 }
3039
3040 ret = bdrv_pwrite(bs->file, offset, refblock, s->cluster_size);
3041 if (ret < 0) {
3042 error_setg_errno(errp, -ret, "Failed to write refblock");
3043 return ret;
3044 }
3045 } else {
3046 assert(refblock_empty);
3047 }
3048
3049 return 0;
3050}
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
/*
 * Walks the current refcount structures and presents every (prospective)
 * refblock of the *new* refcount structure (@new_refcount_bits per entry) to
 * @operation, in order.
 *
 * @new_reftable, *@new_reftable_index and *@new_reftable_size describe the
 * new reftable being built and may be modified by @operation.
 * @new_refblock is a caller-provided buffer of @new_refblock_size entries,
 * filled through @new_set_refcount; both may be NULL during the allocation
 * pass (see qcow2_change_refcount_order()).
 * *@allocated is set by @operation if it allocated clusters.
 * @status_cb/@cb_opaque report progress; @index and @total tell which of the
 * passes over the reftable this walk is, for that progress calculation.
 *
 * Returns 0 on success, -errno otherwise (setting @errp).
 */
static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable,
                              uint64_t *new_reftable_index,
                              uint64_t *new_reftable_size,
                              void *new_refblock, int new_refblock_size,
                              int new_refcount_bits,
                              RefblockFinishOp *operation, bool *allocated,
                              Qcow2SetRefcountFunc *new_set_refcount,
                              BlockDriverAmendStatusCB *status_cb,
                              void *cb_opaque, int index, int total,
                              Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t reftable_index;
    bool new_refblock_empty = true;
    int refblock_index;
    int new_refblock_index = 0;
    int ret;

    for (reftable_index = 0; reftable_index < s->refcount_table_size;
         reftable_index++)
    {
        uint64_t refblock_offset = s->refcount_table[reftable_index]
                                 & REFT_OFFSET_MASK;

        status_cb(bs, (uint64_t)index * s->refcount_table_size + reftable_index,
                  (uint64_t)total * s->refcount_table_size, cb_opaque);

        if (refblock_offset) {
            void *refblock;

            if (offset_into_cluster(s, refblock_offset)) {
                qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#"
                                        PRIx64 " unaligned (reftable index: %#"
                                        PRIx64 ")", refblock_offset,
                                        reftable_index);
                error_setg(errp,
                           "Image is corrupt (unaligned refblock offset)");
                return -EIO;
            }

            ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offset,
                                  &refblock);
            if (ret < 0) {
                error_setg_errno(errp, -ret, "Failed to retrieve refblock");
                return ret;
            }

            for (refblock_index = 0; refblock_index < s->refcount_block_size;
                 refblock_index++)
            {
                uint64_t refcount;

                if (new_refblock_index >= new_refblock_size) {
                    /* The new refblock is now complete; hand it to
                     * @operation and start the next one */
                    ret = operation(bs, new_reftable, *new_reftable_index,
                                    new_reftable_size, new_refblock,
                                    new_refblock_empty, allocated, errp);
                    if (ret < 0) {
                        qcow2_cache_put(s->refcount_block_cache, &refblock);
                        return ret;
                    }

                    (*new_reftable_index)++;
                    new_refblock_index = 0;
                    new_refblock_empty = true;
                }

                refcount = s->get_refcount(refblock, refblock_index);
                if (new_refcount_bits < 64 && refcount >> new_refcount_bits) {
                    /* The refcount does not fit into the narrower entry
                     * width; refuse the conversion */
                    uint64_t offset;

                    qcow2_cache_put(s->refcount_block_cache, &refblock);

                    offset = ((reftable_index << s->refcount_block_bits)
                              + refblock_index) << s->cluster_bits;

                    error_setg(errp, "Cannot decrease refcount entry width to "
                               "%i bits: Cluster at offset %#" PRIx64 " has a "
                               "refcount of %" PRIu64, new_refcount_bits,
                               offset, refcount);
                    return -EINVAL;
                }

                if (new_set_refcount) {
                    new_set_refcount(new_refblock, new_refblock_index++,
                                     refcount);
                } else {
                    new_refblock_index++;
                }
                new_refblock_empty = new_refblock_empty && refcount == 0;
            }

            qcow2_cache_put(s->refcount_block_cache, &refblock);
        } else {
            /* No refblock in the old structure: all of these refcounts are
             * zero */
            for (refblock_index = 0; refblock_index < s->refcount_block_size;
                 refblock_index++)
            {
                if (new_refblock_index >= new_refblock_size) {
                    /* The new refblock is now complete; hand it to
                     * @operation and start the next one */
                    ret = operation(bs, new_reftable, *new_reftable_index,
                                    new_reftable_size, new_refblock,
                                    new_refblock_empty, allocated, errp);
                    if (ret < 0) {
                        return ret;
                    }

                    (*new_reftable_index)++;
                    new_refblock_index = 0;
                    new_refblock_empty = true;
                }

                if (new_set_refcount) {
                    new_set_refcount(new_refblock, new_refblock_index++, 0);
                } else {
                    new_refblock_index++;
                }
            }
        }
    }

    if (new_refblock_index > 0) {
        /* Complete the partially filled final refblock: pad the remaining
         * entries with zero refcounts, then hand it to @operation */
        if (new_set_refcount) {
            for (; new_refblock_index < new_refblock_size;
                 new_refblock_index++)
            {
                new_set_refcount(new_refblock, new_refblock_index, 0);
            }
        }

        ret = operation(bs, new_reftable, *new_reftable_index,
                        new_reftable_size, new_refblock, new_refblock_empty,
                        allocated, errp);
        if (ret < 0) {
            return ret;
        }

        (*new_reftable_index)++;
    }

    /* Report this walk as finished */
    status_cb(bs, (uint64_t)(index + 1) * s->refcount_table_size,
              (uint64_t)total * s->refcount_table_size, cb_opaque);

    return 0;
}
3211
/*
 * Converts the image's refcount entries to @refcount_order bits per entry.
 *
 * Works in two phases: first, repeated allocation walks (alloc_refblock)
 * over the existing refcount structures until a walk allocates no further
 * clusters; then a single write walk (flush_refblock) that fills and writes
 * all new refblocks.  Afterwards the new reftable is written, the image
 * header is updated (with rollback on failure), the in-memory state is
 * switched over, and the now-unused structures are freed.
 *
 * @status_cb/@cb_opaque report progress.  Returns 0 on success, -errno
 * otherwise (setting @errp).
 */
int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
                                BlockDriverAmendStatusCB *status_cb,
                                void *cb_opaque, Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    Qcow2GetRefcountFunc *new_get_refcount;
    Qcow2SetRefcountFunc *new_set_refcount;
    void *new_refblock = qemu_blockalign(bs->file->bs, s->cluster_size);
    uint64_t *new_reftable = NULL, new_reftable_size = 0;
    uint64_t *old_reftable, old_reftable_size, old_reftable_offset;
    uint64_t new_reftable_index = 0;
    uint64_t i;
    int64_t new_reftable_offset = 0, allocated_reftable_size = 0;
    int new_refblock_size, new_refcount_bits = 1 << refcount_order;
    int old_refcount_order;
    int walk_index = 0;
    int ret;
    bool new_allocation;

    assert(s->qcow_version >= 3);
    assert(refcount_order >= 0 && refcount_order <= 6);

    /* Number of refcount entries per refblock cluster at the new width */
    new_refblock_size = 1 << (s->cluster_bits - (refcount_order - 3));

    new_get_refcount = get_refcount_funcs[refcount_order];
    new_set_refcount = set_refcount_funcs[refcount_order];

    /* Phase 1: allocate clusters for the new refblocks and the new
     * reftable.  Allocating may itself change refcounts, so repeat until a
     * walk allocates nothing new. */
    do {
        int total_walks;

        new_allocation = false;

        /* Progress estimate: assume at least one more allocation walk plus
         * the final write walk will be needed */
        total_walks = MAX(walk_index + 2, 3);

        /* Allocate clusters for every non-empty refblock of the new
         * structure (no refblock data is written in this pass) */
        ret = walk_over_reftable(bs, &new_reftable, &new_reftable_index,
                                 &new_reftable_size, NULL, new_refblock_size,
                                 new_refcount_bits, &alloc_refblock,
                                 &new_allocation, NULL, status_cb, cb_opaque,
                                 walk_index++, total_walks, errp);
        if (ret < 0) {
            goto done;
        }

        /* Restart at the beginning of the new reftable for the next walk */
        new_reftable_index = 0;

        if (new_allocation) {
            /* The new reftable may have grown; replace any previously
             * allocated on-disk reftable with one of the current size */
            if (new_reftable_offset) {
                qcow2_free_clusters(
                    bs, new_reftable_offset,
                    allocated_reftable_size * REFTABLE_ENTRY_SIZE,
                    QCOW2_DISCARD_NEVER);
            }

            new_reftable_offset = qcow2_alloc_clusters(bs, new_reftable_size *
                                                           REFTABLE_ENTRY_SIZE);
            if (new_reftable_offset < 0) {
                error_setg_errno(errp, -new_reftable_offset,
                                 "Failed to allocate the new reftable");
                ret = new_reftable_offset;
                goto done;
            }
            allocated_reftable_size = new_reftable_size;
        }
    } while (new_allocation);

    /* Phase 2: all clusters are in place; fill and write the new refblocks */
    ret = walk_over_reftable(bs, &new_reftable, &new_reftable_index,
                             &new_reftable_size, new_refblock,
                             new_refblock_size, new_refcount_bits,
                             &flush_refblock, &new_allocation, new_set_refcount,
                             status_cb, cb_opaque, walk_index, walk_index + 1,
                             errp);
    if (ret < 0) {
        goto done;
    }
    assert(!new_allocation);

    /* Write the new reftable (converted to big-endian for the write, then
     * converted back so the in-memory copy stays usable) */
    ret = qcow2_pre_write_overlap_check(bs, 0, new_reftable_offset,
                                        new_reftable_size * REFTABLE_ENTRY_SIZE,
                                        false);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Overlap check failed");
        goto done;
    }

    for (i = 0; i < new_reftable_size; i++) {
        cpu_to_be64s(&new_reftable[i]);
    }

    ret = bdrv_pwrite(bs->file, new_reftable_offset, new_reftable,
                      new_reftable_size * REFTABLE_ENTRY_SIZE);

    for (i = 0; i < new_reftable_size; i++) {
        be64_to_cpus(&new_reftable[i]);
    }

    if (ret < 0) {
        error_setg_errno(errp, -ret, "Failed to write the new reftable");
        goto done;
    }

    /* Flush the refblock cache: its entries still describe the old refcount
     * width and must reach the disk before switching over */
    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Failed to flush the refblock cache");
        goto done;
    }

    /* Point the image header at the new reftable; qcow2_update_header()
     * reads these fields, so set them first and roll back on failure */
    old_refcount_order  = s->refcount_order;
    old_reftable_size   = s->refcount_table_size;
    old_reftable_offset = s->refcount_table_offset;

    s->refcount_order        = refcount_order;
    s->refcount_table_size   = new_reftable_size;
    s->refcount_table_offset = new_reftable_offset;

    ret = qcow2_update_header(bs);
    if (ret < 0) {
        s->refcount_order        = old_refcount_order;
        s->refcount_table_size   = old_reftable_size;
        s->refcount_table_offset = old_reftable_offset;
        error_setg_errno(errp, -ret, "Failed to update the qcow2 header");
        goto done;
    }

    /* The new refcount structures are now in effect; switch the remaining
     * in-memory state over */
    old_reftable = s->refcount_table;
    s->refcount_table = new_reftable;
    update_max_refcount_table_index(s);

    s->refcount_bits = 1 << refcount_order;
    s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
    s->refcount_max += s->refcount_max - 1;

    s->refcount_block_bits = s->cluster_bits - (refcount_order - 3);
    s->refcount_block_size = 1 << s->refcount_block_bits;

    s->get_refcount = new_get_refcount;
    s->set_refcount = new_set_refcount;

    /* On success, have the cleanup below free the *old* structures instead
     * of the new ones */
    new_reftable        = old_reftable;
    new_reftable_size   = old_reftable_size;
    new_reftable_offset = old_reftable_offset;

done:
    if (new_reftable) {
        /* Free all refblocks referenced by this reftable (the old ones on
         * success, the partially created new ones on failure), then the
         * reftable itself */
        for (i = 0; i < new_reftable_size; i++) {
            uint64_t offset = new_reftable[i] & REFT_OFFSET_MASK;
            if (offset) {
                qcow2_free_clusters(bs, offset, s->cluster_size,
                                    QCOW2_DISCARD_OTHER);
            }
        }
        g_free(new_reftable);

        if (new_reftable_offset > 0) {
            qcow2_free_clusters(bs, new_reftable_offset,
                                new_reftable_size * REFTABLE_ENTRY_SIZE,
                                QCOW2_DISCARD_OTHER);
        }
    }

    qemu_vfree(new_refblock);
    return ret;
}
3398
3399static int64_t get_refblock_offset(BlockDriverState *bs, uint64_t offset)
3400{
3401 BDRVQcow2State *s = bs->opaque;
3402 uint32_t index = offset_to_reftable_index(s, offset);
3403 int64_t covering_refblock_offset = 0;
3404
3405 if (index < s->refcount_table_size) {
3406 covering_refblock_offset = s->refcount_table[index] & REFT_OFFSET_MASK;
3407 }
3408 if (!covering_refblock_offset) {
3409 qcow2_signal_corruption(bs, true, -1, -1, "Refblock at %#" PRIx64 " is "
3410 "not covered by the refcount structures",
3411 offset);
3412 return -EIO;
3413 }
3414
3415 return covering_refblock_offset;
3416}
3417
/*
 * Frees the refcount block cluster at @discard_block_offs: zeroes the
 * cluster's entry in the refblock covering it, drops any cache entry for the
 * discarded block itself and queues the cluster for discard.  Used by
 * qcow2_shrink_reftable().
 *
 * Returns 0 on success, -errno otherwise.
 */
static int qcow2_discard_refcount_block(BlockDriverState *bs,
                                        uint64_t discard_block_offs)
{
    BDRVQcow2State *s = bs->opaque;
    int64_t refblock_offs;
    uint64_t cluster_index = discard_block_offs >> s->cluster_bits;
    uint32_t block_index = cluster_index & (s->refcount_block_size - 1);
    void *refblock;
    int ret;

    refblock_offs = get_refblock_offset(bs, discard_block_offs);
    if (refblock_offs < 0) {
        return refblock_offs;
    }

    assert(discard_block_offs != 0);

    ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offs,
                          &refblock);
    if (ret < 0) {
        return ret;
    }

    if (s->get_refcount(refblock, block_index) != 1) {
        /* A refblock cluster must be referenced exactly once */
        qcow2_signal_corruption(bs, true, -1, -1, "Invalid refcount:"
                                " refblock offset %#" PRIx64
                                ", reftable index %u"
                                ", block offset %#" PRIx64
                                ", refcount %#" PRIx64,
                                refblock_offs,
                                offset_to_reftable_index(s, discard_block_offs),
                                discard_block_offs,
                                s->get_refcount(refblock, block_index));
        qcow2_cache_put(s->refcount_block_cache, &refblock);
        return -EINVAL;
    }
    s->set_refcount(refblock, block_index, 0);

    qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refblock);

    qcow2_cache_put(s->refcount_block_cache, &refblock);

    /* The freed cluster may now be the first free one */
    if (cluster_index < s->free_cluster_index) {
        s->free_cluster_index = cluster_index;
    }

    /* If the discarded refblock is itself in the cache, drop that now-stale
     * entry */
    refblock = qcow2_cache_is_table_offset(s->refcount_block_cache,
                                           discard_block_offs);
    if (refblock) {
        qcow2_cache_discard(s->refcount_block_cache, refblock);
    }
    /* Queue the cluster for discard */
    update_refcount_discard(bs, discard_block_offs, s->cluster_size);

    return 0;
}
3474
/*
 * Writes an updated copy of the reftable to disk in which the entries of
 * completely unused refcount blocks are zeroed out, then frees those
 * refcount blocks and clears the corresponding in-memory entries.
 *
 * Returns 0 on success, -errno otherwise.
 */
int qcow2_shrink_reftable(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t *reftable_tmp =
        g_malloc(s->refcount_table_size * REFTABLE_ENTRY_SIZE);
    int i, ret;

    for (i = 0; i < s->refcount_table_size; i++) {
        int64_t refblock_offs = s->refcount_table[i] & REFT_OFFSET_MASK;
        void *refblock;
        bool unused_block;

        if (refblock_offs == 0) {
            reftable_tmp[i] = 0;
            continue;
        }
        ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offs,
                              &refblock);
        if (ret < 0) {
            goto out;
        }

        /* If the refblock covers its own cluster, temporarily zero that
         * self-refcount so an otherwise unused self-describing block is
         * also detected as empty */
        if (i == offset_to_reftable_index(s, refblock_offs)) {
            uint64_t block_index = (refblock_offs >> s->cluster_bits) &
                                   (s->refcount_block_size - 1);
            uint64_t refcount = s->get_refcount(refblock, block_index);

            s->set_refcount(refblock, block_index, 0);

            unused_block = buffer_is_zero(refblock, s->cluster_size);

            s->set_refcount(refblock, block_index, refcount);
        } else {
            unused_block = buffer_is_zero(refblock, s->cluster_size);
        }
        qcow2_cache_put(s->refcount_block_cache, &refblock);

        /* reftable_tmp is the new on-disk reftable, in big-endian order */
        reftable_tmp[i] = unused_block ? 0 : cpu_to_be64(s->refcount_table[i]);
    }

    ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset, reftable_tmp,
                           s->refcount_table_size * REFTABLE_ENTRY_SIZE);

    /* NOTE(review): on write failure we deliberately do NOT bail out here;
     * the loop below still clears the in-memory entries (presumably to
     * match a possibly partially written reftable) while the error in @ret
     * suppresses the refblock discards. */
    for (i = 0; i < s->refcount_table_size; i++) {
        if (s->refcount_table[i] && !reftable_tmp[i]) {
            if (ret == 0) {
                ret = qcow2_discard_refcount_block(bs, s->refcount_table[i] &
                                                       REFT_OFFSET_MASK);
            }
            s->refcount_table[i] = 0;
        }
    }

    if (!s->cache_discards) {
        qcow2_process_discards(bs, ret);
    }

out:
    g_free(reftable_tmp);
    return ret;
}
3541
3542int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
3543{
3544 BDRVQcow2State *s = bs->opaque;
3545 int64_t i;
3546
3547 for (i = size_to_clusters(s, size) - 1; i >= 0; i--) {
3548 uint64_t refcount;
3549 int ret = qcow2_get_refcount(bs, i, &refcount);
3550 if (ret < 0) {
3551 fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
3552 i, strerror(-ret));
3553 return ret;
3554 }
3555 if (refcount > 0) {
3556 return i;
3557 }
3558 }
3559 qcow2_signal_corruption(bs, true, -1, -1,
3560 "There are no references in the refcount table.");
3561 return -EIO;
3562}
3563
3564int qcow2_detect_metadata_preallocation(BlockDriverState *bs)
3565{
3566 BDRVQcow2State *s = bs->opaque;
3567 int64_t i, end_cluster, cluster_count = 0, threshold;
3568 int64_t file_length, real_allocation, real_clusters;
3569
3570 qemu_co_mutex_assert_locked(&s->lock);
3571
3572 file_length = bdrv_getlength(bs->file->bs);
3573 if (file_length < 0) {
3574 return file_length;
3575 }
3576
3577 real_allocation = bdrv_get_allocated_file_size(bs->file->bs);
3578 if (real_allocation < 0) {
3579 return real_allocation;
3580 }
3581
3582 real_clusters = real_allocation / s->cluster_size;
3583 threshold = MAX(real_clusters * 10 / 9, real_clusters + 2);
3584
3585 end_cluster = size_to_clusters(s, file_length);
3586 for (i = 0; i < end_cluster && cluster_count < threshold; i++) {
3587 uint64_t refcount;
3588 int ret = qcow2_get_refcount(bs, i, &refcount);
3589 if (ret < 0) {
3590 return ret;
3591 }
3592 cluster_count += !!refcount;
3593 }
3594
3595 return cluster_count >= threshold;
3596}
3597