1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include "qemu/osdep.h"
26#include "block/block-io.h"
27#include "qapi/error.h"
28#include "qcow2.h"
29#include "qemu/range.h"
30#include "qemu/bswap.h"
31#include "qemu/cutils.h"
32#include "qemu/memalign.h"
33#include "trace.h"
34
35static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size,
36 uint64_t max);
37
38G_GNUC_WARN_UNUSED_RESULT
39static int update_refcount(BlockDriverState *bs,
40 int64_t offset, int64_t length, uint64_t addend,
41 bool decrease, enum qcow2_discard_type type);
42
43static uint64_t get_refcount_ro0(const void *refcount_array, uint64_t index);
44static uint64_t get_refcount_ro1(const void *refcount_array, uint64_t index);
45static uint64_t get_refcount_ro2(const void *refcount_array, uint64_t index);
46static uint64_t get_refcount_ro3(const void *refcount_array, uint64_t index);
47static uint64_t get_refcount_ro4(const void *refcount_array, uint64_t index);
48static uint64_t get_refcount_ro5(const void *refcount_array, uint64_t index);
49static uint64_t get_refcount_ro6(const void *refcount_array, uint64_t index);
50
51static void set_refcount_ro0(void *refcount_array, uint64_t index,
52 uint64_t value);
53static void set_refcount_ro1(void *refcount_array, uint64_t index,
54 uint64_t value);
55static void set_refcount_ro2(void *refcount_array, uint64_t index,
56 uint64_t value);
57static void set_refcount_ro3(void *refcount_array, uint64_t index,
58 uint64_t value);
59static void set_refcount_ro4(void *refcount_array, uint64_t index,
60 uint64_t value);
61static void set_refcount_ro5(void *refcount_array, uint64_t index,
62 uint64_t value);
63static void set_refcount_ro6(void *refcount_array, uint64_t index,
64 uint64_t value);
65
66
/* Getter table indexed by s->refcount_order: entry N reads refcounts that
 * are (1 << N) bits wide (ro0 = 1 bit ... ro6 = 64 bits, big-endian). */
static Qcow2GetRefcountFunc *const get_refcount_funcs[] = {
    &get_refcount_ro0,
    &get_refcount_ro1,
    &get_refcount_ro2,
    &get_refcount_ro3,
    &get_refcount_ro4,
    &get_refcount_ro5,
    &get_refcount_ro6
};

/* Setter table, parallel to get_refcount_funcs above. */
static Qcow2SetRefcountFunc *const set_refcount_funcs[] = {
    &set_refcount_ro0,
    &set_refcount_ro1,
    &set_refcount_ro2,
    &set_refcount_ro3,
    &set_refcount_ro4,
    &set_refcount_ro5,
    &set_refcount_ro6
};
86
87
88
89
90
91static void update_max_refcount_table_index(BDRVQcow2State *s)
92{
93 unsigned i = s->refcount_table_size - 1;
94 while (i > 0 && (s->refcount_table[i] & REFT_OFFSET_MASK) == 0) {
95 i--;
96 }
97
98 s->max_refcount_table_index = i;
99}
100
101int coroutine_fn qcow2_refcount_init(BlockDriverState *bs)
102{
103 BDRVQcow2State *s = bs->opaque;
104 unsigned int refcount_table_size2, i;
105 int ret;
106
107 assert(s->refcount_order >= 0 && s->refcount_order <= 6);
108
109 s->get_refcount = get_refcount_funcs[s->refcount_order];
110 s->set_refcount = set_refcount_funcs[s->refcount_order];
111
112 assert(s->refcount_table_size <= INT_MAX / REFTABLE_ENTRY_SIZE);
113 refcount_table_size2 = s->refcount_table_size * REFTABLE_ENTRY_SIZE;
114 s->refcount_table = g_try_malloc(refcount_table_size2);
115
116 if (s->refcount_table_size > 0) {
117 if (s->refcount_table == NULL) {
118 ret = -ENOMEM;
119 goto fail;
120 }
121 BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
122 ret = bdrv_co_pread(bs->file, s->refcount_table_offset,
123 refcount_table_size2, s->refcount_table, 0);
124 if (ret < 0) {
125 goto fail;
126 }
127 for(i = 0; i < s->refcount_table_size; i++)
128 be64_to_cpus(&s->refcount_table[i]);
129 update_max_refcount_table_index(s);
130 }
131 return 0;
132 fail:
133 return ret;
134}
135
136void qcow2_refcount_close(BlockDriverState *bs)
137{
138 BDRVQcow2State *s = bs->opaque;
139 g_free(s->refcount_table);
140}
141
142
/* 1-bit refcounts (refcount_order = 0): eight entries packed per byte,
 * least-significant bit first. */
static uint64_t get_refcount_ro0(const void *refcount_array, uint64_t index)
{
    const uint8_t *array = refcount_array;

    return (array[index / 8] >> (index % 8)) & 0x1;
}

static void set_refcount_ro0(void *refcount_array, uint64_t index,
                             uint64_t value)
{
    uint8_t *array = refcount_array;
    unsigned shift = index % 8;

    assert(!(value >> 1));
    /* Clear the old bit and merge in the new one in a single store */
    array[index / 8] = (array[index / 8] & ~(0x1 << shift)) | (value << shift);
}
155
/* 2-bit refcounts (refcount_order = 1): four entries packed per byte. */
static uint64_t get_refcount_ro1(const void *refcount_array, uint64_t index)
{
    const uint8_t *array = refcount_array;

    return (array[index / 4] >> (2 * (index % 4))) & 0x3;
}

static void set_refcount_ro1(void *refcount_array, uint64_t index,
                             uint64_t value)
{
    uint8_t *array = refcount_array;
    unsigned shift = 2 * (index % 4);

    assert(!(value >> 2));
    /* Replace the 2-bit field with the new value */
    array[index / 4] = (array[index / 4] & ~(0x3 << shift)) | (value << shift);
}
169
/* 4-bit refcounts (refcount_order = 2): two entries packed per byte. */
static uint64_t get_refcount_ro2(const void *refcount_array, uint64_t index)
{
    const uint8_t *array = refcount_array;

    return (array[index / 2] >> (4 * (index % 2))) & 0xf;
}

static void set_refcount_ro2(void *refcount_array, uint64_t index,
                             uint64_t value)
{
    uint8_t *array = refcount_array;
    unsigned shift = 4 * (index % 2);

    assert(!(value >> 4));
    /* Replace the nibble holding this entry */
    array[index / 2] = (array[index / 2] & ~(0xf << shift)) | (value << shift);
}
183
/* 8-bit refcounts (refcount_order = 3): one entry per byte, no packing. */
static uint64_t get_refcount_ro3(const void *refcount_array, uint64_t index)
{
    const uint8_t *array = refcount_array;

    return array[index];
}

static void set_refcount_ro3(void *refcount_array, uint64_t index,
                             uint64_t value)
{
    uint8_t *array = refcount_array;

    assert(!(value >> 8));
    array[index] = value;
}
195
196static uint64_t get_refcount_ro4(const void *refcount_array, uint64_t index)
197{
198 return be16_to_cpu(((const uint16_t *)refcount_array)[index]);
199}
200
201static void set_refcount_ro4(void *refcount_array, uint64_t index,
202 uint64_t value)
203{
204 assert(!(value >> 16));
205 ((uint16_t *)refcount_array)[index] = cpu_to_be16(value);
206}
207
208static uint64_t get_refcount_ro5(const void *refcount_array, uint64_t index)
209{
210 return be32_to_cpu(((const uint32_t *)refcount_array)[index]);
211}
212
213static void set_refcount_ro5(void *refcount_array, uint64_t index,
214 uint64_t value)
215{
216 assert(!(value >> 32));
217 ((uint32_t *)refcount_array)[index] = cpu_to_be32(value);
218}
219
220static uint64_t get_refcount_ro6(const void *refcount_array, uint64_t index)
221{
222 return be64_to_cpu(((const uint64_t *)refcount_array)[index]);
223}
224
225static void set_refcount_ro6(void *refcount_array, uint64_t index,
226 uint64_t value)
227{
228 ((uint64_t *)refcount_array)[index] = cpu_to_be64(value);
229}
230
231
232static int load_refcount_block(BlockDriverState *bs,
233 int64_t refcount_block_offset,
234 void **refcount_block)
235{
236 BDRVQcow2State *s = bs->opaque;
237
238 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD);
239 return qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
240 refcount_block);
241}
242
243
244
245
246
247int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
248 uint64_t *refcount)
249{
250 BDRVQcow2State *s = bs->opaque;
251 uint64_t refcount_table_index, block_index;
252 int64_t refcount_block_offset;
253 int ret;
254 void *refcount_block;
255
256 refcount_table_index = cluster_index >> s->refcount_block_bits;
257 if (refcount_table_index >= s->refcount_table_size) {
258 *refcount = 0;
259 return 0;
260 }
261 refcount_block_offset =
262 s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK;
263 if (!refcount_block_offset) {
264 *refcount = 0;
265 return 0;
266 }
267
268 if (offset_into_cluster(s, refcount_block_offset)) {
269 qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#" PRIx64
270 " unaligned (reftable index: %#" PRIx64 ")",
271 refcount_block_offset, refcount_table_index);
272 return -EIO;
273 }
274
275 ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
276 &refcount_block);
277 if (ret < 0) {
278 return ret;
279 }
280
281 block_index = cluster_index & (s->refcount_block_size - 1);
282 *refcount = s->get_refcount(refcount_block, block_index);
283
284 qcow2_cache_put(s->refcount_block_cache, &refcount_block);
285
286 return 0;
287}
288
289
290static int in_same_refcount_block(BDRVQcow2State *s, uint64_t offset_a,
291 uint64_t offset_b)
292{
293 uint64_t block_a = offset_a >> (s->cluster_bits + s->refcount_block_bits);
294 uint64_t block_b = offset_b >> (s->cluster_bits + s->refcount_block_bits);
295
296 return (block_a == block_b);
297}
298
299
300
301
302
303
304
/*
 * Loads (and, if necessary, allocates) the refcount block covering
 * @cluster_index and returns it in *refcount_block (via the refblock
 * cache; caller puts it back).
 *
 * Returns 0 when an existing block was loaded, -EAGAIN after a new block
 * was allocated and hooked up (the caller must restart, because free
 * cluster bookkeeping has changed under it), or a negative errno on error.
 */
static int alloc_refcount_block(BlockDriverState *bs,
                                int64_t cluster_index, void **refcount_block)
{
    BDRVQcow2State *s = bs->opaque;
    unsigned int refcount_table_index;
    int64_t ret;

    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);

    /* Find the refcount block for the given cluster */
    refcount_table_index = cluster_index >> s->refcount_block_bits;

    if (refcount_table_index < s->refcount_table_size) {

        uint64_t refcount_block_offset =
            s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK;

        /* If it's already there, we're done */
        if (refcount_block_offset) {
            if (offset_into_cluster(s, refcount_block_offset)) {
                qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#"
                                        PRIx64 " unaligned (reftable index: "
                                        "%#x)", refcount_block_offset,
                                        refcount_table_index);
                return -EIO;
            }

            return load_refcount_block(bs, refcount_block_offset,
                                       refcount_block);
        }
    }

    /*
     * If we came here, we need to allocate something. Something is at least
     * a cluster for the new refcount block. It may also include a new
     * refcount table if the old refcount table is too small.
     *
     * Note that allocating clusters here needs some special care:
     *
     * - We can't use the normal qcow2_alloc_clusters(), it would try to
     *   increase the refcount and very likely we would end up with an endless
     *   recursion. Instead we must place the refcount blocks in a way that
     *   they can describe themselves.
     *
     * - We need to consider that at this point we are inside update_refcount
     *   and potentially doing an initial refcount increase. This means that
     *   some clusters have already been allocated by the caller, but their
     *   refcount isn't accurate yet. If we allocate clusters for metadata, we
     *   need to return -EAGAIN to signal the caller that it needs to restart
     *   the search for free clusters.
     */

    *refcount_block = NULL;

    /* We write to the refcount table, so we might depend on L2 tables */
    ret = qcow2_cache_flush(bs, s->l2_table_cache);
    if (ret < 0) {
        return ret;
    }

    /* Allocate the refcount block itself (no refcount update yet) */
    int64_t new_block = alloc_clusters_noref(bs, s->cluster_size, INT64_MAX);
    if (new_block < 0) {
        return new_block;
    }

    /* The offset must fit in the offset field of the refcount table entry */
    assert((new_block & REFT_OFFSET_MASK) == new_block);

    /* If we're allocating the block at offset 0 then something is wrong */
    if (new_block == 0) {
        qcow2_signal_corruption(bs, true, -1, -1, "Preventing invalid "
                                "allocation of refcount block at offset 0");
        return -EIO;
    }

#ifdef DEBUG_ALLOC2
    fprintf(stderr, "qcow2: Allocate refcount block %d for %" PRIx64
        " at %" PRIx64 "\n",
        refcount_table_index, cluster_index << s->cluster_bits, new_block);
#endif

    if (in_same_refcount_block(s, new_block, cluster_index << s->cluster_bits)) {
        /* The block describes itself: initialise it in memory and set its
         * own refcount to 1 directly */
        ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
                                    refcount_block);
        if (ret < 0) {
            goto fail;
        }

        memset(*refcount_block, 0, s->cluster_size);

        /* The block describes itself, need to update the cache */
        int block_index = (new_block >> s->cluster_bits) &
            (s->refcount_block_size - 1);
        s->set_refcount(*refcount_block, block_index, 1);
    } else {
        /* Described by an already existing refcount block: bump the new
         * block's refcount through the normal path */
        ret = update_refcount(bs, new_block, s->cluster_size, 1, false,
                              QCOW2_DISCARD_NEVER);
        if (ret < 0) {
            goto fail;
        }

        ret = qcow2_cache_flush(bs, s->refcount_block_cache);
        if (ret < 0) {
            goto fail;
        }

        /* Initialize the new refcount block only after updating its refcount,
         * update_refcount uses the refcount cache itself */
        ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
                                    refcount_block);
        if (ret < 0) {
            goto fail;
        }

        memset(*refcount_block, 0, s->cluster_size);
    }

    /* Now the new refcount block needs to be written to disk */
    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE);
    qcow2_cache_entry_mark_dirty(s->refcount_block_cache, *refcount_block);
    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
    if (ret < 0) {
        goto fail;
    }

    /* If the refcount table is big enough, just hook the block up there */
    if (refcount_table_index < s->refcount_table_size) {
        uint64_t data64 = cpu_to_be64(new_block);
        BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP);
        ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset +
                               refcount_table_index * REFTABLE_ENTRY_SIZE,
            sizeof(data64), &data64, 0);
        if (ret < 0) {
            goto fail;
        }

        s->refcount_table[refcount_table_index] = new_block;
        /* If there's a hole in s->refcount_table then it can happen
         * that refcount_table_index < s->max_refcount_table_index */
        s->max_refcount_table_index =
            MAX(s->max_refcount_table_index, refcount_table_index);

        /* The new refcount block may be where the caller intended to put its
         * data, so let it restart the search for free clusters */
        return -EAGAIN;
    }

    qcow2_cache_put(s->refcount_block_cache, refcount_block);

    /*
     * If we come here, we need to grow the refcount table. A new reftable
     * (and the area of refblocks it needs) is placed past all currently
     * allocated metadata; qcow2_refcount_area() does the heavy lifting.
     */
    BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_GROW);

    /* Number of refblocks needed to cover both the caller's cluster and the
     * freshly allocated block */
    uint64_t blocks_used = DIV_ROUND_UP(MAX(cluster_index + 1,
                                            (new_block >> s->cluster_bits) + 1),
                                        s->refcount_block_size);

    /* For now, the new reftable area goes right after the clusters those
     * refblocks cover */
    uint64_t meta_offset = (blocks_used * s->refcount_block_size) *
                           s->cluster_size;

    ret = qcow2_refcount_area(bs, meta_offset, 0, false,
                              refcount_table_index, new_block);
    if (ret < 0) {
        return ret;
    }

    ret = load_refcount_block(bs, new_block, refcount_block);
    if (ret < 0) {
        return ret;
    }

    /* If we want to re-use the allocation strategy of previous
     * qemu versions, we have to let the caller search from the start
     * of the image again */
    return -EAGAIN;

fail:
    if (*refcount_block != NULL) {
        qcow2_cache_put(s->refcount_block_cache, refcount_block);
    }
    return ret;
}
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
/*
 * Builds a refcount structure area at @start_offset: a set of refblocks
 * followed by a new refcount table covering the image up to
 * @start_offset + @additional_clusters.  With @exact_size the reftable is
 * not over-allocated for future growth.  If @new_refblock_offset is
 * non-zero, that already-allocated refblock is hooked up at
 * @new_refblock_index instead of allocating one there.
 *
 * On success, switches the image header and in-memory state to the new
 * reftable and returns the end offset of the area; returns a negative
 * errno on failure.
 */
int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t start_offset,
                            uint64_t additional_clusters, bool exact_size,
                            int new_refblock_index,
                            uint64_t new_refblock_offset)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t total_refblock_count_u64, additional_refblock_count;
    int total_refblock_count, table_size, area_reftable_index, table_clusters;
    int i;
    uint64_t table_offset, block_offset, end_offset;
    int ret;
    uint64_t *new_table;

    assert(!(start_offset % s->cluster_size));

    /* How many refblocks are needed to cover the whole image including the
     * new area (accounting for the metadata's own refcounts if !exact_size) */
    qcow2_refcount_metadata_size(start_offset / s->cluster_size +
                                 additional_clusters,
                                 s->cluster_size, s->refcount_order,
                                 !exact_size, &total_refblock_count_u64);
    if (total_refblock_count_u64 > QCOW_MAX_REFTABLE_SIZE) {
        return -EFBIG;
    }
    total_refblock_count = total_refblock_count_u64;

    /* Index in the refcount table of the first refblock covering the area
     * starting at @start_offset */
    area_reftable_index = (start_offset / s->cluster_size) /
                          s->refcount_block_size;

    if (exact_size) {
        table_size = total_refblock_count;
    } else {
        /* Leave 50% headroom so the reftable does not need to grow again
         * immediately */
        table_size = total_refblock_count +
                     DIV_ROUND_UP(total_refblock_count, 2);
    }

    /* The qcow2 file can only store the reftable size in number of clusters */
    table_size = ROUND_UP(table_size, s->cluster_size / REFTABLE_ENTRY_SIZE);
    table_clusters = (table_size * REFTABLE_ENTRY_SIZE) / s->cluster_size;

    if (table_size > QCOW_MAX_REFTABLE_SIZE) {
        return -EFBIG;
    }

    new_table = g_try_new0(uint64_t, table_size);

    assert(table_size > 0);
    if (new_table == NULL) {
        ret = -ENOMEM;
        goto fail;
    }

    /* Fill the new refcount table */
    if (table_size > s->max_refcount_table_index) {
        /* The new table is bigger than the used part of the old table, so
         * copying only the used entries suffices */
        memcpy(new_table, s->refcount_table,
               (s->max_refcount_table_index + 1) * REFTABLE_ENTRY_SIZE);
    } else {
        /* The new table is smaller than the old one, so only copy as many
         * entries as fit (the rest must be unused anyway, or the caller asked
         * for an impossible shrink) */
        memcpy(new_table, s->refcount_table, table_size * REFTABLE_ENTRY_SIZE);
    }

    if (new_refblock_offset) {
        assert(new_refblock_index < total_refblock_count);
        new_table[new_refblock_index] = new_refblock_offset;
    }

    /* Count how many new refblocks we have to create */
    additional_refblock_count = 0;
    for (i = area_reftable_index; i < total_refblock_count; i++) {
        if (!new_table[i]) {
            additional_refblock_count++;
        }
    }

    /* Layout: new refblocks first, then the new reftable */
    table_offset = start_offset + additional_refblock_count * s->cluster_size;
    end_offset = table_offset + table_clusters * s->cluster_size;

    /* Fill the refcount blocks, creating new ones where necessary */
    block_offset = start_offset;
    for (i = area_reftable_index; i < total_refblock_count; i++) {
        void *refblock_data;
        uint64_t first_offset_covered;

        /* Reuse an existing refblock if possible, create a new one otherwise */
        if (new_table[i]) {
            ret = qcow2_cache_get(bs, s->refcount_block_cache, new_table[i],
                                  &refblock_data);
            if (ret < 0) {
                goto fail;
            }
        } else {
            ret = qcow2_cache_get_empty(bs, s->refcount_block_cache,
                                        block_offset, &refblock_data);
            if (ret < 0) {
                goto fail;
            }
            memset(refblock_data, 0, s->cluster_size);
            qcow2_cache_entry_mark_dirty(s->refcount_block_cache,
                                         refblock_data);

            new_table[i] = block_offset;
            block_offset += s->cluster_size;
        }

        /* First host cluster offset this refblock describes */
        first_offset_covered = (uint64_t)i * s->refcount_block_size *
                               s->cluster_size;
        if (first_offset_covered < end_offset) {
            int j, end_index;

            /* Set the refcount of all of the new refcount structures to 1;
             * skip the part before @start_offset, which belongs to already
             * existing data */
            if (first_offset_covered < start_offset) {
                assert(i == area_reftable_index);
                j = (start_offset - first_offset_covered) / s->cluster_size;
                assert(j < s->refcount_block_size);
            } else {
                j = 0;
            }

            end_index = MIN((end_offset - first_offset_covered) /
                            s->cluster_size,
                            s->refcount_block_size);

            for (; j < end_index; j++) {
                /* The caller guaranteed this area was free */
                assert(s->get_refcount(refblock_data, j) == 0);
                s->set_refcount(refblock_data, j, 1);
            }

            qcow2_cache_entry_mark_dirty(s->refcount_block_cache,
                                         refblock_data);
        }

        qcow2_cache_put(s->refcount_block_cache, &refblock_data);
    }

    assert(block_offset == table_offset);

    /* Write refcount blocks to disk */
    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
    if (ret < 0) {
        goto fail;
    }

    /* Write the refcount table to disk (entries are big-endian on disk) */
    for (i = 0; i < total_refblock_count; i++) {
        cpu_to_be64s(&new_table[i]);
    }

    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE);
    ret = bdrv_pwrite_sync(bs->file, table_offset,
                           table_size * REFTABLE_ENTRY_SIZE, new_table, 0);
    if (ret < 0) {
        goto fail;
    }

    /* Convert back so the in-memory copy stays in host byte order */
    for (i = 0; i < total_refblock_count; i++) {
        be64_to_cpus(&new_table[i]);
    }

    /* Hook up the new refcount table in the image header */
    struct QEMU_PACKED {
        uint64_t d64;
        uint32_t d32;
    } data;
    data.d64 = cpu_to_be64(table_offset);
    data.d32 = cpu_to_be32(table_clusters);
    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE);
    ret = bdrv_pwrite_sync(bs->file,
                           offsetof(QCowHeader, refcount_table_offset),
                           sizeof(data), &data, 0);
    if (ret < 0) {
        goto fail;
    }

    /* The new table is now in effect; switch the in-memory state over */
    uint64_t old_table_offset = s->refcount_table_offset;
    uint64_t old_table_size = s->refcount_table_size;

    g_free(s->refcount_table);
    s->refcount_table = new_table;
    s->refcount_table_size = table_size;
    s->refcount_table_offset = table_offset;
    update_max_refcount_table_index(s);

    /* Free the old table's clusters */
    qcow2_free_clusters(bs, old_table_offset,
                        old_table_size * REFTABLE_ENTRY_SIZE,
                        QCOW2_DISCARD_OTHER);

    return end_offset;

fail:
    g_free(new_table);
    return ret;
}
734
735void qcow2_process_discards(BlockDriverState *bs, int ret)
736{
737 BDRVQcow2State *s = bs->opaque;
738 Qcow2DiscardRegion *d, *next;
739
740 QTAILQ_FOREACH_SAFE(d, &s->discards, next, next) {
741 QTAILQ_REMOVE(&s->discards, d, next);
742
743
744 if (ret >= 0) {
745 int r2 = bdrv_pdiscard(bs->file, d->offset, d->bytes);
746 if (r2 < 0) {
747 trace_qcow2_process_discards_failed_region(d->offset, d->bytes,
748 r2);
749 }
750 }
751
752 g_free(d);
753 }
754}
755
/*
 * Queue [offset, offset + length) for discarding, merging it with any
 * adjacent region already in s->discards.  The actual discard happens later
 * in qcow2_process_discards().
 */
static void update_refcount_discard(BlockDriverState *bs,
                                    uint64_t offset, uint64_t length)
{
    BDRVQcow2State *s = bs->opaque;
    Qcow2DiscardRegion *d, *p, *next;

    /* First try to extend an existing region in place */
    QTAILQ_FOREACH(d, &s->discards, next) {
        uint64_t new_start = MIN(offset, d->offset);
        uint64_t new_end = MAX(offset + length, d->offset + d->bytes);

        /* Merged span no larger than the sum of the parts => contiguous */
        if (new_end - new_start <= length + d->bytes) {
            /* There can't be any overlap, caller must have checked that the
             * clusters were free before queuing them */
            assert(d->bytes + length == new_end - new_start);
            d->offset = new_start;
            d->bytes = new_end - new_start;
            goto found;
        }
    }

    /* Not adjacent to anything: queue a new region */
    d = g_malloc(sizeof(*d));
    *d = (Qcow2DiscardRegion) {
        .bs = bs,
        .offset = offset,
        .bytes = length,
    };
    QTAILQ_INSERT_TAIL(&s->discards, d, next);

found:
    /* The merged region may now touch another queued region; fold any such
     * neighbour into d and drop it from the queue */
    QTAILQ_FOREACH_SAFE(p, &s->discards, next, next) {
        if (p == d
            || p->offset > d->offset + d->bytes
            || d->offset > p->offset + p->bytes)
        {
            continue;
        }

        /* Still no overlap possible, only exact adjacency */
        assert(p->offset == d->offset + d->bytes
               || d->offset == p->offset + p->bytes);

        QTAILQ_REMOVE(&s->discards, p, next);
        d->offset = MIN(d->offset, p->offset);
        d->bytes += p->bytes;
        g_free(p);
    }
}
805
806
807
808
/*
 * Adds/removes @addend to/from the refcount of every cluster in
 * [@offset, @offset + @length); @decrease selects the direction.  On error
 * the refcounts changed so far are rolled back with a best-effort inverse
 * update, and queued discards are processed either way (unless discard
 * caching is active).
 */
static int update_refcount(BlockDriverState *bs,
                           int64_t offset,
                           int64_t length,
                           uint64_t addend,
                           bool decrease,
                           enum qcow2_discard_type type)
{
    BDRVQcow2State *s = bs->opaque;
    int64_t start, last, cluster_offset;
    void *refcount_block = NULL;
    int64_t old_table_index = -1;
    int ret;

#ifdef DEBUG_ALLOC2
    fprintf(stderr, "update_refcount: offset=%" PRId64 " size=%" PRId64
        " addend=%s%" PRIu64 "\n", offset, length, decrease ? "-" : "",
        addend);
#endif
    if (length < 0) {
        return -EINVAL;
    } else if (length == 0) {
        return 0;
    }

    if (decrease) {
        /* Before freeing anything, make sure no L2 table still references
         * these clusters: flush L2 before refblocks */
        qcow2_cache_set_dependency(bs, s->refcount_block_cache,
            s->l2_table_cache);
    }

    start = start_of_cluster(s, offset);
    last = start_of_cluster(s, offset + length - 1);
    for(cluster_offset = start; cluster_offset <= last;
        cluster_offset += s->cluster_size)
    {
        int block_index;
        uint64_t refcount;
        int64_t cluster_index = cluster_offset >> s->cluster_bits;
        int64_t table_index = cluster_index >> s->refcount_block_bits;

        /* Load the refcount block (allocating it if needed), but only when
         * we cross into a different refblock */
        if (table_index != old_table_index) {
            if (refcount_block) {
                qcow2_cache_put(s->refcount_block_cache, &refcount_block);
            }
            ret = alloc_refcount_block(bs, cluster_index, &refcount_block);
            /* If the caller needs to restart the search for free clusters,
             * try the same cluster range again first */
            if (ret == -EAGAIN) {
                if (s->free_cluster_index > (start >> s->cluster_bits)) {
                    s->free_cluster_index = (start >> s->cluster_bits);
                }
            }
            if (ret < 0) {
                goto fail;
            }
        }
        old_table_index = table_index;

        qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refcount_block);

        /* We can update the count and save it */
        block_index = cluster_index & (s->refcount_block_size - 1);

        refcount = s->get_refcount(refcount_block, block_index);
        /* Reject underflow on decrease, and overflow past the image's
         * maximum refcount on increase */
        if (decrease ? (refcount - addend > refcount)
                     : (refcount + addend < refcount ||
                        refcount + addend > s->refcount_max))
        {
            ret = -EINVAL;
            goto fail;
        }
        if (decrease) {
            refcount -= addend;
        } else {
            refcount += addend;
        }
        /* Remember newly freed clusters for the allocator */
        if (refcount == 0 && cluster_index < s->free_cluster_index) {
            s->free_cluster_index = cluster_index;
        }
        s->set_refcount(refcount_block, block_index, refcount);

        if (refcount == 0) {
            void *table;

            /* A freed cluster may itself hold cached metadata; drop any such
             * cache entries so they are not written back later */
            table = qcow2_cache_is_table_offset(s->refcount_block_cache,
                                                offset);
            if (table != NULL) {
                qcow2_cache_put(s->refcount_block_cache, &refcount_block);
                old_table_index = -1;
                qcow2_cache_discard(s->refcount_block_cache, table);
            }

            table = qcow2_cache_is_table_offset(s->l2_table_cache, offset);
            if (table != NULL) {
                qcow2_cache_discard(s->l2_table_cache, table);
            }

            if (s->discard_passthrough[type]) {
                update_refcount_discard(bs, cluster_offset, s->cluster_size);
            }
        }
    }

    ret = 0;
fail:
    if (!s->cache_discards) {
        qcow2_process_discards(bs, ret);
    }

    /* Write last changed block to disk */
    if (refcount_block) {
        qcow2_cache_put(s->refcount_block_cache, &refcount_block);
    }

    /*
     * Try to undo any updates if an error is returned (This may succeed in
     * some cases like ENOSPC for allocating a new refcount block)
     */
    if (ret < 0) {
        int dummy;
        dummy = update_refcount(bs, offset, cluster_offset - offset, addend,
                                !decrease, QCOW2_DISCARD_NEVER);
        (void)dummy;
    }

    return ret;
}
936
937
938
939
940
941
942
943
944
945int qcow2_update_cluster_refcount(BlockDriverState *bs,
946 int64_t cluster_index,
947 uint64_t addend, bool decrease,
948 enum qcow2_discard_type type)
949{
950 BDRVQcow2State *s = bs->opaque;
951 int ret;
952
953 ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend,
954 decrease, type);
955 if (ret < 0) {
956 return ret;
957 }
958
959 return 0;
960}
961
962
963
964
965
966
967
968
969
/*
 * Find @size bytes worth of contiguous free clusters starting from the
 * s->free_cluster_index hint, WITHOUT updating any refcounts.  Returns the
 * host offset of the first cluster, -EFBIG if the allocation would end
 * beyond @max, or another negative errno.
 */
static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size,
                                    uint64_t max)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t i, nb_clusters, refcount;
    int ret;

    /* We can't allocate clusters if they may still be queued for discard */
    if (s->cache_discards) {
        qcow2_process_discards(bs, 0);
    }

    nb_clusters = size_to_clusters(s, size);
retry:
    /* free_cluster_index keeps advancing across retries, so after a used
     * cluster is hit the scan resumes right behind it */
    for(i = 0; i < nb_clusters; i++) {
        uint64_t next_cluster_index = s->free_cluster_index++;
        ret = qcow2_get_refcount(bs, next_cluster_index, &refcount);

        if (ret < 0) {
            return ret;
        } else if (refcount != 0) {
            goto retry;
        }
    }

    /* Make sure that all offsets in the "allocated" range are representable
     * in the requested max */
    if (s->free_cluster_index > 0 &&
        s->free_cluster_index - 1 > (max >> s->cluster_bits))
    {
        return -EFBIG;
    }

#ifdef DEBUG_ALLOC2
    fprintf(stderr, "alloc_clusters: size=%" PRId64 " -> %" PRId64 "\n",
            size,
            (s->free_cluster_index - nb_clusters) << s->cluster_bits);
#endif
    return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
}
1010
1011int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size)
1012{
1013 int64_t offset;
1014 int ret;
1015
1016 BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
1017 do {
1018 offset = alloc_clusters_noref(bs, size, QCOW_MAX_CLUSTER_OFFSET);
1019 if (offset < 0) {
1020 return offset;
1021 }
1022
1023 ret = update_refcount(bs, offset, size, 1, false, QCOW2_DISCARD_NEVER);
1024 } while (ret == -EAGAIN);
1025
1026 if (ret < 0) {
1027 return ret;
1028 }
1029
1030 return offset;
1031}
1032
/*
 * Try to allocate up to @nb_clusters contiguous clusters at the fixed host
 * position @offset.  Returns the number of clusters actually allocated
 * (which may be fewer than requested if a used cluster is hit), or a
 * negative errno.
 */
int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
                                int64_t nb_clusters)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t cluster_index, refcount;
    uint64_t i;
    int ret;

    assert(nb_clusters >= 0);
    if (nb_clusters == 0) {
        return 0;
    }

    /* On -EAGAIN (refcount metadata was allocated mid-way), re-scan from the
     * start, because the free range may have changed */
    do {
        /* Check how many clusters there are free */
        cluster_index = offset >> s->cluster_bits;
        for(i = 0; i < nb_clusters; i++) {
            ret = qcow2_get_refcount(bs, cluster_index++, &refcount);
            if (ret < 0) {
                return ret;
            } else if (refcount != 0) {
                break;
            }
        }

        /* And then allocate them */
        ret = update_refcount(bs, offset, i << s->cluster_bits, 1, false,
                              QCOW2_DISCARD_NEVER);
    } while (ret == -EAGAIN);

    if (ret < 0) {
        return ret;
    }

    return i;
}
1069
1070
1071
/*
 * Allocate @size bytes (at most one cluster) for compressed data, packing
 * consecutive allocations into the same cluster where possible via the
 * s->free_byte_offset hint.  Clusters shared by multiple allocations get
 * their refcount bumped once per allocation.  Returns the host offset or a
 * negative errno.
 */
int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
{
    BDRVQcow2State *s = bs->opaque;
    int64_t offset;
    size_t free_in_cluster;
    int ret;

    BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES);
    assert(size > 0 && size <= s->cluster_size);
    assert(!s->free_byte_offset || offset_into_cluster(s, s->free_byte_offset));

    offset = s->free_byte_offset;

    if (offset) {
        uint64_t refcount;
        ret = qcow2_get_refcount(bs, offset >> s->cluster_bits, &refcount);
        if (ret < 0) {
            return ret;
        }

        /* Refcount already at its maximum: cannot share this cluster any
         * further, start a new one */
        if (refcount == s->refcount_max) {
            offset = 0;
        }
    }

    free_in_cluster = s->cluster_size - offset_into_cluster(s, offset);
    do {
        /* Not enough room left in the current cluster: get a new one */
        if (!offset || free_in_cluster < size) {
            int64_t new_cluster;

            new_cluster = alloc_clusters_noref(bs, s->cluster_size,
                                               MIN(s->cluster_offset_mask,
                                                   QCOW_MAX_CLUSTER_OFFSET));
            if (new_cluster < 0) {
                return new_cluster;
            }

            if (new_cluster == 0) {
                qcow2_signal_corruption(bs, true, -1, -1, "Preventing invalid "
                                        "allocation of compressed cluster "
                                        "at offset 0");
                return -EIO;
            }

            if (!offset || ROUND_UP(offset, s->cluster_size) != new_cluster) {
                /* The new cluster is not contiguous with the tail we had */
                offset = new_cluster;
                free_in_cluster = s->cluster_size;
            } else {
                /* Contiguous: extend the usable range across both clusters */
                free_in_cluster += s->cluster_size;
            }
        }

        assert(offset);
        ret = update_refcount(bs, offset, size, 1, false, QCOW2_DISCARD_NEVER);
        if (ret < 0) {
            /* On -EAGAIN the free-cluster search restarted; drop the hint and
             * loop; any other error aborts below */
            offset = 0;
        }
    } while (ret == -EAGAIN);
    if (ret < 0) {
        return ret;
    }

    /* The cluster refcount was incremented; refcount blocks must be flushed
     * before the caller's L2 table updates */
    qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);

    s->free_byte_offset = offset + size;
    if (!offset_into_cluster(s, s->free_byte_offset)) {
        /* Cluster exactly filled: next allocation starts fresh */
        s->free_byte_offset = 0;
    }

    return offset;
}
1145
1146void qcow2_free_clusters(BlockDriverState *bs,
1147 int64_t offset, int64_t size,
1148 enum qcow2_discard_type type)
1149{
1150 int ret;
1151
1152 BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_FREE);
1153 ret = update_refcount(bs, offset, size, 1, true, type);
1154 if (ret < 0) {
1155 fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-ret));
1156
1157 }
1158}
1159
1160
1161
1162
1163
/*
 * Free whatever host storage the L2 entry @l2_entry references, dispatching
 * on the cluster type (compressed, normal/zero-allocated, or nothing at all
 * for plain-zero and unallocated entries).
 */
void qcow2_free_any_cluster(BlockDriverState *bs, uint64_t l2_entry,
                            enum qcow2_discard_type type)
{
    BDRVQcow2State *s = bs->opaque;
    QCow2ClusterType ctype = qcow2_get_cluster_type(bs, l2_entry);

    /* With an external data file, guest clusters are not refcounted; just
     * pass the discard through when configured to do so */
    if (has_data_file(bs)) {
        if (s->discard_passthrough[type] &&
            (ctype == QCOW2_CLUSTER_NORMAL ||
             ctype == QCOW2_CLUSTER_ZERO_ALLOC))
        {
            bdrv_pdiscard(s->data_file, l2_entry & L2E_OFFSET_MASK,
                          s->cluster_size);
        }
        return;
    }

    switch (ctype) {
    case QCOW2_CLUSTER_COMPRESSED:
        {
            uint64_t coffset;
            int csize;

            /* Compressed clusters are sub-cluster sized; free exactly the
             * byte range the entry encodes */
            qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize);
            qcow2_free_clusters(bs, coffset, csize, type);
        }
        break;
    case QCOW2_CLUSTER_NORMAL:
    case QCOW2_CLUSTER_ZERO_ALLOC:
        if (offset_into_cluster(s, l2_entry & L2E_OFFSET_MASK)) {
            qcow2_signal_corruption(bs, false, -1, -1,
                                    "Cannot free unaligned cluster %#llx",
                                    l2_entry & L2E_OFFSET_MASK);
        } else {
            qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
                                s->cluster_size, type);
        }
        break;
    case QCOW2_CLUSTER_ZERO_PLAIN:
    case QCOW2_CLUSTER_UNALLOCATED:
        /* Nothing allocated on disk for these entry types */
        break;
    default:
        abort();
    }
}
1209
1210int qcow2_write_caches(BlockDriverState *bs)
1211{
1212 BDRVQcow2State *s = bs->opaque;
1213 int ret;
1214
1215 ret = qcow2_cache_write(bs, s->l2_table_cache);
1216 if (ret < 0) {
1217 return ret;
1218 }
1219
1220 if (qcow2_need_accurate_refcounts(s)) {
1221 ret = qcow2_cache_write(bs, s->refcount_block_cache);
1222 if (ret < 0) {
1223 return ret;
1224 }
1225 }
1226
1227 return 0;
1228}
1229
1230int qcow2_flush_caches(BlockDriverState *bs)
1231{
1232 int ret = qcow2_write_caches(bs);
1233 if (ret < 0) {
1234 return ret;
1235 }
1236
1237 return bdrv_flush(bs->file->bs);
1238}
1239
1240
1241
1242
1243
1244
1245
1246int qcow2_update_snapshot_refcount(BlockDriverState *bs,
1247 int64_t l1_table_offset, int l1_size, int addend)
1248{
1249 BDRVQcow2State *s = bs->opaque;
1250 uint64_t *l1_table, *l2_slice, l2_offset, entry, l1_size2, refcount;
1251 bool l1_allocated = false;
1252 int64_t old_entry, old_l2_offset;
1253 unsigned slice, slice_size2, n_slices;
1254 int i, j, l1_modified = 0;
1255 int ret;
1256
1257 assert(addend >= -1 && addend <= 1);
1258
1259 l2_slice = NULL;
1260 l1_table = NULL;
1261 l1_size2 = l1_size * L1E_SIZE;
1262 slice_size2 = s->l2_slice_size * l2_entry_size(s);
1263 n_slices = s->cluster_size / slice_size2;
1264
1265 s->cache_discards = true;
1266
1267
1268
1269
1270 if (l1_table_offset != s->l1_table_offset) {
1271 l1_table = g_try_malloc0(l1_size2);
1272 if (l1_size2 && l1_table == NULL) {
1273 ret = -ENOMEM;
1274 goto fail;
1275 }
1276 l1_allocated = true;
1277
1278 ret = bdrv_pread(bs->file, l1_table_offset, l1_size2, l1_table, 0);
1279 if (ret < 0) {
1280 goto fail;
1281 }
1282
1283 for (i = 0; i < l1_size; i++) {
1284 be64_to_cpus(&l1_table[i]);
1285 }
1286 } else {
1287 assert(l1_size == s->l1_size);
1288 l1_table = s->l1_table;
1289 l1_allocated = false;
1290 }
1291
1292 for (i = 0; i < l1_size; i++) {
1293 l2_offset = l1_table[i];
1294 if (l2_offset) {
1295 old_l2_offset = l2_offset;
1296 l2_offset &= L1E_OFFSET_MASK;
1297
1298 if (offset_into_cluster(s, l2_offset)) {
1299 qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#"
1300 PRIx64 " unaligned (L1 index: %#x)",
1301 l2_offset, i);
1302 ret = -EIO;
1303 goto fail;
1304 }
1305
1306 for (slice = 0; slice < n_slices; slice++) {
1307 ret = qcow2_cache_get(bs, s->l2_table_cache,
1308 l2_offset + slice * slice_size2,
1309 (void **) &l2_slice);
1310 if (ret < 0) {
1311 goto fail;
1312 }
1313
1314 for (j = 0; j < s->l2_slice_size; j++) {
1315 uint64_t cluster_index;
1316 uint64_t offset;
1317
1318 entry = get_l2_entry(s, l2_slice, j);
1319 old_entry = entry;
1320 entry &= ~QCOW_OFLAG_COPIED;
1321 offset = entry & L2E_OFFSET_MASK;
1322
1323 switch (qcow2_get_cluster_type(bs, entry)) {
1324 case QCOW2_CLUSTER_COMPRESSED:
1325 if (addend != 0) {
1326 uint64_t coffset;
1327 int csize;
1328
1329 qcow2_parse_compressed_l2_entry(bs, entry,
1330 &coffset, &csize);
1331 ret = update_refcount(
1332 bs, coffset, csize,
1333 abs(addend), addend < 0,
1334 QCOW2_DISCARD_SNAPSHOT);
1335 if (ret < 0) {
1336 goto fail;
1337 }
1338 }
1339
1340 refcount = 2;
1341 break;
1342
1343 case QCOW2_CLUSTER_NORMAL:
1344 case QCOW2_CLUSTER_ZERO_ALLOC:
1345 if (offset_into_cluster(s, offset)) {
1346
1347 int l2_index = slice * s->l2_slice_size + j;
1348 qcow2_signal_corruption(
1349 bs, true, -1, -1, "Cluster "
1350 "allocation offset %#" PRIx64
1351 " unaligned (L2 offset: %#"
1352 PRIx64 ", L2 index: %#x)",
1353 offset, l2_offset, l2_index);
1354 ret = -EIO;
1355 goto fail;
1356 }
1357
1358 cluster_index = offset >> s->cluster_bits;
1359 assert(cluster_index);
1360 if (addend != 0) {
1361 ret = qcow2_update_cluster_refcount(
1362 bs, cluster_index, abs(addend), addend < 0,
1363 QCOW2_DISCARD_SNAPSHOT);
1364 if (ret < 0) {
1365 goto fail;
1366 }
1367 }
1368
1369 ret = qcow2_get_refcount(bs, cluster_index, &refcount);
1370 if (ret < 0) {
1371 goto fail;
1372 }
1373 break;
1374
1375 case QCOW2_CLUSTER_ZERO_PLAIN:
1376 case QCOW2_CLUSTER_UNALLOCATED:
1377 refcount = 0;
1378 break;
1379
1380 default:
1381 abort();
1382 }
1383
1384 if (refcount == 1) {
1385 entry |= QCOW_OFLAG_COPIED;
1386 }
1387 if (entry != old_entry) {
1388 if (addend > 0) {
1389 qcow2_cache_set_dependency(bs, s->l2_table_cache,
1390 s->refcount_block_cache);
1391 }
1392 set_l2_entry(s, l2_slice, j, entry);
1393 qcow2_cache_entry_mark_dirty(s->l2_table_cache,
1394 l2_slice);
1395 }
1396 }
1397
1398 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
1399 }
1400
1401 if (addend != 0) {
1402 ret = qcow2_update_cluster_refcount(bs, l2_offset >>
1403 s->cluster_bits,
1404 abs(addend), addend < 0,
1405 QCOW2_DISCARD_SNAPSHOT);
1406 if (ret < 0) {
1407 goto fail;
1408 }
1409 }
1410 ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits,
1411 &refcount);
1412 if (ret < 0) {
1413 goto fail;
1414 } else if (refcount == 1) {
1415 l2_offset |= QCOW_OFLAG_COPIED;
1416 }
1417 if (l2_offset != old_l2_offset) {
1418 l1_table[i] = l2_offset;
1419 l1_modified = 1;
1420 }
1421 }
1422 }
1423
1424 ret = bdrv_flush(bs);
1425fail:
1426 if (l2_slice) {
1427 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
1428 }
1429
1430 s->cache_discards = false;
1431 qcow2_process_discards(bs, ret);
1432
1433
1434 if (ret == 0 && addend >= 0 && l1_modified) {
1435 for (i = 0; i < l1_size; i++) {
1436 cpu_to_be64s(&l1_table[i]);
1437 }
1438
1439 ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_size2, l1_table,
1440 0);
1441
1442 for (i = 0; i < l1_size; i++) {
1443 be64_to_cpus(&l1_table[i]);
1444 }
1445 }
1446 if (l1_allocated)
1447 g_free(l1_table);
1448 return ret;
1449}
1450
1451
1452
1453
1454
1455
1456
1457
1458static uint64_t refcount_array_byte_size(BDRVQcow2State *s, uint64_t entries)
1459{
1460
1461
1462
1463
1464 assert(entries < (UINT64_C(1) << (64 - 9)));
1465
1466
1467
1468
1469 return DIV_ROUND_UP(entries << s->refcount_order, 8);
1470}
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481static int realloc_refcount_array(BDRVQcow2State *s, void **array,
1482 int64_t *size, int64_t new_size)
1483{
1484 int64_t old_byte_size, new_byte_size;
1485 void *new_ptr;
1486
1487
1488 old_byte_size = size_to_clusters(s, refcount_array_byte_size(s, *size))
1489 * s->cluster_size;
1490 new_byte_size = size_to_clusters(s, refcount_array_byte_size(s, new_size))
1491 * s->cluster_size;
1492
1493 if (new_byte_size == old_byte_size) {
1494 *size = new_size;
1495 return 0;
1496 }
1497
1498 assert(new_byte_size > 0);
1499
1500 if (new_byte_size > SIZE_MAX) {
1501 return -ENOMEM;
1502 }
1503
1504 new_ptr = g_try_realloc(*array, new_byte_size);
1505 if (!new_ptr) {
1506 return -ENOMEM;
1507 }
1508
1509 if (new_byte_size > old_byte_size) {
1510 memset((char *)new_ptr + old_byte_size, 0,
1511 new_byte_size - old_byte_size);
1512 }
1513
1514 *array = new_ptr;
1515 *size = new_size;
1516
1517 return 0;
1518}
1519
1520
1521
1522
1523
1524
1525
1526
/*
 * Increment the in-memory refcount (IMRT = in-memory refcount table) of
 * every host cluster overlapped by [offset, offset + size), growing the
 * table on demand.
 *
 * Regions reaching one cluster or more past the end of the image file,
 * and clusters whose refcount is already at the representable maximum,
 * are reported as corruptions in @res but still return 0.  A negative
 * errno is returned only for hard failures (querying the file length,
 * growing the table).
 */
int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
                             void **refcount_table,
                             int64_t *refcount_table_size,
                             int64_t offset, int64_t size)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t start, last, cluster_offset, k, refcount;
    int64_t file_len;
    int ret;

    /* Empty or negative-sized regions reference nothing */
    if (size <= 0) {
        return 0;
    }

    file_len = bdrv_getlength(bs->file->bs);
    if (file_len < 0) {
        return file_len;
    }

    /*
     * The last cluster of the image may extend less than a full cluster
     * past the file end (partially written last cluster); anything that
     * reaches a whole cluster or more beyond the file means the metadata
     * points past the end of the image.
     */
    if (offset + size - file_len >= s->cluster_size) {
        fprintf(stderr, "ERROR: counting reference for region exceeding the "
                "end of the file by one cluster or more: offset 0x%" PRIx64
                " size 0x%" PRIx64 "\n", offset, size);
        res->corruptions++;
        return 0;
    }

    start = start_of_cluster(s, offset);
    last = start_of_cluster(s, offset + size - 1);
    for(cluster_offset = start; cluster_offset <= last;
        cluster_offset += s->cluster_size) {
        k = cluster_offset >> s->cluster_bits;
        if (k >= *refcount_table_size) {
            /* Grow the IMRT so it covers this cluster index */
            ret = realloc_refcount_array(s, refcount_table,
                                         refcount_table_size, k + 1);
            if (ret < 0) {
                res->check_errors++;
                return ret;
            }
        }

        refcount = s->get_refcount(*refcount_table, k);
        if (refcount == s->refcount_max) {
            /* A wider refcount entry would be needed to go any higher */
            fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
                    "\n", cluster_offset);
            fprintf(stderr, "Use qemu-img amend to increase the refcount entry "
                    "width or qemu-img convert to create a clean copy if the "
                    "image cannot be opened for writing\n");
            res->corruptions++;
            continue;
        }
        s->set_refcount(*refcount_table, k, refcount + 1);
    }

    return 0;
}
1588
1589
/* Flag for check_refcounts_l1()/check_refcounts_l2(): additionally gather
 * fragmentation statistics in res->bfi while counting references */
enum {
    CHECK_FRAG_INFO = 0x2,
};
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
/*
 * Repair an L2 entry whose cluster allocation offset is misaligned by
 * turning the entry into a zero cluster, then write the repaired entry
 * back to disk.
 *
 * @active: whether this L2 table belongs to the active L1 table (selects
 *          which overlap check to suppress for the write)
 * @metadata_overlap: if non-NULL, set to true when the overlap check
 *          refused the write; the in-memory table has already been
 *          modified at that point, but nothing was written to disk
 *
 * Returns 0 on success (the corruption is accounted as fixed in @res),
 * or a negative errno on failure (accounted as a check error).
 */
static int fix_l2_entry_by_zero(BlockDriverState *bs, BdrvCheckResult *res,
                                uint64_t l2_offset,
                                uint64_t *l2_table, int l2_index, bool active,
                                bool *metadata_overlap)
{
    BDRVQcow2State *s = bs->opaque;
    int ret;
    /* Index in uint64_t units of this entry within l2_table */
    int idx = l2_index * (l2_entry_size(s) / sizeof(uint64_t));
    /* On-disk byte offset of the entry being rewritten */
    uint64_t l2e_offset = l2_offset + (uint64_t)l2_index * l2_entry_size(s);
    int ign = active ? QCOW2_OL_ACTIVE_L2 : QCOW2_OL_INACTIVE_L2;

    if (has_subclusters(s)) {
        uint64_t l2_bitmap = get_l2_bitmap(s, l2_table, l2_index);

        /* Allocated subclusters are marked zero; subclusters that were
         * already zero stay zero; everything else reads as unallocated */
        l2_bitmap |= l2_bitmap << 32;
        l2_bitmap &= QCOW_L2_BITMAP_ALL_ZEROES;

        set_l2_bitmap(s, l2_table, l2_index, l2_bitmap);
        set_l2_entry(s, l2_table, l2_index, 0);
    } else {
        /* Without subclusters, an all-zero cluster is expressed via the
         * zero flag in the entry itself */
        set_l2_entry(s, l2_table, l2_index, QCOW_OFLAG_ZERO);
    }

    ret = qcow2_pre_write_overlap_check(bs, ign, l2e_offset, l2_entry_size(s),
                                        false);
    if (metadata_overlap) {
        *metadata_overlap = ret < 0;
    }
    if (ret < 0) {
        fprintf(stderr, "ERROR: Overlap check failed\n");
        goto fail;
    }

    ret = bdrv_pwrite_sync(bs->file, l2e_offset, l2_entry_size(s),
                           &l2_table[idx], 0);
    if (ret < 0) {
        fprintf(stderr, "ERROR: Failed to overwrite L2 "
                "table entry: %s\n", strerror(-ret));
        goto fail;
    }

    res->corruptions--;
    res->corruptions_fixed++;
    return 0;

fail:
    res->check_errors++;
    return ret;
}
1653
1654
1655
1656
1657
1658
1659
1660
1661
/*
 * Increment the IMRT refcounts for every cluster referenced by the L2
 * table at @l2_offset, checking the entries for corruption along the way
 * (reserved bits, misuse of QCOW_OFLAG_COPIED on compressed clusters,
 * inconsistent subcluster bitmaps, unaligned cluster offsets).
 *
 * If @fix contains BDRV_FIX_ERRORS, misaligned preallocated zero clusters
 * are repaired via fix_l2_entry_by_zero().  With CHECK_FRAG_INFO in
 * @flags, fragmentation statistics are gathered into res->bfi.
 *
 * Returns 0 on success (corruptions are only counted in @res), or a
 * negative errno on hard errors.
 */
static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
                              void **refcount_table,
                              int64_t *refcount_table_size, int64_t l2_offset,
                              int flags, BdrvCheckMode fix, bool active)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t l2_entry, l2_bitmap;
    uint64_t next_contiguous_offset = 0;
    int i, ret;
    size_t l2_size_bytes = s->l2_size * l2_entry_size(s);
    g_autofree uint64_t *l2_table = g_malloc(l2_size_bytes);
    bool metadata_overlap;

    /* Read the whole L2 table from disk */
    ret = bdrv_pread(bs->file, l2_offset, l2_size_bytes, l2_table, 0);
    if (ret < 0) {
        fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
        res->check_errors++;
        return ret;
    }

    /* Do the actual checks, entry by entry */
    for (i = 0; i < s->l2_size; i++) {
        uint64_t coffset;
        int csize;
        QCow2ClusterType type;

        l2_entry = get_l2_entry(s, l2_table, i);
        l2_bitmap = get_l2_bitmap(s, l2_table, i);
        type = qcow2_get_cluster_type(bs, l2_entry);

        if (type != QCOW2_CLUSTER_COMPRESSED) {
            /* Reserved bits only apply to standard cluster descriptors */
            if (l2_entry & L2E_STD_RESERVED_MASK) {
                fprintf(stderr, "ERROR found l2 entry with reserved bits set: "
                        "%" PRIx64 "\n", l2_entry);
                res->corruptions++;
            }
        }

        switch (type) {
        case QCOW2_CLUSTER_COMPRESSED:
            /* The copied flag is never valid for compressed clusters */
            if (l2_entry & QCOW_OFLAG_COPIED) {
                fprintf(stderr, "ERROR: coffset=0x%" PRIx64 ": "
                        "copied flag must never be set for compressed "
                        "clusters\n", l2_entry & s->cluster_offset_mask);
                l2_entry &= ~QCOW_OFLAG_COPIED;
                res->corruptions++;
            }

            if (has_data_file(bs)) {
                fprintf(stderr, "ERROR compressed cluster %d with data file, "
                        "entry=0x%" PRIx64 "\n", i, l2_entry);
                res->corruptions++;
                break;
            }

            if (l2_bitmap) {
                fprintf(stderr, "ERROR compressed cluster %d with non-zero "
                        "subcluster allocation bitmap, entry=0x%" PRIx64 "\n",
                        i, l2_entry);
                res->corruptions++;
                break;
            }

            /* Mark the range holding the compressed data as used */
            qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize);
            ret = qcow2_inc_refcounts_imrt(
                bs, res, refcount_table, refcount_table_size, coffset, csize);
            if (ret < 0) {
                return ret;
            }

            if (flags & CHECK_FRAG_INFO) {
                res->bfi.allocated_clusters++;
                res->bfi.compressed_clusters++;

                /*
                 * Compressed clusters occupy sub-cluster space, and with
                 * only cluster granularity available here we cannot tell
                 * whether they are actually fragmented; count them as
                 * fragmented unconditionally.
                 */
                res->bfi.fragmented_clusters++;
            }
            break;

        case QCOW2_CLUSTER_ZERO_ALLOC:
        case QCOW2_CLUSTER_NORMAL:
        {
            uint64_t offset = l2_entry & L2E_OFFSET_MASK;

            /* No subcluster may be both allocated and zero at once */
            if ((l2_bitmap >> 32) & l2_bitmap) {
                res->corruptions++;
                fprintf(stderr, "ERROR offset=%" PRIx64 ": Allocated "
                        "cluster has corrupted subcluster allocation bitmap\n",
                        offset);
            }

            /* Correct offsets are cluster aligned */
            if (offset_into_cluster(s, offset)) {
                bool contains_data;
                res->corruptions++;

                if (has_subclusters(s)) {
                    contains_data = (l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC);
                } else {
                    contains_data = !(l2_entry & QCOW_OFLAG_ZERO);
                }

                if (!contains_data) {
                    /* Only data-less (all-zero) clusters can be repaired
                     * safely, by rewriting the entry as a zero cluster */
                    fprintf(stderr, "%s offset=%" PRIx64 ": Preallocated "
                            "cluster is not properly aligned; L2 entry "
                            "corrupted.\n",
                            fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR",
                            offset);
                    if (fix & BDRV_FIX_ERRORS) {
                        ret = fix_l2_entry_by_zero(bs, res, l2_offset,
                                                   l2_table, i, active,
                                                   &metadata_overlap);
                        if (metadata_overlap) {
                            /*
                             * The write would have overlapped other
                             * metadata; something is seriously wrong, so
                             * abort checking this L2 table entirely.
                             */
                            return ret;
                        }

                        if (ret == 0) {
                            /*
                             * Entry successfully rewritten as a zero
                             * cluster; it no longer references any host
                             * cluster, so skip the refcount accounting.
                             */
                            continue;
                        }

                        /*
                         * Repair failed (no overlap, though): fall
                         * through and account the bogus offset as if it
                         * were valid, so the rest of the check can
                         * continue.
                         */
                    }
                } else {
                    fprintf(stderr, "ERROR offset=%" PRIx64 ": Data cluster is "
                            "not properly aligned; L2 entry corrupted.\n", offset);
                }
            }

            if (flags & CHECK_FRAG_INFO) {
                res->bfi.allocated_clusters++;
                if (next_contiguous_offset &&
                    offset != next_contiguous_offset) {
                    res->bfi.fragmented_clusters++;
                }
                next_contiguous_offset = offset + s->cluster_size;
            }

            /* With an external data file, guest clusters live outside
             * this image and are not refcounted here */
            if (!has_data_file(bs)) {
                ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table,
                                               refcount_table_size,
                                               offset, s->cluster_size);
                if (ret < 0) {
                    return ret;
                }
            }
            break;
        }

        case QCOW2_CLUSTER_ZERO_PLAIN:
            /* Any bitmap bits here would have been reserved-bit errors */
            assert(!l2_bitmap);
            break;

        case QCOW2_CLUSTER_UNALLOCATED:
            if (l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC) {
                res->corruptions++;
                fprintf(stderr, "ERROR: Unallocated "
                        "cluster has non-zero subcluster allocation map\n");
            }
            break;

        default:
            abort();
        }
    }

    return 0;
}
1852
1853
1854
1855
1856
1857
1858
1859
1860
/*
 * Increment the IMRT refcounts for the L1 table at @l1_table_offset
 * (the table itself plus every L2 table it references), check the L1
 * entries for corruption, and recurse into each referenced L2 table via
 * check_refcounts_l2().
 *
 * Returns 0 on success (corruptions are only counted in @res), or a
 * negative errno on hard errors.
 */
static int check_refcounts_l1(BlockDriverState *bs,
                              BdrvCheckResult *res,
                              void **refcount_table,
                              int64_t *refcount_table_size,
                              int64_t l1_table_offset, int l1_size,
                              int flags, BdrvCheckMode fix, bool active)
{
    BDRVQcow2State *s = bs->opaque;
    size_t l1_size_bytes = l1_size * L1E_SIZE;
    g_autofree uint64_t *l1_table = NULL;
    uint64_t l2_offset;
    int i, ret;

    if (!l1_size) {
        return 0;
    }

    /* Mark the L1 table clusters themselves as used */
    ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, refcount_table_size,
                                   l1_table_offset, l1_size_bytes);
    if (ret < 0) {
        return ret;
    }

    l1_table = g_try_malloc(l1_size_bytes);
    if (l1_table == NULL) {
        res->check_errors++;
        return -ENOMEM;
    }

    /* Read the L1 table from disk */
    ret = bdrv_pread(bs->file, l1_table_offset, l1_size_bytes, l1_table, 0);
    if (ret < 0) {
        fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
        res->check_errors++;
        return ret;
    }

    /* Entries are stored big-endian on disk */
    for (i = 0; i < l1_size; i++) {
        be64_to_cpus(&l1_table[i]);
    }

    /* Check each non-empty L1 entry and its L2 table */
    for (i = 0; i < l1_size; i++) {
        if (!l1_table[i]) {
            continue;
        }

        if (l1_table[i] & L1E_RESERVED_MASK) {
            fprintf(stderr, "ERROR found L1 entry with reserved bits set: "
                    "%" PRIx64 "\n", l1_table[i]);
            res->corruptions++;
        }

        l2_offset = l1_table[i] & L1E_OFFSET_MASK;

        /* Mark the L2 table cluster as used (done even if the offset
         * turns out to be unaligned below) */
        ret = qcow2_inc_refcounts_imrt(bs, res,
                                       refcount_table, refcount_table_size,
                                       l2_offset, s->cluster_size);
        if (ret < 0) {
            return ret;
        }

        /* L2 tables must be cluster aligned */
        if (offset_into_cluster(s, l2_offset)) {
            fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
                    "cluster aligned; L1 entry corrupted\n", l2_offset);
            res->corruptions++;
        }

        /* Process and check this L2 table */
        ret = check_refcounts_l2(bs, res, refcount_table,
                                 refcount_table_size, l2_offset, flags,
                                 fix, active);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}
1943
1944
1945
1946
1947
1948
1949
1950
1951
/*
 * Check that QCOW_OFLAG_COPIED in active L1 and L2 entries agrees with
 * the refcount of the referenced cluster: the flag must be set if and
 * only if the refcount is exactly 1.  Mismatches are reported and, when
 * permitted by @fix, repaired in place.
 */
static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
                              BdrvCheckMode fix)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t *l2_table = qemu_blockalign(bs, s->cluster_size);
    int ret;
    uint64_t refcount;
    int i, j;
    bool repair;

    if (fix & BDRV_FIX_ERRORS) {
        /* Always repair when error fixing was requested */
        repair = true;
    } else if (fix & BDRV_FIX_LEAKS) {
        /*
         * Repair only if the rest of the image checked out clean: a
         * mismatched OFLAG_COPIED is only trustworthy as a pure leak
         * symptom when there are no other errors.
         */
        repair = !res->check_errors && !res->corruptions && !res->leaks;
    } else {
        repair = false;
    }

    for (i = 0; i < s->l1_size; i++) {
        uint64_t l1_entry = s->l1_table[i];
        uint64_t l2_offset = l1_entry & L1E_OFFSET_MASK;
        int l2_dirty = 0;   /* number of repaired entries in this L2 table */

        if (!l2_offset) {
            continue;
        }

        ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits,
                                 &refcount);
        if (ret < 0) {
            /* Cannot determine the refcount; skip this L1 entry */
            continue;
        }
        if ((refcount == 1) != ((l1_entry & QCOW_OFLAG_COPIED) != 0)) {
            res->corruptions++;
            fprintf(stderr, "%s OFLAG_COPIED L2 cluster: l1_index=%d "
                    "l1_entry=%" PRIx64 " refcount=%" PRIu64 "\n",
                    repair ? "Repairing" : "ERROR", i, l1_entry, refcount);
            if (repair) {
                s->l1_table[i] = refcount == 1
                               ? l1_entry |  QCOW_OFLAG_COPIED
                               : l1_entry & ~QCOW_OFLAG_COPIED;
                ret = qcow2_write_l1_entry(bs, i);
                if (ret < 0) {
                    res->check_errors++;
                    goto fail;
                }
                res->corruptions--;
                res->corruptions_fixed++;
            }
        }

        ret = bdrv_pread(bs->file, l2_offset, s->l2_size * l2_entry_size(s),
                         l2_table, 0);
        if (ret < 0) {
            fprintf(stderr, "ERROR: Could not read L2 table: %s\n",
                    strerror(-ret));
            res->check_errors++;
            goto fail;
        }

        for (j = 0; j < s->l2_size; j++) {
            uint64_t l2_entry = get_l2_entry(s, l2_table, j);
            uint64_t data_offset = l2_entry & L2E_OFFSET_MASK;
            QCow2ClusterType cluster_type = qcow2_get_cluster_type(bs, l2_entry);

            if (cluster_type == QCOW2_CLUSTER_NORMAL ||
                cluster_type == QCOW2_CLUSTER_ZERO_ALLOC) {
                if (has_data_file(bs)) {
                    /* Guest data lives in the external file; the cluster
                     * is never shared there, so treat it as refcount 1 */
                    refcount = 1;
                } else {
                    ret = qcow2_get_refcount(bs,
                                             data_offset >> s->cluster_bits,
                                             &refcount);
                    if (ret < 0) {
                        /* Cannot determine the refcount; skip this entry */
                        continue;
                    }
                }
                if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) {
                    res->corruptions++;
                    fprintf(stderr, "%s OFLAG_COPIED data cluster: "
                            "l2_entry=%" PRIx64 " refcount=%" PRIu64 "\n",
                            repair ? "Repairing" : "ERROR", l2_entry, refcount);
                    if (repair) {
                        /* Repairs are batched; the table is written back
                         * once below if anything changed */
                        set_l2_entry(s, l2_table, j,
                                     refcount == 1 ?
                                     l2_entry |  QCOW_OFLAG_COPIED :
                                     l2_entry & ~QCOW_OFLAG_COPIED);
                        l2_dirty++;
                    }
                }
            }
        }

        if (l2_dirty > 0) {
            ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
                                                l2_offset, s->cluster_size,
                                                false);
            if (ret < 0) {
                fprintf(stderr, "ERROR: Could not write L2 table; metadata "
                        "overlap check failed: %s\n", strerror(-ret));
                res->check_errors++;
                goto fail;
            }

            ret = bdrv_pwrite(bs->file, l2_offset, s->cluster_size, l2_table,
                              0);
            if (ret < 0) {
                fprintf(stderr, "ERROR: Could not write L2 table: %s\n",
                        strerror(-ret));
                res->check_errors++;
                goto fail;
            }
            /* Only account the repairs once the write has succeeded */
            res->corruptions -= l2_dirty;
            res->corruptions_fixed += l2_dirty;
        }
    }

    ret = 0;

fail:
    qemu_vfree(l2_table);
    return ret;
}
2081
2082
2083
2084
2085
/*
 * Check the consistency of the refcount blocks referenced by the
 * refcount table: reserved bits, cluster alignment, location within the
 * image, and that each refblock has a computed refcount of exactly 1.
 * Refblocks outside the image can be repaired (BDRV_FIX_ERRORS) by
 * growing the file.  *rebuild is set when the refcount structure is too
 * broken to be patched in place.
 */
static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
                           BdrvCheckMode fix, bool *rebuild,
                           void **refcount_table, int64_t *nb_clusters)
{
    BDRVQcow2State *s = bs->opaque;
    int64_t i, size;
    int ret;

    for(i = 0; i < s->refcount_table_size; i++) {
        uint64_t offset, cluster;
        offset = s->refcount_table[i] & REFT_OFFSET_MASK;
        cluster = offset >> s->cluster_bits;

        if (s->refcount_table[i] & REFT_RESERVED_MASK) {
            fprintf(stderr, "ERROR refcount table entry %" PRId64 " has "
                    "reserved bits set\n", i);
            res->corruptions++;
            *rebuild = true;
            continue;
        }

        /* Refcount blocks must be cluster aligned */
        if (offset_into_cluster(s, offset)) {
            fprintf(stderr, "ERROR refcount block %" PRId64 " is not "
                    "cluster aligned; refcount table entry corrupted\n", i);
            res->corruptions++;
            *rebuild = true;
            continue;
        }

        if (cluster >= *nb_clusters) {
            res->corruptions++;
            fprintf(stderr, "%s refcount block %" PRId64 " is outside image\n",
                    fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);

            if (fix & BDRV_FIX_ERRORS) {
                int64_t new_nb_clusters;
                Error *local_err = NULL;

                if (offset > INT64_MAX - s->cluster_size) {
                    ret = -EINVAL;
                    goto resize_fail;
                }

                /* Grow the file so the refblock lies inside the image */
                ret = bdrv_truncate(bs->file, offset + s->cluster_size, false,
                                    PREALLOC_MODE_OFF, 0, &local_err);
                if (ret < 0) {
                    error_report_err(local_err);
                    goto resize_fail;
                }
                size = bdrv_getlength(bs->file->bs);
                if (size < 0) {
                    ret = size;
                    goto resize_fail;
                }

                new_nb_clusters = size_to_clusters(s, size);
                assert(new_nb_clusters >= *nb_clusters);

                ret = realloc_refcount_array(s, refcount_table,
                                             nb_clusters, new_nb_clusters);
                if (ret < 0) {
                    res->check_errors++;
                    return ret;
                }

                /* Truncation may not have made the image big enough
                 * (e.g. on a block device of fixed size) */
                if (cluster >= *nb_clusters) {
                    ret = -EINVAL;
                    goto resize_fail;
                }

                res->corruptions--;
                res->corruptions_fixed++;
                ret = qcow2_inc_refcounts_imrt(bs, res,
                                               refcount_table, nb_clusters,
                                               offset, s->cluster_size);
                if (ret < 0) {
                    return ret;
                }

                /* The refblock is newly covered by the image; its
                 * refcount is now 1, so skip the check below */
                continue;

resize_fail:
                /* Could not bring the refblock inside the image, so the
                 * refcount structure has to be rebuilt from scratch */
                *rebuild = true;
                fprintf(stderr, "ERROR could not resize image: %s\n",
                        strerror(-ret));
            }
            continue;
        }

        if (offset != 0) {
            /* Count the refblock cluster itself and verify that nothing
             * else references it */
            ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
                                           offset, s->cluster_size);
            if (ret < 0) {
                return ret;
            }
            if (s->get_refcount(*refcount_table, cluster) != 1) {
                fprintf(stderr, "ERROR refcount block %" PRId64
                        " refcount=%" PRIu64 "\n", i,
                        s->get_refcount(*refcount_table, cluster));
                res->corruptions++;
                *rebuild = true;
            }
        }
    }

    return 0;
}
2196
2197
2198
2199
/*
 * Compute from scratch, into the IMRT (*refcount_table), the reference
 * count every host cluster *should* have: the image header, the active
 * L1/L2 tables, snapshot L1 tables, the snapshot list, the refcount
 * structures themselves, the crypto header and the bitmap data.
 */
static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
                               BdrvCheckMode fix, bool *rebuild,
                               void **refcount_table, int64_t *nb_clusters)
{
    BDRVQcow2State *s = bs->opaque;
    int64_t i;
    QCowSnapshot *sn;
    int ret;

    if (!*refcount_table) {
        int64_t old_size = 0;
        ret = realloc_refcount_array(s, refcount_table,
                                     &old_size, *nb_clusters);
        if (ret < 0) {
            res->check_errors++;
            return ret;
        }
    }

    /* The image header occupies the first cluster */
    ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
                                   0, s->cluster_size);
    if (ret < 0) {
        return ret;
    }

    /* The active L1 table and everything it references */
    ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
                             s->l1_table_offset, s->l1_size, CHECK_FRAG_INFO,
                             fix, true);
    if (ret < 0) {
        return ret;
    }

    /* Snapshots are not supported in combination with an external data
     * file, so their mere presence there is a corruption */
    if (has_data_file(bs) && s->nb_snapshots) {
        fprintf(stderr, "ERROR %d snapshots in image with data file\n",
                s->nb_snapshots);
        res->corruptions++;
    }

    /* Each snapshot's L1 table and everything it references */
    for (i = 0; i < s->nb_snapshots; i++) {
        sn = s->snapshots + i;
        if (offset_into_cluster(s, sn->l1_table_offset)) {
            fprintf(stderr, "ERROR snapshot %s (%s) l1_offset=%#" PRIx64 ": "
                    "L1 table is not cluster aligned; snapshot table entry "
                    "corrupted\n", sn->id_str, sn->name, sn->l1_table_offset);
            res->corruptions++;
            continue;
        }
        if (sn->l1_size > QCOW_MAX_L1_SIZE / L1E_SIZE) {
            fprintf(stderr, "ERROR snapshot %s (%s) l1_size=%#" PRIx32 ": "
                    "L1 table is too large; snapshot table entry corrupted\n",
                    sn->id_str, sn->name, sn->l1_size);
            res->corruptions++;
            continue;
        }
        ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
                                 sn->l1_table_offset, sn->l1_size, 0, fix,
                                 false);
        if (ret < 0) {
            return ret;
        }
    }
    /* The snapshot list itself */
    ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
                                   s->snapshots_offset, s->snapshots_size);
    if (ret < 0) {
        return ret;
    }

    /* The refcount table (refblocks are handled in check_refblocks()) */
    ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
                                   s->refcount_table_offset,
                                   s->refcount_table_size *
                                   REFTABLE_ENTRY_SIZE);
    if (ret < 0) {
        return ret;
    }

    /* The encryption header region, if present */
    if (s->crypto_header.length) {
        ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
                                       s->crypto_header.offset,
                                       s->crypto_header.length);
        if (ret < 0) {
            return ret;
        }
    }

    /* Persistent bitmap data */
    ret = qcow2_check_bitmaps_refcounts(bs, res, refcount_table, nb_clusters);
    if (ret < 0) {
        return ret;
    }

    return check_refblocks(bs, res, fix, rebuild, refcount_table, nb_clusters);
}
2297
2298
2299
2300
2301
/*
 * Compare the actual on-disk refcounts with the reference counts
 * computed in @refcount_table.  A cluster whose on-disk refcount is too
 * low is a corruption, one whose refcount is too high is a leak; with
 * the appropriate @fix bits, mismatches are repaired via
 * update_refcount().  *rebuild is set when an allocated cluster has an
 * on-disk refcount of 0, which cannot be patched in place.  On return,
 * *highest_cluster holds the highest cluster index in use.
 */
static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
                              BdrvCheckMode fix, bool *rebuild,
                              int64_t *highest_cluster,
                              void *refcount_table, int64_t nb_clusters)
{
    BDRVQcow2State *s = bs->opaque;
    int64_t i;
    uint64_t refcount1, refcount2;
    int ret;

    for (i = 0, *highest_cluster = 0; i < nb_clusters; i++) {
        /* refcount1: on-disk value; refcount2: computed reference */
        ret = qcow2_get_refcount(bs, i, &refcount1);
        if (ret < 0) {
            fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
                    i, strerror(-ret));
            res->check_errors++;
            continue;
        }

        refcount2 = s->get_refcount(refcount_table, i);

        if (refcount1 > 0 || refcount2 > 0) {
            *highest_cluster = i;
        }

        if (refcount1 != refcount2) {
            /* Decide whether this mismatch may be fixed, and where the
             * fix would be accounted */
            int *num_fixed = NULL;
            if (refcount1 == 0) {
                /* An allocated cluster with on-disk refcount 0 cannot be
                 * fixed by adjusting refcounts; a rebuild is required */
                *rebuild = true;
            } else if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) {
                num_fixed = &res->leaks_fixed;
            } else if (refcount1 < refcount2 && (fix & BDRV_FIX_ERRORS)) {
                num_fixed = &res->corruptions_fixed;
            }

            fprintf(stderr, "%s cluster %" PRId64 " refcount=%" PRIu64
                    " reference=%" PRIu64 "\n",
                    num_fixed != NULL           ? "Repairing" :
                    refcount1 < refcount2       ? "ERROR" :
                                                  "Leaked",
                    i, refcount1, refcount2);

            if (num_fixed) {
                ret = update_refcount(bs, i << s->cluster_bits, 1,
                                      refcount_diff(refcount1, refcount2),
                                      refcount1 > refcount2,
                                      QCOW2_DISCARD_ALWAYS);
                if (ret >= 0) {
                    (*num_fixed)++;
                    continue;
                }
            }

            /* Not fixed (or fixing failed): record the mismatch */
            if (refcount1 < refcount2) {
                res->corruptions++;
            } else {
                res->leaks++;
            }
        }
    }
}
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
/*
 * Allocate @cluster_count contiguous clusters in the in-memory refcount
 * table, scanning for a free run starting at *first_free_cluster.  If no
 * large enough gap exists, the IMRT is grown (the newly added entries
 * are zero, i.e. free, thanks to realloc_refcount_array()), effectively
 * allocating past the current end of the image.
 *
 * On success, returns the byte offset of the first allocated cluster
 * and updates *first_free_cluster to the first gap encountered during
 * the scan (an optimization for subsequent calls); returns a negative
 * errno if growing the IMRT fails.
 */
static int64_t alloc_clusters_imrt(BlockDriverState *bs,
                                   int cluster_count,
                                   void **refcount_table,
                                   int64_t *imrt_nb_clusters,
                                   int64_t *first_free_cluster)
{
    BDRVQcow2State *s = bs->opaque;
    int64_t cluster = *first_free_cluster, i;
    bool first_gap = true;
    int contiguous_free_clusters;
    int ret;

    /* Scan for a run of cluster_count free (refcount 0) clusters */
    for (contiguous_free_clusters = 0;
         cluster < *imrt_nb_clusters &&
         contiguous_free_clusters < cluster_count;
         cluster++)
    {
        if (!s->get_refcount(*refcount_table, cluster)) {
            contiguous_free_clusters++;
            if (first_gap) {
                /* Remember the first gap for the next caller, even if it
                 * turns out to be too small for this allocation */
                *first_free_cluster = cluster;
                first_gap = false;
            }
        } else if (contiguous_free_clusters) {
            /* The run was interrupted; start counting afresh */
            contiguous_free_clusters = 0;
        }
    }

    /*
     * If contiguous_free_clusters is greater than zero, it contains the
     * number of free clusters at the current position 'cluster'; if the
     * run is still too short, grow the table so the remainder of the
     * allocation lands in the (free) newly added entries.
     */
    if (contiguous_free_clusters < cluster_count) {
        /* Grow the IMRT just enough to complete the run at its tail */
        ret = realloc_refcount_array(s, refcount_table, imrt_nb_clusters,
                                     cluster + cluster_count
                                     - contiguous_free_clusters);
        if (ret < 0) {
            return ret;
        }
    }

    /* Step back to the start of the free run and mark it allocated */
    cluster -= contiguous_free_clusters;
    for (i = 0; i < cluster_count; i++) {
        s->set_refcount(*refcount_table, cluster + i, 1);
    }

    return cluster << s->cluster_bits;
}
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
/*
 * Helper for rebuilding the on-disk refcount structure: write the
 * refcount blocks covering all allocated clusters (per the IMRT) in
 * [first_cluster, end_cluster).  Refblocks that are not yet present in
 * *on_disk_reftable_ptr are allocated in the IMRT -- which may push
 * allocations past end_cluster, in which case the range is extended --
 * and entered into the reftable, growing it when the refblock index
 * does not fit.
 *
 * Returns 1 if the reftable array was grown, 0 if not, and a negative
 * errno (with @errp set) on error.  The reftable and its entry count
 * are returned through the pointer parameters.
 */
static int rebuild_refcounts_write_refblocks(
        BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters,
        int64_t first_cluster, int64_t end_cluster,
        uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr,
        Error **errp
    )
{
    BDRVQcow2State *s = bs->opaque;
    int64_t cluster;
    int64_t refblock_offset, refblock_start, refblock_index;
    int64_t first_free_cluster = 0;
    uint64_t *on_disk_reftable = *on_disk_reftable_ptr;
    uint32_t on_disk_reftable_entries = *on_disk_reftable_entries_ptr;
    void *on_disk_refblock;
    bool reftable_grown = false;
    int ret;

    for (cluster = first_cluster; cluster < end_cluster; cluster++) {
        /* Free clusters need no refblock on disk */
        if (!s->get_refcount(*refcount_table, cluster)) {
            continue;
        }

        /*
         * Figure out which refblock covers this cluster; the whole
         * refblock is written in one go below, and the loop then skips
         * ahead to the first cluster past it.
         */
        refblock_index = cluster >> s->refcount_block_bits;
        refblock_start = refblock_index << s->refcount_block_bits;

        if (on_disk_reftable_entries > refblock_index &&
            on_disk_reftable[refblock_index])
        {
            /*
             * A previous pass (e.g. before the reftable itself was
             * allocated) has already placed this refblock; reuse its
             * offset.
             */
            refblock_offset = on_disk_reftable[refblock_index];
        } else {
            int64_t refblock_cluster_index;

            /* Don't allocate a refblock before the range it covers */
            if (first_free_cluster < refblock_start) {
                first_free_cluster = refblock_start;
            }
            refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table,
                                                  nb_clusters,
                                                  &first_free_cluster);
            if (refblock_offset < 0) {
                error_setg_errno(errp, -refblock_offset,
                                 "ERROR allocating refblock");
                return refblock_offset;
            }

            refblock_cluster_index = refblock_offset / s->cluster_size;
            if (refblock_cluster_index >= end_cluster) {
                /*
                 * The refblock was allocated beyond the range we were
                 * asked to cover; extend the range so this new
                 * allocation gets a refblock too.
                 */
                end_cluster = refblock_cluster_index + 1;
            }

            if (on_disk_reftable_entries <= refblock_index) {
                /* Grow the reftable (rounded up to whole clusters) so
                 * the new refblock's index fits */
                on_disk_reftable_entries =
                    ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE,
                             s->cluster_size) / REFTABLE_ENTRY_SIZE;
                on_disk_reftable =
                    g_try_realloc(on_disk_reftable,
                                  on_disk_reftable_entries *
                                  REFTABLE_ENTRY_SIZE);
                if (!on_disk_reftable) {
                    error_setg(errp, "ERROR allocating reftable memory");
                    return -ENOMEM;
                }

                /* Zero the newly added entries */
                memset(on_disk_reftable + *on_disk_reftable_entries_ptr, 0,
                       (on_disk_reftable_entries -
                        *on_disk_reftable_entries_ptr) *
                       REFTABLE_ENTRY_SIZE);

                *on_disk_reftable_ptr = on_disk_reftable;
                *on_disk_reftable_entries_ptr = on_disk_reftable_entries;

                reftable_grown = true;
            } else {
                assert(on_disk_reftable);
            }
            on_disk_reftable[refblock_index] = refblock_offset;
        }

        /* Refblock is at a free cluster, in no intersection with any
         * other metadata */
        ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset,
                                            s->cluster_size, false);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "ERROR writing refblock");
            return ret;
        }

        /*
         * The refblock's content on disk is exactly the corresponding
         * cluster-sized slice of the IMRT, so it can be written straight
         * from the in-memory table.
         */
        on_disk_refblock = (void *)((char *) *refcount_table +
                                    refblock_index * s->cluster_size);

        ret = bdrv_pwrite(bs->file, refblock_offset, s->cluster_size,
                          on_disk_refblock, 0);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "ERROR writing refblock");
            return ret;
        }

        /* Skip to the end of this refblock's range (the loop's ++ moves
         * to the first cluster of the next refblock) */
        cluster = refblock_start + s->refcount_block_size - 1;
    }

    return reftable_grown;
}
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
/*
 * Rebuild the on-disk refcount structure from the in-memory refcount
 * table: write refblocks for all allocated clusters, allocate and write
 * a new reftable, point the image header at it, and finally update the
 * in-memory state to match.  *refcount_table and *nb_clusters may grow
 * in the process (the new metadata itself needs refcounts).
 */
static int rebuild_refcount_structure(BlockDriverState *bs,
                                      BdrvCheckResult *res,
                                      void **refcount_table,
                                      int64_t *nb_clusters,
                                      Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    int64_t reftable_offset = -1;
    int64_t reftable_length = 0;
    int64_t reftable_clusters;
    int64_t refblock_index;
    uint32_t on_disk_reftable_entries = 0;
    uint64_t *on_disk_reftable = NULL;
    int ret = 0;
    int reftable_size_changed = 0;
    struct {
        uint64_t reftable_offset;
        uint32_t reftable_clusters;
    } QEMU_PACKED reftable_offset_and_clusters;

    qcow2_cache_empty(bs, s->refcount_block_cache);

    /*
     * The rebuild is done in two phases:
     *
     * 1. Write refblocks for all currently allocated clusters.  Writing
     *    those refblocks allocates new clusters itself, so
     *    rebuild_refcounts_write_refblocks() keeps extending its range
     *    until every allocation (including the refblocks' own) is
     *    covered.  The reftable only exists in memory at this point.
     *
     * 2. Allocate the on-disk reftable.  That allocation may require
     *    additional refblocks, which in turn may grow the reftable, so
     *    the loop below repeats until the reftable size stabilizes.
     */
    reftable_size_changed =
        rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters,
                                          0, *nb_clusters,
                                          &on_disk_reftable,
                                          &on_disk_reftable_entries, errp);
    if (reftable_size_changed < 0) {
        res->check_errors++;
        ret = reftable_size_changed;
        goto fail;
    }

    /*
     * There must be at least one allocated cluster, so the first pass
     * must have grown the reftable from its initial empty state.
     */
    assert(reftable_size_changed);

    do {
        int64_t reftable_start_cluster, reftable_end_cluster;
        int64_t first_free_cluster = 0;

        reftable_length = on_disk_reftable_entries * REFTABLE_ENTRY_SIZE;
        reftable_clusters = size_to_clusters(s, reftable_length);

        reftable_offset = alloc_clusters_imrt(bs, reftable_clusters,
                                              refcount_table, nb_clusters,
                                              &first_free_cluster);
        if (reftable_offset < 0) {
            error_setg_errno(errp, -reftable_offset,
                             "ERROR allocating reftable");
            res->check_errors++;
            ret = reftable_offset;
            goto fail;
        }

        /*
         * Write the refblocks covering the just-allocated reftable
         * clusters; if that grows the reftable, the allocation above no
         * longer fits it and must be redone.
         */
        assert(offset_into_cluster(s, reftable_offset) == 0);
        reftable_start_cluster = reftable_offset / s->cluster_size;
        reftable_end_cluster = reftable_start_cluster + reftable_clusters;
        reftable_size_changed =
            rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters,
                                              reftable_start_cluster,
                                              reftable_end_cluster,
                                              &on_disk_reftable,
                                              &on_disk_reftable_entries, errp);
        if (reftable_size_changed < 0) {
            res->check_errors++;
            ret = reftable_size_changed;
            goto fail;
        }

        /*
         * NOTE(review): the previously allocated reftable clusters are
         * not explicitly freed in the IMRT when the loop repeats --
         * presumably they simply end up counted as used; confirm this
         * is intended.
         */
    } while (reftable_size_changed);

    /* The above loop must have run at least once */
    assert(reftable_offset >= 0);

    /*
     * All the clusters that are taken by the reftable will now be
     * recorded in the refblocks written above, so the reftable content
     * is final; convert it to big-endian for the disk write.
     */
    for (refblock_index = 0; refblock_index < on_disk_reftable_entries;
         refblock_index++)
    {
        cpu_to_be64s(&on_disk_reftable[refblock_index]);
    }

    ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length,
                                        false);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "ERROR writing reftable");
        goto fail;
    }

    assert(reftable_length < INT_MAX);
    ret = bdrv_pwrite(bs->file, reftable_offset, reftable_length,
                      on_disk_reftable, 0);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "ERROR writing reftable");
        goto fail;
    }

    /* Point the image header at the new reftable (single synced write
     * of the offset + size pair) */
    reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset);
    reftable_offset_and_clusters.reftable_clusters =
        cpu_to_be32(reftable_clusters);
    ret = bdrv_pwrite_sync(bs->file,
                           offsetof(QCowHeader, refcount_table_offset),
                           sizeof(reftable_offset_and_clusters),
                           &reftable_offset_and_clusters, 0);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "ERROR setting reftable");
        goto fail;
    }

    /* Convert back to CPU byte order and adopt the new reftable as the
     * in-memory one */
    for (refblock_index = 0; refblock_index < on_disk_reftable_entries;
         refblock_index++)
    {
        be64_to_cpus(&on_disk_reftable[refblock_index]);
    }
    /* NOTE(review): the previous s->refcount_table buffer appears not to
     * be freed here -- confirm whether the caller disposes of it */
    s->refcount_table = on_disk_reftable;
    s->refcount_table_offset = reftable_offset;
    s->refcount_table_size = on_disk_reftable_entries;
    update_max_refcount_table_index(s);

    return 0;

fail:
    g_free(on_disk_reftable);
    return ret;
}
2773
2774
2775
2776
2777
2778
2779
/*
 * Checks the image file for refcount consistency and (depending on @fix)
 * repairs what it finds: it builds an in-memory refcount table from the
 * metadata (calculate_refcounts()), compares it against the on-disk
 * refcounts (compare_refcounts()), and, if that comparison flags the
 * structures as unsalvageable and BDRV_FIX_ERRORS is set, rebuilds the
 * refcount structure from scratch.
 *
 * Findings (corruptions, leaks, fixes, check errors) are accumulated in
 * @res.  Returns 0 if the check could be performed (even if problems were
 * found), or a negative error code if the check itself failed.
 */
int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
                          BdrvCheckMode fix)
{
    BDRVQcow2State *s = bs->opaque;
    BdrvCheckResult pre_compare_res;
    int64_t size, highest_cluster, nb_clusters;
    void *refcount_table = NULL;
    bool rebuild = false;
    int ret;

    size = bdrv_getlength(bs->file->bs);
    if (size < 0) {
        res->check_errors++;
        return size;
    }

    /* The in-memory refcount table covers every host cluster of the file */
    nb_clusters = size_to_clusters(s, size);
    if (nb_clusters > INT_MAX) {
        res->check_errors++;
        return -EFBIG;
    }

    res->bfi.total_clusters =
        size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);

    ret = calculate_refcounts(bs, res, fix, &rebuild, &refcount_table,
                              &nb_clusters);
    if (ret < 0) {
        goto fail;
    }

    /*
     * Remember the state before the comparison so the "fix" branch below
     * can redo the comparison with fixing enabled against the same
     * baseline.
     */
    pre_compare_res = *res;
    compare_refcounts(bs, res, 0, &rebuild, &highest_cluster, refcount_table,
                      nb_clusters);

    if (rebuild && (fix & BDRV_FIX_ERRORS)) {
        BdrvCheckResult old_res = *res;
        int fresh_leaks = 0;
        Error *local_err = NULL;

        fprintf(stderr, "Rebuilding refcount structure\n");
        ret = rebuild_refcount_structure(bs, res, &refcount_table,
                                         &nb_clusters, &local_err);
        if (ret < 0) {
            error_report_err(local_err);
            goto fail;
        }

        res->corruptions = 0;
        res->leaks = 0;

        /*
         * Verify the rebuilt structures: re-count everything from scratch
         * (without fixing) into the refcount table.
         */
        rebuild = false;
        memset(refcount_table, 0, refcount_array_byte_size(s, nb_clusters));
        ret = calculate_refcounts(bs, res, 0, &rebuild, &refcount_table,
                                  &nb_clusters);
        if (ret < 0) {
            goto fail;
        }

        if (fix & BDRV_FIX_LEAKS) {
            /*
             * Compare into a scratch result so that leaks fixed here (the
             * rebuild itself may leak clusters, e.g. the old refcount
             * structures) do not distort the totals reported to the user.
             */
            BdrvCheckResult saved_res = *res;
            *res = (BdrvCheckResult){ 0 };

            compare_refcounts(bs, res, BDRV_FIX_LEAKS, &rebuild,
                              &highest_cluster, refcount_table, nb_clusters);
            if (rebuild) {
                fprintf(stderr, "ERROR rebuilt refcount structure is still "
                        "broken\n");
            }

            /*
             * Leaks found in this pass were introduced by the rebuild
             * itself; record them separately and restore the saved result.
             */
            fresh_leaks = res->leaks;
            *res = saved_res;
        }

        if (res->corruptions < old_res.corruptions) {
            res->corruptions_fixed += old_res.corruptions - res->corruptions;
        }
        if (res->leaks < old_res.leaks) {
            res->leaks_fixed += old_res.leaks - res->leaks;
        }
        res->leaks += fresh_leaks;
    } else if (fix) {
        if (rebuild) {
            fprintf(stderr, "ERROR need to rebuild refcount structures\n");
            res->check_errors++;
            ret = -EIO;
            goto fail;
        }

        if (res->leaks || res->corruptions) {
            *res = pre_compare_res;
            compare_refcounts(bs, res, fix, &rebuild, &highest_cluster,
                              refcount_table, nb_clusters);
        }
    }

    /* check OFLAG_COPIED */
    ret = check_oflag_copied(bs, res, fix);
    if (ret < 0) {
        goto fail;
    }

    res->image_end_offset = (highest_cluster + 1) * s->cluster_size;
    ret = 0;

fail:
    g_free(refcount_table);

    return ret;
}
2901
/*
 * Helper for qcow2_check_metadata_overlap() below: true if the checked
 * range intersects [ofs, ofs + sz).  NOTE: deliberately captures the
 * local variables `offset` and `size` of the enclosing function.
 */
#define overlaps_with(ofs, sz) \
    ranges_overlap(offset, size, ofs, sz)
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
/*
 * Checks whether the range [offset, offset + size) overlaps any qcow2
 * metadata structure.  Which structures are checked is controlled by
 * s->overlap_check minus the bits given in @ign (a mask of QCOW2_OL_*
 * checks to skip).
 *
 * Returns:
 *   0                       - no overlap found
 *   the QCOW2_OL_* constant - of the first overlapping structure found
 *   -errno                  - reading an inactive L1 table failed
 */
int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
                                 int64_t size)
{
    BDRVQcow2State *s = bs->opaque;
    int chk = s->overlap_check & ~ign;
    int i, j;

    if (!size) {
        return 0;
    }

    if (chk & QCOW2_OL_MAIN_HEADER) {
        if (offset < s->cluster_size) {
            return QCOW2_OL_MAIN_HEADER;
        }
    }

    /* Align the range to cluster boundaries for all remaining checks */
    size = ROUND_UP(offset_into_cluster(s, offset) + size, s->cluster_size);
    offset = start_of_cluster(s, offset);

    if ((chk & QCOW2_OL_ACTIVE_L1) && s->l1_size) {
        if (overlaps_with(s->l1_table_offset, s->l1_size * L1E_SIZE)) {
            return QCOW2_OL_ACTIVE_L1;
        }
    }

    if ((chk & QCOW2_OL_REFCOUNT_TABLE) && s->refcount_table_size) {
        if (overlaps_with(s->refcount_table_offset,
                          s->refcount_table_size * REFTABLE_ENTRY_SIZE)) {
            return QCOW2_OL_REFCOUNT_TABLE;
        }
    }

    if ((chk & QCOW2_OL_SNAPSHOT_TABLE) && s->snapshots_size) {
        if (overlaps_with(s->snapshots_offset, s->snapshots_size)) {
            return QCOW2_OL_SNAPSHOT_TABLE;
        }
    }

    if ((chk & QCOW2_OL_INACTIVE_L1) && s->snapshots) {
        for (i = 0; i < s->nb_snapshots; i++) {
            if (s->snapshots[i].l1_size &&
                overlaps_with(s->snapshots[i].l1_table_offset,
                s->snapshots[i].l1_size * L1E_SIZE)) {
                return QCOW2_OL_INACTIVE_L1;
            }
        }
    }

    if ((chk & QCOW2_OL_ACTIVE_L2) && s->l1_table) {
        for (i = 0; i < s->l1_size; i++) {
            if ((s->l1_table[i] & L1E_OFFSET_MASK) &&
                overlaps_with(s->l1_table[i] & L1E_OFFSET_MASK,
                s->cluster_size)) {
                return QCOW2_OL_ACTIVE_L2;
            }
        }
    }

    if ((chk & QCOW2_OL_REFCOUNT_BLOCK) && s->refcount_table) {
        unsigned last_entry = s->max_refcount_table_index;
        assert(last_entry < s->refcount_table_size);
        /* Entries past max_refcount_table_index must be empty, so only
         * indices up to last_entry need to be checked */
        assert(last_entry + 1 == s->refcount_table_size ||
               (s->refcount_table[last_entry + 1] & REFT_OFFSET_MASK) == 0);
        for (i = 0; i <= last_entry; i++) {
            if ((s->refcount_table[i] & REFT_OFFSET_MASK) &&
                overlaps_with(s->refcount_table[i] & REFT_OFFSET_MASK,
                s->cluster_size)) {
                return QCOW2_OL_REFCOUNT_BLOCK;
            }
        }
    }

    if ((chk & QCOW2_OL_INACTIVE_L2) && s->snapshots) {
        for (i = 0; i < s->nb_snapshots; i++) {
            uint64_t l1_ofs = s->snapshots[i].l1_table_offset;
            uint32_t l1_sz  = s->snapshots[i].l1_size;
            uint64_t l1_sz2 = l1_sz * L1E_SIZE;
            uint64_t *l1;
            int ret;

            ret = qcow2_validate_table(bs, l1_ofs, l1_sz, L1E_SIZE,
                                       QCOW_MAX_L1_SIZE, "", NULL);
            if (ret < 0) {
                return ret;
            }

            /* Inactive L2 tables are not kept in memory, so the snapshot's
             * L1 table must be read from disk to find them */
            l1 = g_try_malloc(l1_sz2);

            if (l1_sz2 && l1 == NULL) {
                return -ENOMEM;
            }

            ret = bdrv_pread(bs->file, l1_ofs, l1_sz2, l1, 0);
            if (ret < 0) {
                g_free(l1);
                return ret;
            }

            for (j = 0; j < l1_sz; j++) {
                uint64_t l2_ofs = be64_to_cpu(l1[j]) & L1E_OFFSET_MASK;
                if (l2_ofs && overlaps_with(l2_ofs, s->cluster_size)) {
                    g_free(l1);
                    return QCOW2_OL_INACTIVE_L2;
                }
            }

            g_free(l1);
        }
    }

    if ((chk & QCOW2_OL_BITMAP_DIRECTORY) &&
        (s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS))
    {
        if (overlaps_with(s->bitmap_directory_offset,
                          s->bitmap_directory_size))
        {
            return QCOW2_OL_BITMAP_DIRECTORY;
        }
    }

    return 0;
}
3043
/* Human-readable names of the metadata structures guarded by the
 * QCOW2_OL_* overlap checks, indexed by bit number (used for the
 * corruption message in qcow2_pre_write_overlap_check()) */
static const char *metadata_ol_names[] = {
    [QCOW2_OL_MAIN_HEADER_BITNR]        = "qcow2_header",
    [QCOW2_OL_ACTIVE_L1_BITNR]          = "active L1 table",
    [QCOW2_OL_ACTIVE_L2_BITNR]          = "active L2 table",
    [QCOW2_OL_REFCOUNT_TABLE_BITNR]     = "refcount table",
    [QCOW2_OL_REFCOUNT_BLOCK_BITNR]     = "refcount block",
    [QCOW2_OL_SNAPSHOT_TABLE_BITNR]     = "snapshot table",
    [QCOW2_OL_INACTIVE_L1_BITNR]        = "inactive L1 table",
    [QCOW2_OL_INACTIVE_L2_BITNR]        = "inactive L2 table",
    [QCOW2_OL_BITMAP_DIRECTORY_BITNR]   = "bitmap directory",
};
QEMU_BUILD_BUG_ON(QCOW2_OL_MAX_BITNR != ARRAY_SIZE(metadata_ol_names));
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
3068 int64_t size, bool data_file)
3069{
3070 int ret;
3071
3072 if (data_file && has_data_file(bs)) {
3073 return 0;
3074 }
3075
3076 ret = qcow2_check_metadata_overlap(bs, ign, offset, size);
3077 if (ret < 0) {
3078 return ret;
3079 } else if (ret > 0) {
3080 int metadata_ol_bitnr = ctz32(ret);
3081 assert(metadata_ol_bitnr < QCOW2_OL_MAX_BITNR);
3082
3083 qcow2_signal_corruption(bs, true, offset, size, "Preventing invalid "
3084 "write on metadata (overlaps with %s)",
3085 metadata_ol_names[metadata_ol_bitnr]);
3086 return -EIO;
3087 }
3088
3089 return 0;
3090}
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
/*
 * Callback invoked by walk_over_reftable() each time a new refblock has
 * been filled.  @refblock_empty tells the callback whether the refblock
 * contains only zero refcounts.  The callback may grow *reftable (updating
 * *reftable_size accordingly) and must set *allocated if it allocated new
 * clusters (see alloc_refblock() and flush_refblock() below).
 * Returns 0 on success, -errno on error (setting @errp).
 */
typedef int (RefblockFinishOp)(BlockDriverState *bs, uint64_t **reftable,
                               uint64_t reftable_index, uint64_t *reftable_size,
                               void *refblock, bool refblock_empty,
                               bool *allocated, Error **errp);
3106
3107
3108
3109
3110
3111
3112static int alloc_refblock(BlockDriverState *bs, uint64_t **reftable,
3113 uint64_t reftable_index, uint64_t *reftable_size,
3114 void *refblock, bool refblock_empty, bool *allocated,
3115 Error **errp)
3116{
3117 BDRVQcow2State *s = bs->opaque;
3118 int64_t offset;
3119
3120 if (!refblock_empty && reftable_index >= *reftable_size) {
3121 uint64_t *new_reftable;
3122 uint64_t new_reftable_size;
3123
3124 new_reftable_size = ROUND_UP(reftable_index + 1,
3125 s->cluster_size / REFTABLE_ENTRY_SIZE);
3126 if (new_reftable_size > QCOW_MAX_REFTABLE_SIZE / REFTABLE_ENTRY_SIZE) {
3127 error_setg(errp,
3128 "This operation would make the refcount table grow "
3129 "beyond the maximum size supported by QEMU, aborting");
3130 return -ENOTSUP;
3131 }
3132
3133 new_reftable = g_try_realloc(*reftable, new_reftable_size *
3134 REFTABLE_ENTRY_SIZE);
3135 if (!new_reftable) {
3136 error_setg(errp, "Failed to increase reftable buffer size");
3137 return -ENOMEM;
3138 }
3139
3140 memset(new_reftable + *reftable_size, 0,
3141 (new_reftable_size - *reftable_size) * REFTABLE_ENTRY_SIZE);
3142
3143 *reftable = new_reftable;
3144 *reftable_size = new_reftable_size;
3145 }
3146
3147 if (!refblock_empty && !(*reftable)[reftable_index]) {
3148 offset = qcow2_alloc_clusters(bs, s->cluster_size);
3149 if (offset < 0) {
3150 error_setg_errno(errp, -offset, "Failed to allocate refblock");
3151 return offset;
3152 }
3153 (*reftable)[reftable_index] = offset;
3154 *allocated = true;
3155 }
3156
3157 return 0;
3158}
3159
3160
3161
3162
3163
3164
3165static int flush_refblock(BlockDriverState *bs, uint64_t **reftable,
3166 uint64_t reftable_index, uint64_t *reftable_size,
3167 void *refblock, bool refblock_empty, bool *allocated,
3168 Error **errp)
3169{
3170 BDRVQcow2State *s = bs->opaque;
3171 int64_t offset;
3172 int ret;
3173
3174 if (reftable_index < *reftable_size && (*reftable)[reftable_index]) {
3175 offset = (*reftable)[reftable_index];
3176
3177 ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size,
3178 false);
3179 if (ret < 0) {
3180 error_setg_errno(errp, -ret, "Overlap check failed");
3181 return ret;
3182 }
3183
3184 ret = bdrv_pwrite(bs->file, offset, s->cluster_size, refblock, 0);
3185 if (ret < 0) {
3186 error_setg_errno(errp, -ret, "Failed to write refblock");
3187 return ret;
3188 }
3189 } else {
3190 assert(refblock_empty);
3191 }
3192
3193 return 0;
3194}
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
/*
 * Walks over the entire current refcount table, reading every refcount
 * (clusters covered by missing refblocks count as zero) and streaming
 * them into refblocks of the new refcount width: new_refblock is filled
 * entry by entry via @new_set_refcount (if non-NULL); whenever it is
 * full, @operation is invoked for the corresponding entry of the new
 * reftable (which it may grow — see RefblockFinishOp).
 *
 * @index and @total position this walk within a series of walks for the
 * progress reporting done through @status_cb.
 *
 * Returns 0 on success, -errno on error (setting @errp).
 */
static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable,
                              uint64_t *new_reftable_index,
                              uint64_t *new_reftable_size,
                              void *new_refblock, int new_refblock_size,
                              int new_refcount_bits,
                              RefblockFinishOp *operation, bool *allocated,
                              Qcow2SetRefcountFunc *new_set_refcount,
                              BlockDriverAmendStatusCB *status_cb,
                              void *cb_opaque, int index, int total,
                              Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t reftable_index;
    bool new_refblock_empty = true;
    int refblock_index;
    int new_refblock_index = 0;
    int ret;

    for (reftable_index = 0; reftable_index < s->refcount_table_size;
         reftable_index++)
    {
        uint64_t refblock_offset = s->refcount_table[reftable_index]
                                 & REFT_OFFSET_MASK;

        status_cb(bs, (uint64_t)index * s->refcount_table_size + reftable_index,
                  (uint64_t)total * s->refcount_table_size, cb_opaque);

        if (refblock_offset) {
            void *refblock;

            if (offset_into_cluster(s, refblock_offset)) {
                qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#"
                                        PRIx64 " unaligned (reftable index: %#"
                                        PRIx64 ")", refblock_offset,
                                        reftable_index);
                error_setg(errp,
                           "Image is corrupt (unaligned refblock offset)");
                return -EIO;
            }

            ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offset,
                                  &refblock);
            if (ret < 0) {
                error_setg_errno(errp, -ret, "Failed to retrieve refblock");
                return ret;
            }

            for (refblock_index = 0; refblock_index < s->refcount_block_size;
                 refblock_index++)
            {
                uint64_t refcount;

                if (new_refblock_index >= new_refblock_size) {
                    /* The new refblock is full; hand it to the callback and
                     * start filling the next one */
                    ret = operation(bs, new_reftable, *new_reftable_index,
                                    new_reftable_size, new_refblock,
                                    new_refblock_empty, allocated, errp);
                    if (ret < 0) {
                        qcow2_cache_put(s->refcount_block_cache, &refblock);
                        return ret;
                    }

                    (*new_reftable_index)++;
                    new_refblock_index = 0;
                    new_refblock_empty = true;
                }

                refcount = s->get_refcount(refblock, refblock_index);
                if (new_refcount_bits < 64 && refcount >> new_refcount_bits) {
                    /* The refcount does not fit into the narrower entry */
                    uint64_t offset;

                    qcow2_cache_put(s->refcount_block_cache, &refblock);

                    offset = ((reftable_index << s->refcount_block_bits)
                              + refblock_index) << s->cluster_bits;

                    error_setg(errp, "Cannot decrease refcount entry width to "
                               "%i bits: Cluster at offset %#" PRIx64 " has a "
                               "refcount of %" PRIu64, new_refcount_bits,
                               offset, refcount);
                    return -EINVAL;
                }

                if (new_set_refcount) {
                    new_set_refcount(new_refblock, new_refblock_index++,
                                     refcount);
                } else {
                    new_refblock_index++;
                }
                new_refblock_empty = new_refblock_empty && refcount == 0;
            }

            qcow2_cache_put(s->refcount_block_cache, &refblock);
        } else {
            /* No refblock on disk: all refcounts in this range are zero */
            for (refblock_index = 0; refblock_index < s->refcount_block_size;
                 refblock_index++)
            {
                if (new_refblock_index >= new_refblock_size) {
                    /* The new refblock is full; hand it to the callback and
                     * start filling the next one */
                    ret = operation(bs, new_reftable, *new_reftable_index,
                                    new_reftable_size, new_refblock,
                                    new_refblock_empty, allocated, errp);
                    if (ret < 0) {
                        return ret;
                    }

                    (*new_reftable_index)++;
                    new_refblock_index = 0;
                    new_refblock_empty = true;
                }

                if (new_set_refcount) {
                    new_set_refcount(new_refblock, new_refblock_index++, 0);
                } else {
                    new_refblock_index++;
                }
            }
        }
    }

    if (new_refblock_index > 0) {
        /* Flush the last, partially filled refblock; pad the unused tail
         * with zero refcounts */
        if (new_set_refcount) {
            for (; new_refblock_index < new_refblock_size;
                 new_refblock_index++)
            {
                new_set_refcount(new_refblock, new_refblock_index, 0);
            }
        }

        ret = operation(bs, new_reftable, *new_reftable_index,
                        new_reftable_size, new_refblock, new_refblock_empty,
                        allocated, errp);
        if (ret < 0) {
            return ret;
        }

        (*new_reftable_index)++;
    }

    status_cb(bs, (uint64_t)(index + 1) * s->refcount_table_size,
              (uint64_t)total * s->refcount_table_size, cb_opaque);

    return 0;
}
3355
/*
 * Converts the image to a refcount entry width of 1 << @refcount_order
 * bits (qcow2 v3 only):
 *  1. Walks over the reftable with alloc_refblock() until no new clusters
 *     need to be allocated (each allocation can itself require more
 *     refblocks, hence the loop);
 *  2. Walks again with flush_refblock() to write the converted refblocks;
 *  3. Writes the new reftable, updates the image header and switches the
 *     in-memory state over, then frees the old refcount structures via
 *     the shared cleanup path.
 *
 * Progress is reported through @status_cb/@cb_opaque.  Returns 0 on
 * success, -errno on error (setting @errp).
 */
int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
                                BlockDriverAmendStatusCB *status_cb,
                                void *cb_opaque, Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    Qcow2GetRefcountFunc *new_get_refcount;
    Qcow2SetRefcountFunc *new_set_refcount;
    void *new_refblock = qemu_blockalign(bs->file->bs, s->cluster_size);
    uint64_t *new_reftable = NULL, new_reftable_size = 0;
    uint64_t *old_reftable, old_reftable_size, old_reftable_offset;
    uint64_t new_reftable_index = 0;
    uint64_t i;
    int64_t new_reftable_offset = 0, allocated_reftable_size = 0;
    int new_refblock_size, new_refcount_bits = 1 << refcount_order;
    int old_refcount_order;
    int walk_index = 0;
    int ret;
    bool new_allocation;

    assert(s->qcow_version >= 3);
    assert(refcount_order >= 0 && refcount_order <= 6);

    /* Number of refcount entries per cluster-sized refblock at the new
     * width (refcount_order - 3 is the log2 of the entry size in bytes) */
    new_refblock_size = 1 << (s->cluster_bits - (refcount_order - 3));

    new_get_refcount = get_refcount_funcs[refcount_order];
    new_set_refcount = set_refcount_funcs[refcount_order];

    /* Phase 1: allocate clusters for the new refblocks and reftable;
     * repeat until a walk makes no further allocations */
    do {
        int total_walks;

        new_allocation = false;

        /* At least three walks are expected in total: two allocation
         * passes plus the final flush pass (for progress reporting) */
        total_walks = MAX(walk_index + 2, 3);

        /* First, allocate the structures so they are present in the
         * refcount structures */
        ret = walk_over_reftable(bs, &new_reftable, &new_reftable_index,
                                 &new_reftable_size, NULL, new_refblock_size,
                                 new_refcount_bits, &alloc_refblock,
                                 &new_allocation, NULL, status_cb, cb_opaque,
                                 walk_index++, total_walks, errp);
        if (ret < 0) {
            goto done;
        }

        new_reftable_index = 0;

        if (new_allocation) {
            /* The reftable may have grown; free any previously allocated
             * reftable clusters and allocate it anew */
            if (new_reftable_offset) {
                qcow2_free_clusters(
                    bs, new_reftable_offset,
                    allocated_reftable_size * REFTABLE_ENTRY_SIZE,
                    QCOW2_DISCARD_NEVER);
            }

            new_reftable_offset = qcow2_alloc_clusters(bs, new_reftable_size *
                                                           REFTABLE_ENTRY_SIZE);
            if (new_reftable_offset < 0) {
                error_setg_errno(errp, -new_reftable_offset,
                                 "Failed to allocate the new reftable");
                ret = new_reftable_offset;
                goto done;
            }
            allocated_reftable_size = new_reftable_size;
        }
    } while (new_allocation);

    /* Phase 2: write the new refblocks */
    ret = walk_over_reftable(bs, &new_reftable, &new_reftable_index,
                             &new_reftable_size, new_refblock,
                             new_refblock_size, new_refcount_bits,
                             &flush_refblock, &new_allocation, new_set_refcount,
                             status_cb, cb_opaque, walk_index, walk_index + 1,
                             errp);
    if (ret < 0) {
        goto done;
    }
    /* All clusters were allocated in phase 1 */
    assert(!new_allocation);

    /* Phase 3: write the new reftable (entries converted to big endian
     * in place, then back) */
    ret = qcow2_pre_write_overlap_check(bs, 0, new_reftable_offset,
                                        new_reftable_size * REFTABLE_ENTRY_SIZE,
                                        false);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Overlap check failed");
        goto done;
    }

    for (i = 0; i < new_reftable_size; i++) {
        cpu_to_be64s(&new_reftable[i]);
    }

    ret = bdrv_pwrite(bs->file, new_reftable_offset,
                      new_reftable_size * REFTABLE_ENTRY_SIZE, new_reftable,
                      0);

    for (i = 0; i < new_reftable_size; i++) {
        be64_to_cpus(&new_reftable[i]);
    }

    if (ret < 0) {
        error_setg_errno(errp, -ret, "Failed to write the new reftable");
        goto done;
    }

    /* Make sure the refblocks are stable on disk before the header points
     * at the new structure */
    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Failed to flush the refblock cache");
        goto done;
    }

    /* Switch the header over to the new refcount structure; keep the old
     * values so we can roll back if updating the header fails */
    old_refcount_order  = s->refcount_order;
    old_reftable_size   = s->refcount_table_size;
    old_reftable_offset = s->refcount_table_offset;

    s->refcount_order        = refcount_order;
    s->refcount_table_size   = new_reftable_size;
    s->refcount_table_offset = new_reftable_offset;

    ret = qcow2_update_header(bs);
    if (ret < 0) {
        s->refcount_order        = old_refcount_order;
        s->refcount_table_size   = old_reftable_size;
        s->refcount_table_offset = old_reftable_offset;
        error_setg_errno(errp, -ret, "Failed to update the qcow2 header");
        goto done;
    }

    /* Switch the in-memory state over to the new structure */
    old_reftable = s->refcount_table;
    s->refcount_table = new_reftable;
    update_max_refcount_table_index(s);

    s->refcount_bits = 1 << refcount_order;
    s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
    s->refcount_max += s->refcount_max - 1;

    s->refcount_block_bits = s->cluster_bits - (refcount_order - 3);
    s->refcount_block_size = 1 << s->refcount_block_bits;

    s->get_refcount = new_get_refcount;
    s->set_refcount = new_set_refcount;

    /* On success, the OLD refcount structures must be freed: reuse the
     * new_reftable* variables so the cleanup path below handles it */
    new_reftable        = old_reftable;
    new_reftable_size   = old_reftable_size;
    new_reftable_offset = old_reftable_offset;

done:
    if (new_reftable) {
        /* Free whatever is referenced by new_reftable*: on failure these
         * are the partially built new structures, on success the old ones */
        for (i = 0; i < new_reftable_size; i++) {
            uint64_t offset = new_reftable[i] & REFT_OFFSET_MASK;
            if (offset) {
                qcow2_free_clusters(bs, offset, s->cluster_size,
                                    QCOW2_DISCARD_OTHER);
            }
        }
        g_free(new_reftable);

        if (new_reftable_offset > 0) {
            qcow2_free_clusters(bs, new_reftable_offset,
                                new_reftable_size * REFTABLE_ENTRY_SIZE,
                                QCOW2_DISCARD_OTHER);
        }
    }

    qemu_vfree(new_refblock);
    return ret;
}
3543
3544static int64_t get_refblock_offset(BlockDriverState *bs, uint64_t offset)
3545{
3546 BDRVQcow2State *s = bs->opaque;
3547 uint32_t index = offset_to_reftable_index(s, offset);
3548 int64_t covering_refblock_offset = 0;
3549
3550 if (index < s->refcount_table_size) {
3551 covering_refblock_offset = s->refcount_table[index] & REFT_OFFSET_MASK;
3552 }
3553 if (!covering_refblock_offset) {
3554 qcow2_signal_corruption(bs, true, -1, -1, "Refblock at %#" PRIx64 " is "
3555 "not covered by the refcount structures",
3556 offset);
3557 return -EIO;
3558 }
3559
3560 return covering_refblock_offset;
3561}
3562
/*
 * Frees the refcount block cluster at @discard_block_offs: zeroes its
 * entry in the refblock covering it (which must currently be exactly 1),
 * drops any cached copy of the discarded block, and queues the cluster
 * for discarding.  Returns 0 on success, -errno on error.
 */
static int coroutine_fn
qcow2_discard_refcount_block(BlockDriverState *bs, uint64_t discard_block_offs)
{
    BDRVQcow2State *s = bs->opaque;
    int64_t refblock_offs;
    uint64_t cluster_index = discard_block_offs >> s->cluster_bits;
    uint32_t block_index = cluster_index & (s->refcount_block_size - 1);
    void *refblock;
    int ret;

    refblock_offs = get_refblock_offset(bs, discard_block_offs);
    if (refblock_offs < 0) {
        return refblock_offs;
    }

    assert(discard_block_offs != 0);

    ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offs,
                          &refblock);
    if (ret < 0) {
        return ret;
    }

    if (s->get_refcount(refblock, block_index) != 1) {
        /* A refblock being discarded must be referenced exactly once */
        qcow2_signal_corruption(bs, true, -1, -1, "Invalid refcount:"
                                " refblock offset %#" PRIx64
                                ", reftable index %u"
                                ", block offset %#" PRIx64
                                ", refcount %#" PRIx64,
                                refblock_offs,
                                offset_to_reftable_index(s, discard_block_offs),
                                discard_block_offs,
                                s->get_refcount(refblock, block_index));
        qcow2_cache_put(s->refcount_block_cache, &refblock);
        return -EINVAL;
    }
    s->set_refcount(refblock, block_index, 0);

    qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refblock);

    qcow2_cache_put(s->refcount_block_cache, &refblock);

    /* The freed cluster may be handed out again by the allocator */
    if (cluster_index < s->free_cluster_index) {
        s->free_cluster_index = cluster_index;
    }

    refblock = qcow2_cache_is_table_offset(s->refcount_block_cache,
                                           discard_block_offs);
    if (refblock) {
        /* The discarded block itself may still be cached; drop it */
        qcow2_cache_discard(s->refcount_block_cache, refblock);
    }
    update_refcount_discard(bs, discard_block_offs, s->cluster_size);

    return 0;
}
3619
/*
 * Shrinks the refcount structures: writes a reftable to disk in which
 * every refblock that contains only zero refcounts is dropped, then
 * frees and discards those refblocks and clears their in-memory
 * reftable entries.  Returns 0 on success, -errno on error.
 */
int coroutine_fn qcow2_shrink_reftable(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t *reftable_tmp =
        g_malloc(s->refcount_table_size * REFTABLE_ENTRY_SIZE);
    int i, ret;

    /* Build the big-endian on-disk reftable with unused refblocks zeroed */
    for (i = 0; i < s->refcount_table_size; i++) {
        int64_t refblock_offs = s->refcount_table[i] & REFT_OFFSET_MASK;
        void *refblock;
        bool unused_block;

        if (refblock_offs == 0) {
            reftable_tmp[i] = 0;
            continue;
        }
        ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offs,
                              &refblock);
        if (ret < 0) {
            goto out;
        }

        /* If the refblock covers its own cluster, temporarily zero its
         * self-refcount so an otherwise-unused refblock still reads as
         * all-zero; restore the refcount afterwards */
        if (i == offset_to_reftable_index(s, refblock_offs)) {
            uint64_t block_index = (refblock_offs >> s->cluster_bits) &
                                   (s->refcount_block_size - 1);
            uint64_t refcount = s->get_refcount(refblock, block_index);

            s->set_refcount(refblock, block_index, 0);

            unused_block = buffer_is_zero(refblock, s->cluster_size);

            s->set_refcount(refblock, block_index, refcount);
        } else {
            unused_block = buffer_is_zero(refblock, s->cluster_size);
        }
        qcow2_cache_put(s->refcount_block_cache, &refblock);

        reftable_tmp[i] = unused_block ? 0 : cpu_to_be64(s->refcount_table[i]);
    }

    ret = bdrv_co_pwrite_sync(bs->file, s->refcount_table_offset,
                              s->refcount_table_size * REFTABLE_ENTRY_SIZE,
                              reftable_tmp, 0);

    /*
     * Free the refblocks dropped from the on-disk reftable.  Even after
     * an error the in-memory entries are still cleared so that the
     * in-memory reftable matches what was (attempted to be) written;
     * further refblocks are only discarded while ret is still 0.
     */
    for (i = 0; i < s->refcount_table_size; i++) {
        if (s->refcount_table[i] && !reftable_tmp[i]) {
            if (ret == 0) {
                ret = qcow2_discard_refcount_block(bs, s->refcount_table[i] &
                                                       REFT_OFFSET_MASK);
            }
            s->refcount_table[i] = 0;
        }
    }

    if (!s->cache_discards) {
        qcow2_process_discards(bs, ret);
    }

out:
    g_free(reftable_tmp);
    return ret;
}
3687
3688int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
3689{
3690 BDRVQcow2State *s = bs->opaque;
3691 int64_t i;
3692
3693 for (i = size_to_clusters(s, size) - 1; i >= 0; i--) {
3694 uint64_t refcount;
3695 int ret = qcow2_get_refcount(bs, i, &refcount);
3696 if (ret < 0) {
3697 fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
3698 i, strerror(-ret));
3699 return ret;
3700 }
3701 if (refcount > 0) {
3702 return i;
3703 }
3704 }
3705 qcow2_signal_corruption(bs, true, -1, -1,
3706 "There are no references in the refcount table.");
3707 return -EIO;
3708}
3709
3710int coroutine_fn qcow2_detect_metadata_preallocation(BlockDriverState *bs)
3711{
3712 BDRVQcow2State *s = bs->opaque;
3713 int64_t i, end_cluster, cluster_count = 0, threshold;
3714 int64_t file_length, real_allocation, real_clusters;
3715
3716 qemu_co_mutex_assert_locked(&s->lock);
3717
3718 file_length = bdrv_getlength(bs->file->bs);
3719 if (file_length < 0) {
3720 return file_length;
3721 }
3722
3723 real_allocation = bdrv_co_get_allocated_file_size(bs->file->bs);
3724 if (real_allocation < 0) {
3725 return real_allocation;
3726 }
3727
3728 real_clusters = real_allocation / s->cluster_size;
3729 threshold = MAX(real_clusters * 10 / 9, real_clusters + 2);
3730
3731 end_cluster = size_to_clusters(s, file_length);
3732 for (i = 0; i < end_cluster && cluster_count < threshold; i++) {
3733 uint64_t refcount;
3734 int ret = qcow2_get_refcount(bs, i, &refcount);
3735 if (ret < 0) {
3736 return ret;
3737 }
3738 cluster_count += !!refcount;
3739 }
3740
3741 return cluster_count >= threshold;
3742}
3743