1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include "qemu-common.h"
26#include "block_int.h"
27#include "module.h"
28#include "migration.h"
29
30
31
/* Size in bytes of the footer / header buffers this driver reads and writes. */
#define HEADER_SIZE 512
33
34
35
/*
 * Disk type codes.  vpc_create() stores one of these (byte-swapped to
 * big-endian) in vhd_footer.type; vpc_read()/vpc_write() test that field
 * against VHD_FIXED to pick the I/O path.
 */
enum vhd_type {
    VHD_FIXED = 2,
    VHD_DYNAMIC = 3,
    /* Not referenced by the code in this file. */
    VHD_DIFFERENCING = 4,
};
41
42
/*
 * Unix time of 2000-01-01 00:00:00 UTC.  vpc_create() subtracts it from
 * time(NULL) to produce the footer timestamp.
 */
#define VHD_TIMESTAMP_BASE 946684800
44
45
/*
 * Layout of the VHD footer as it appears in the image file.
 *
 * The driver overlays this struct directly on the raw HEADER_SIZE-byte
 * buffer read from disk, so every multi-byte field holds a big-endian value
 * and is converted with the be*_to_cpu() helpers before use.
 */
struct vhd_footer {
    char creator[8];            /* magic cookie, "conectix" */
    uint32_t features;          /* vpc_create() writes 0x02 */
    uint32_t version;           /* vpc_create() writes 0x00010000 */

    /*
     * For dynamic disks: file offset of the dynamic disk header
     * (vpc_create() writes HEADER_SIZE).  For fixed disks vpc_create()
     * writes all ones.
     */
    uint64_t data_offset;

    /* Seconds since VHD_TIMESTAMP_BASE at creation time */
    uint32_t timestamp;

    char creator_app[4];        /* vpc_create() writes "qemu" */
    uint16_t major;             /* vpc_create() writes 0x0005 */
    uint16_t minor;             /* vpc_create() writes 0x0003 */
    char creator_os[4];         /* vpc_create() writes "Wi2k" */

    uint64_t orig_size;         /* disk size in bytes, as set at creation */
    uint64_t size;              /* disk size in bytes */

    /*
     * Disk geometry.  vpc_open() takes the product of the three values as
     * the number of sectors of the disk.
     */
    uint16_t cyls;
    uint8_t heads;
    uint8_t secs_per_cyl;

    uint32_t type;              /* one of enum vhd_type */

    /*
     * vpc_checksum() of the footer, computed while this field is zero
     * (see vpc_open() and vpc_create()).
     */
    uint32_t checksum;

    /* Not read or written by the code in this file (left zero on create). */
    uint8_t uuid[16];

    /* Not read or written by the code in this file (left zero on create). */
    uint8_t in_saved_state;
};
80
/*
 * Layout of the dynamic disk header that follows the footer copy at the
 * start of a dynamic image.  Like the footer, it is overlaid on a raw
 * buffer, so multi-byte fields are big-endian.
 */
struct vhd_dyndisk_header {
    char magic[8];              /* magic cookie, "cxsparse" */

    /* create_dynamic_disk() writes all ones; not read by this driver */
    uint64_t data_offset;

    /* File offset of the block allocation table (BAT) */
    uint64_t table_offset;

    uint32_t version;           /* create_dynamic_disk() writes 0x00010000 */
    uint32_t max_table_entries; /* number of 32-bit entries in the BAT */

    /* Size of a data block in bytes; create_dynamic_disk() uses 0x200000 */
    uint32_t block_size;

    /* vpc_checksum() of the 1024-byte header, computed with this field zero */
    uint32_t checksum;

    /*
     * The parent_* fields below are not referenced by the code in this
     * file; presumably they describe the parent of a differencing disk --
     * TODO confirm against the format specification.
     */
    uint8_t parent_uuid[16];
    uint32_t parent_timestamp;
    uint32_t reserved;

    uint8_t parent_name[512];

    struct {
        uint32_t platform;
        uint32_t data_space;
        uint32_t data_length;
        uint32_t reserved;
        uint64_t data_offset;
    } parent_locator[8];
};
112
/* Per-image driver state, stored in bs->opaque. */
typedef struct BDRVVPCState {
    /* Held by vpc_co_read()/vpc_co_write() for the duration of a request */
    CoMutex lock;
    /* Raw footer as read by vpc_open(); written back by rewrite_footer() */
    uint8_t footer_buf[HEADER_SIZE];
    /*
     * File offset at which the next data block will be allocated; this is
     * also where rewrite_footer() places the footer.
     */
    uint64_t free_data_block_offset;
    /* Number of entries in pagetable */
    int max_table_entries;
    /*
     * In-memory copy of the BAT in host byte order.  Each entry is the
     * block's position in 512-byte units; 0xFFFFFFFF marks an unallocated
     * block.
     */
    uint32_t *pagetable;
    /* File offset of the BAT */
    uint64_t bat_offset;
    /*
     * File offset of the block bitmap most recently rewritten by
     * get_sector_offset(); vpc_open() initialises it to -1.
     */
    uint64_t last_bitmap_offset;

    /* Data block size in bytes, from the dynamic disk header */
    uint32_t block_size;
    /* Size in bytes of the bitmap preceding each data block */
    uint32_t bitmap_size;

#ifdef CACHE
    /* Only compiled when CACHE is defined; it is not defined in this file. */
    uint8_t *pageentry_u8;
    uint32_t *pageentry_u32;
    uint16_t *pageentry_u16;

    uint64_t last_bitmap;
#endif

    /* Registered in vpc_open(), removed and freed in vpc_close() */
    Error *migration_blocker;
} BDRVVPCState;
135
/*
 * Compute the VHD checksum of a buffer: the one's complement of the
 * 32-bit sum of all of its bytes.
 *
 * Callers in this file zero the checksum field inside the buffer before
 * calling, so the stored checksum does not contribute to the sum.
 *
 * @buf:  bytes to sum (not modified)
 * @size: number of bytes in @buf
 *
 * Returns the checksum in host byte order.
 */
static uint32_t vpc_checksum(const uint8_t *buf, size_t size)
{
    uint32_t sum = 0;
    size_t i;   /* same type as @size: no signed/unsigned comparison */

    for (i = 0; i < size; i++) {
        sum += buf[i];
    }

    return ~sum;
}
146
147
/*
 * Probe callback: score how likely it is that @buf is the start of a VHD
 * image.
 *
 * Returns 100 when at least eight bytes are available and they spell the
 * "conectix" cookie, 0 otherwise.  @filename is not consulted.
 */
static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    const int cookie_len = 8;

    if (buf_size < cookie_len) {
        return 0;
    }

    return strncmp((char *)buf, "conectix", cookie_len) == 0 ? 100 : 0;
}
154
155static int vpc_open(BlockDriverState *bs, int flags)
156{
157 BDRVVPCState *s = bs->opaque;
158 int i;
159 struct vhd_footer* footer;
160 struct vhd_dyndisk_header* dyndisk_header;
161 uint8_t buf[HEADER_SIZE];
162 uint32_t checksum;
163 int err = -1;
164 int disk_type = VHD_DYNAMIC;
165
166 if (bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE) != HEADER_SIZE)
167 goto fail;
168
169 footer = (struct vhd_footer*) s->footer_buf;
170 if (strncmp(footer->creator, "conectix", 8)) {
171 int64_t offset = bdrv_getlength(bs->file);
172 if (offset < HEADER_SIZE) {
173 goto fail;
174 }
175
176 if (bdrv_pread(bs->file, offset-HEADER_SIZE, s->footer_buf, HEADER_SIZE)
177 != HEADER_SIZE) {
178 goto fail;
179 }
180 if (strncmp(footer->creator, "conectix", 8)) {
181 goto fail;
182 }
183 disk_type = VHD_FIXED;
184 }
185
186 checksum = be32_to_cpu(footer->checksum);
187 footer->checksum = 0;
188 if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum)
189 fprintf(stderr, "block-vpc: The header checksum of '%s' is "
190 "incorrect.\n", bs->filename);
191
192
193 footer->checksum = be32_to_cpu(checksum);
194
195
196
197
198 bs->total_sectors = (int64_t)
199 be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
200
201 if (bs->total_sectors >= 65535 * 16 * 255) {
202 err = -EFBIG;
203 goto fail;
204 }
205
206 if (disk_type == VHD_DYNAMIC) {
207 if (bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
208 HEADER_SIZE) != HEADER_SIZE) {
209 goto fail;
210 }
211
212 dyndisk_header = (struct vhd_dyndisk_header *) buf;
213
214 if (strncmp(dyndisk_header->magic, "cxsparse", 8)) {
215 goto fail;
216 }
217
218 s->block_size = be32_to_cpu(dyndisk_header->block_size);
219 s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;
220
221 s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
222 s->pagetable = g_malloc(s->max_table_entries * 4);
223
224 s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
225 if (bdrv_pread(bs->file, s->bat_offset, s->pagetable,
226 s->max_table_entries * 4) != s->max_table_entries * 4) {
227 goto fail;
228 }
229
230 s->free_data_block_offset =
231 (s->bat_offset + (s->max_table_entries * 4) + 511) & ~511;
232
233 for (i = 0; i < s->max_table_entries; i++) {
234 be32_to_cpus(&s->pagetable[i]);
235 if (s->pagetable[i] != 0xFFFFFFFF) {
236 int64_t next = (512 * (int64_t) s->pagetable[i]) +
237 s->bitmap_size + s->block_size;
238
239 if (next > s->free_data_block_offset) {
240 s->free_data_block_offset = next;
241 }
242 }
243 }
244
245 s->last_bitmap_offset = (int64_t) -1;
246
247#ifdef CACHE
248 s->pageentry_u8 = g_malloc(512);
249 s->pageentry_u32 = s->pageentry_u8;
250 s->pageentry_u16 = s->pageentry_u8;
251 s->last_pagetable = -1;
252#endif
253 }
254
255 qemu_co_mutex_init(&s->lock);
256
257
258 error_set(&s->migration_blocker,
259 QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
260 "vpc", bs->device_name, "live migration");
261 migrate_add_blocker(s->migration_blocker);
262
263 return 0;
264 fail:
265 return err;
266}
267
268
269
270
271
272
273
274
275static inline int64_t get_sector_offset(BlockDriverState *bs,
276 int64_t sector_num, int write)
277{
278 BDRVVPCState *s = bs->opaque;
279 uint64_t offset = sector_num * 512;
280 uint64_t bitmap_offset, block_offset;
281 uint32_t pagetable_index, pageentry_index;
282
283 pagetable_index = offset / s->block_size;
284 pageentry_index = (offset % s->block_size) / 512;
285
286 if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
287 return -1;
288
289 bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
290 block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index);
291
292
293
294
295
296
297 if (write && (s->last_bitmap_offset != bitmap_offset)) {
298 uint8_t bitmap[s->bitmap_size];
299
300 s->last_bitmap_offset = bitmap_offset;
301 memset(bitmap, 0xff, s->bitmap_size);
302 bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
303 }
304
305
306
307
308
309
310#if 0
311#ifdef CACHE
312 if (bitmap_offset != s->last_bitmap)
313 {
314 lseek(s->fd, bitmap_offset, SEEK_SET);
315
316 s->last_bitmap = bitmap_offset;
317
318
319
320 read(s->fd, s->pageentry_u8, 512);
321 for (i = 0; i < 128; i++)
322 be32_to_cpus(&s->pageentry_u32[i]);
323 }
324
325 if ((s->pageentry_u8[pageentry_index / 8] >> (pageentry_index % 8)) & 1)
326 return -1;
327#else
328 lseek(s->fd, bitmap_offset + (pageentry_index / 8), SEEK_SET);
329
330 read(s->fd, &bitmap_entry, 1);
331
332 if ((bitmap_entry >> (pageentry_index % 8)) & 1)
333 return -1;
334#endif
335#endif
336
337 return block_offset;
338}
339
340
341
342
343
344
345
346static int rewrite_footer(BlockDriverState* bs)
347{
348 int ret;
349 BDRVVPCState *s = bs->opaque;
350 int64_t offset = s->free_data_block_offset;
351
352 ret = bdrv_pwrite_sync(bs->file, offset, s->footer_buf, HEADER_SIZE);
353 if (ret < 0)
354 return ret;
355
356 return 0;
357}
358
359
360
361
362
363
364
365
366static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
367{
368 BDRVVPCState *s = bs->opaque;
369 int64_t bat_offset;
370 uint32_t index, bat_value;
371 int ret;
372 uint8_t bitmap[s->bitmap_size];
373
374
375 if ((sector_num < 0) || (sector_num > bs->total_sectors))
376 return -1;
377
378
379 index = (sector_num * 512) / s->block_size;
380 if (s->pagetable[index] != 0xFFFFFFFF)
381 return -1;
382
383 s->pagetable[index] = s->free_data_block_offset / 512;
384
385
386 memset(bitmap, 0xff, s->bitmap_size);
387 ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
388 s->bitmap_size);
389 if (ret < 0) {
390 return ret;
391 }
392
393
394 s->free_data_block_offset += s->block_size + s->bitmap_size;
395 ret = rewrite_footer(bs);
396 if (ret < 0)
397 goto fail;
398
399
400 bat_offset = s->bat_offset + (4 * index);
401 bat_value = be32_to_cpu(s->pagetable[index]);
402 ret = bdrv_pwrite_sync(bs->file, bat_offset, &bat_value, 4);
403 if (ret < 0)
404 goto fail;
405
406 return get_sector_offset(bs, sector_num, 0);
407
408fail:
409 s->free_data_block_offset -= (s->block_size + s->bitmap_size);
410 return -1;
411}
412
413static int vpc_read(BlockDriverState *bs, int64_t sector_num,
414 uint8_t *buf, int nb_sectors)
415{
416 BDRVVPCState *s = bs->opaque;
417 int ret;
418 int64_t offset;
419 int64_t sectors, sectors_per_block;
420 struct vhd_footer *footer = (struct vhd_footer *) s->footer_buf;
421
422 if (cpu_to_be32(footer->type) == VHD_FIXED) {
423 return bdrv_read(bs->file, sector_num, buf, nb_sectors);
424 }
425 while (nb_sectors > 0) {
426 offset = get_sector_offset(bs, sector_num, 0);
427
428 sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
429 sectors = sectors_per_block - (sector_num % sectors_per_block);
430 if (sectors > nb_sectors) {
431 sectors = nb_sectors;
432 }
433
434 if (offset == -1) {
435 memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
436 } else {
437 ret = bdrv_pread(bs->file, offset, buf,
438 sectors * BDRV_SECTOR_SIZE);
439 if (ret != sectors * BDRV_SECTOR_SIZE) {
440 return -1;
441 }
442 }
443
444 nb_sectors -= sectors;
445 sector_num += sectors;
446 buf += sectors * BDRV_SECTOR_SIZE;
447 }
448 return 0;
449}
450
451static coroutine_fn int vpc_co_read(BlockDriverState *bs, int64_t sector_num,
452 uint8_t *buf, int nb_sectors)
453{
454 int ret;
455 BDRVVPCState *s = bs->opaque;
456 qemu_co_mutex_lock(&s->lock);
457 ret = vpc_read(bs, sector_num, buf, nb_sectors);
458 qemu_co_mutex_unlock(&s->lock);
459 return ret;
460}
461
462static int vpc_write(BlockDriverState *bs, int64_t sector_num,
463 const uint8_t *buf, int nb_sectors)
464{
465 BDRVVPCState *s = bs->opaque;
466 int64_t offset;
467 int64_t sectors, sectors_per_block;
468 int ret;
469 struct vhd_footer *footer = (struct vhd_footer *) s->footer_buf;
470
471 if (cpu_to_be32(footer->type) == VHD_FIXED) {
472 return bdrv_write(bs->file, sector_num, buf, nb_sectors);
473 }
474 while (nb_sectors > 0) {
475 offset = get_sector_offset(bs, sector_num, 1);
476
477 sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
478 sectors = sectors_per_block - (sector_num % sectors_per_block);
479 if (sectors > nb_sectors) {
480 sectors = nb_sectors;
481 }
482
483 if (offset == -1) {
484 offset = alloc_block(bs, sector_num);
485 if (offset < 0)
486 return -1;
487 }
488
489 ret = bdrv_pwrite(bs->file, offset, buf, sectors * BDRV_SECTOR_SIZE);
490 if (ret != sectors * BDRV_SECTOR_SIZE) {
491 return -1;
492 }
493
494 nb_sectors -= sectors;
495 sector_num += sectors;
496 buf += sectors * BDRV_SECTOR_SIZE;
497 }
498
499 return 0;
500}
501
502static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num,
503 const uint8_t *buf, int nb_sectors)
504{
505 int ret;
506 BDRVVPCState *s = bs->opaque;
507 qemu_co_mutex_lock(&s->lock);
508 ret = vpc_write(bs, sector_num, buf, nb_sectors);
509 qemu_co_mutex_unlock(&s->lock);
510 return ret;
511}
512
513
514
515
516
517
518
519
520
521
522
/*
 * Derive a cylinders/heads/sectors geometry for a disk of @total_sectors
 * sectors.
 *
 * Disks above 65535 * 16 * 63 sectors get 16 heads and 255 sectors per
 * track.  Smaller disks start from 17 sectors per track with at least four
 * heads and fall back to 16 heads with 31, then 63, sectors per track while
 * the cylinder count would not stay below 1024.  Because the division
 * rounds down, the resulting geometry may describe fewer sectors than
 * requested.
 *
 * @total_sectors: requested number of sectors
 * @cyls:          receives the number of cylinders
 * @heads:         receives the number of heads
 * @secs_per_cyl:  receives the sectors-per-track value
 *
 * Returns 0 on success.  Returns -EFBIG, leaving the outputs untouched,
 * when @total_sectors exceeds 65535 * 16 * 255.
 */
static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
    uint8_t* heads, uint8_t* secs_per_cyl)
{
    const int64_t max_sectors = 65535 * 16 * 255;
    const int64_t big_disk_threshold = 65535 * 16 * 63;
    uint32_t cyl_heads;
    uint8_t nheads;
    uint8_t spt;

    if (total_sectors > max_sectors) {
        return -EFBIG;
    }

    if (total_sectors > big_disk_threshold) {
        spt = 255;
        nheads = 16;
        cyl_heads = total_sectors / spt;
    } else {
        spt = 17;
        cyl_heads = total_sectors / spt;

        /* Stored in 8 bits on purpose: matches the width of *heads. */
        nheads = (cyl_heads + 1023) / 1024;
        if (nheads < 4) {
            nheads = 4;
        }

        if (nheads > 16 || cyl_heads >= (nheads * 1024)) {
            spt = 31;
            nheads = 16;
            cyl_heads = total_sectors / spt;
        }

        if (cyl_heads >= (nheads * 1024)) {
            spt = 63;
            nheads = 16;
            cyl_heads = total_sectors / spt;
        }
    }

    *secs_per_cyl = spt;
    *heads = nheads;
    *cyls = cyl_heads / nheads;

    return 0;
}
560
561static int create_dynamic_disk(int fd, uint8_t *buf, int64_t total_sectors)
562{
563 struct vhd_dyndisk_header* dyndisk_header =
564 (struct vhd_dyndisk_header*) buf;
565 size_t block_size, num_bat_entries;
566 int i;
567 int ret = -EIO;
568
569
570 block_size = 0x200000;
571 num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
572
573 if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE) {
574 goto fail;
575 }
576
577 if (lseek(fd, 1536 + ((num_bat_entries * 4 + 511) & ~511), SEEK_SET) < 0) {
578 goto fail;
579 }
580 if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE) {
581 goto fail;
582 }
583
584
585 if (lseek(fd, 3 * 512, SEEK_SET) < 0) {
586 goto fail;
587 }
588
589 memset(buf, 0xFF, 512);
590 for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
591 if (write(fd, buf, 512) != 512) {
592 goto fail;
593 }
594 }
595
596
597 memset(buf, 0, 1024);
598
599 memcpy(dyndisk_header->magic, "cxsparse", 8);
600
601
602
603
604
605 dyndisk_header->data_offset = be64_to_cpu(0xFFFFFFFFFFFFFFFFULL);
606 dyndisk_header->table_offset = be64_to_cpu(3 * 512);
607 dyndisk_header->version = be32_to_cpu(0x00010000);
608 dyndisk_header->block_size = be32_to_cpu(block_size);
609 dyndisk_header->max_table_entries = be32_to_cpu(num_bat_entries);
610
611 dyndisk_header->checksum = be32_to_cpu(vpc_checksum(buf, 1024));
612
613
614 if (lseek(fd, 512, SEEK_SET) < 0) {
615 goto fail;
616 }
617
618 if (write(fd, buf, 1024) != 1024) {
619 goto fail;
620 }
621 ret = 0;
622
623 fail:
624 return ret;
625}
626
627static int create_fixed_disk(int fd, uint8_t *buf, int64_t total_size)
628{
629 int ret = -EIO;
630
631
632 total_size += 512;
633 if (ftruncate(fd, total_size) != 0) {
634 ret = -errno;
635 goto fail;
636 }
637 if (lseek(fd, -512, SEEK_END) < 0) {
638 goto fail;
639 }
640 if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE) {
641 goto fail;
642 }
643
644 ret = 0;
645
646 fail:
647 return ret;
648}
649
650static int vpc_create(const char *filename, QEMUOptionParameter *options)
651{
652 uint8_t buf[1024];
653 struct vhd_footer *footer = (struct vhd_footer *) buf;
654 QEMUOptionParameter *disk_type_param;
655 int fd, i;
656 uint16_t cyls = 0;
657 uint8_t heads = 0;
658 uint8_t secs_per_cyl = 0;
659 int64_t total_sectors;
660 int64_t total_size;
661 int disk_type;
662 int ret = -EIO;
663
664
665 total_size = get_option_parameter(options, BLOCK_OPT_SIZE)->value.n;
666
667 disk_type_param = get_option_parameter(options, BLOCK_OPT_SUBFMT);
668 if (disk_type_param && disk_type_param->value.s) {
669 if (!strcmp(disk_type_param->value.s, "dynamic")) {
670 disk_type = VHD_DYNAMIC;
671 } else if (!strcmp(disk_type_param->value.s, "fixed")) {
672 disk_type = VHD_FIXED;
673 } else {
674 return -EINVAL;
675 }
676 } else {
677 disk_type = VHD_DYNAMIC;
678 }
679
680
681 fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
682 if (fd < 0) {
683 return -EIO;
684 }
685
686
687
688
689
690
691 total_sectors = total_size / BDRV_SECTOR_SIZE;
692 for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
693 if (calculate_geometry(total_sectors + i, &cyls, &heads,
694 &secs_per_cyl))
695 {
696 ret = -EFBIG;
697 goto fail;
698 }
699 }
700
701 total_sectors = (int64_t) cyls * heads * secs_per_cyl;
702
703
704 memset(buf, 0, 1024);
705
706 memcpy(footer->creator, "conectix", 8);
707
708 memcpy(footer->creator_app, "qemu", 4);
709 memcpy(footer->creator_os, "Wi2k", 4);
710
711 footer->features = be32_to_cpu(0x02);
712 footer->version = be32_to_cpu(0x00010000);
713 if (disk_type == VHD_DYNAMIC) {
714 footer->data_offset = be64_to_cpu(HEADER_SIZE);
715 } else {
716 footer->data_offset = be64_to_cpu(0xFFFFFFFFFFFFFFFFULL);
717 }
718 footer->timestamp = be32_to_cpu(time(NULL) - VHD_TIMESTAMP_BASE);
719
720
721 footer->major = be16_to_cpu(0x0005);
722 footer->minor = be16_to_cpu(0x0003);
723 if (disk_type == VHD_DYNAMIC) {
724 footer->orig_size = be64_to_cpu(total_sectors * 512);
725 footer->size = be64_to_cpu(total_sectors * 512);
726 } else {
727 footer->orig_size = be64_to_cpu(total_size);
728 footer->size = be64_to_cpu(total_size);
729 }
730 footer->cyls = be16_to_cpu(cyls);
731 footer->heads = heads;
732 footer->secs_per_cyl = secs_per_cyl;
733
734 footer->type = be32_to_cpu(disk_type);
735
736
737
738 footer->checksum = be32_to_cpu(vpc_checksum(buf, HEADER_SIZE));
739
740 if (disk_type == VHD_DYNAMIC) {
741 ret = create_dynamic_disk(fd, buf, total_sectors);
742 } else {
743 ret = create_fixed_disk(fd, buf, total_size);
744 }
745
746 fail:
747 qemu_close(fd);
748 return ret;
749}
750
751static void vpc_close(BlockDriverState *bs)
752{
753 BDRVVPCState *s = bs->opaque;
754 g_free(s->pagetable);
755#ifdef CACHE
756 g_free(s->pageentry_u8);
757#endif
758
759 migrate_del_blocker(s->migration_blocker);
760 error_free(s->migration_blocker);
761}
762
/*
 * Options accepted at image creation; vpc_create() looks them up by name.
 * The table ends with a { NULL } entry.
 */
static QEMUOptionParameter vpc_create_options[] = {
    {
        /* Size of the virtual disk */
        .name = BLOCK_OPT_SIZE,
        .type = OPT_SIZE,
        .help = "Virtual disk size"
    },
    {
        /* "dynamic" or "fixed"; vpc_create() defaults to dynamic */
        .name = BLOCK_OPT_SUBFMT,
        .type = OPT_STRING,
        .help =
            "Type of virtual hard disk format. Supported formats are "
            "{dynamic (default) | fixed} "
    },
    { NULL }
};
778
/* Driver description for the "vpc" format; registered by bdrv_vpc_init(). */
static BlockDriver bdrv_vpc = {
    .format_name = "vpc",
    .instance_size = sizeof(BDRVVPCState),

    .bdrv_probe = vpc_probe,
    .bdrv_open = vpc_open,
    .bdrv_close = vpc_close,
    .bdrv_create = vpc_create,

    /* The read/write callbacks are the lock-taking coroutine wrappers. */
    .bdrv_read = vpc_co_read,
    .bdrv_write = vpc_co_write,

    .create_options = vpc_create_options,
};
793
/* Register the vpc driver with the block layer. */
static void bdrv_vpc_init(void)
{
    bdrv_register(&bdrv_vpc);
}

/* Hook bdrv_vpc_init() into the block_init() module initialisation macro. */
block_init(bdrv_vpc_init);
800