qemu/block/vpc.c
<<
>>
Prefs
   1/*
   2 * Block driver for Connectix / Microsoft Virtual PC images
   3 *
   4 * Copyright (c) 2005 Alex Beregszaszi
   5 * Copyright (c) 2009 Kevin Wolf <kwolf@suse.de>
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25#include "qemu-common.h"
  26#include "block/block_int.h"
  27#include "qemu/module.h"
  28#include "migration/migration.h"
  29#if defined(CONFIG_UUID)
  30#include <uuid/uuid.h>
  31#endif
  32
  33/**************************************************************/
  34
  35#define HEADER_SIZE 512
  36
  37//#define CACHE
  38
  39enum vhd_type {
  40    VHD_FIXED           = 2,
  41    VHD_DYNAMIC         = 3,
  42    VHD_DIFFERENCING    = 4,
  43};
  44
  45// Seconds since Jan 1, 2000 0:00:00 (UTC)
  46#define VHD_TIMESTAMP_BASE 946684800
  47
  48#define VHD_MAX_SECTORS       (65535LL * 255 * 255)
  49#define VHD_MAX_GEOMETRY      (65535LL *  16 * 255)
  50
  51// always big-endian
  52typedef struct vhd_footer {
  53    char        creator[8]; // "conectix"
  54    uint32_t    features;
  55    uint32_t    version;
  56
  57    // Offset of next header structure, 0xFFFFFFFF if none
  58    uint64_t    data_offset;
  59
  60    // Seconds since Jan 1, 2000 0:00:00 (UTC)
  61    uint32_t    timestamp;
  62
  63    char        creator_app[4]; // "vpc "
  64    uint16_t    major;
  65    uint16_t    minor;
  66    char        creator_os[4]; // "Wi2k"
  67
  68    uint64_t    orig_size;
  69    uint64_t    current_size;
  70
  71    uint16_t    cyls;
  72    uint8_t     heads;
  73    uint8_t     secs_per_cyl;
  74
  75    uint32_t    type;
  76
  77    // Checksum of the Hard Disk Footer ("one's complement of the sum of all
  78    // the bytes in the footer without the checksum field")
  79    uint32_t    checksum;
  80
  81    // UUID used to identify a parent hard disk (backing file)
  82    uint8_t     uuid[16];
  83
  84    uint8_t     in_saved_state;
  85} QEMU_PACKED VHDFooter;
  86
  87typedef struct vhd_dyndisk_header {
  88    char        magic[8]; // "cxsparse"
  89
  90    // Offset of next header structure, 0xFFFFFFFF if none
  91    uint64_t    data_offset;
  92
  93    // Offset of the Block Allocation Table (BAT)
  94    uint64_t    table_offset;
  95
  96    uint32_t    version;
  97    uint32_t    max_table_entries; // 32bit/entry
  98
  99    // 2 MB by default, must be a power of two
 100    uint32_t    block_size;
 101
 102    uint32_t    checksum;
 103    uint8_t     parent_uuid[16];
 104    uint32_t    parent_timestamp;
 105    uint32_t    reserved;
 106
 107    // Backing file name (in UTF-16)
 108    uint8_t     parent_name[512];
 109
 110    struct {
 111        uint32_t    platform;
 112        uint32_t    data_space;
 113        uint32_t    data_length;
 114        uint32_t    reserved;
 115        uint64_t    data_offset;
 116    } parent_locator[8];
 117} QEMU_PACKED VHDDynDiskHeader;
 118
 119typedef struct BDRVVPCState {
 120    CoMutex lock;
 121    uint8_t footer_buf[HEADER_SIZE];
 122    uint64_t free_data_block_offset;
 123    int max_table_entries;
 124    uint32_t *pagetable;
 125    uint64_t bat_offset;
 126    uint64_t last_bitmap_offset;
 127
 128    uint32_t block_size;
 129    uint32_t bitmap_size;
 130
 131#ifdef CACHE
 132    uint8_t *pageentry_u8;
 133    uint32_t *pageentry_u32;
 134    uint16_t *pageentry_u16;
 135
 136    uint64_t last_bitmap;
 137#endif
 138
 139    Error *migration_blocker;
 140} BDRVVPCState;
 141
 142static uint32_t vpc_checksum(uint8_t* buf, size_t size)
 143{
 144    uint32_t res = 0;
 145    int i;
 146
 147    for (i = 0; i < size; i++)
 148        res += buf[i];
 149
 150    return ~res;
 151}
 152
 153
 154static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
 155{
 156    if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8))
 157        return 100;
 158    return 0;
 159}
 160
 161static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
 162                    Error **errp)
 163{
 164    BDRVVPCState *s = bs->opaque;
 165    int i;
 166    VHDFooter *footer;
 167    VHDDynDiskHeader *dyndisk_header;
 168    uint8_t buf[HEADER_SIZE];
 169    uint32_t checksum;
 170    uint64_t computed_size;
 171    uint64_t pagetable_size;
 172    int disk_type = VHD_DYNAMIC;
 173    int ret;
 174
 175    ret = bdrv_pread(bs->file->bs, 0, s->footer_buf, HEADER_SIZE);
 176    if (ret < 0) {
 177        goto fail;
 178    }
 179
 180    footer = (VHDFooter *) s->footer_buf;
 181    if (strncmp(footer->creator, "conectix", 8)) {
 182        int64_t offset = bdrv_getlength(bs->file->bs);
 183        if (offset < 0) {
 184            ret = offset;
 185            goto fail;
 186        } else if (offset < HEADER_SIZE) {
 187            ret = -EINVAL;
 188            goto fail;
 189        }
 190
 191        /* If a fixed disk, the footer is found only at the end of the file */
 192        ret = bdrv_pread(bs->file->bs, offset-HEADER_SIZE, s->footer_buf,
 193                         HEADER_SIZE);
 194        if (ret < 0) {
 195            goto fail;
 196        }
 197        if (strncmp(footer->creator, "conectix", 8)) {
 198            error_setg(errp, "invalid VPC image");
 199            ret = -EINVAL;
 200            goto fail;
 201        }
 202        disk_type = VHD_FIXED;
 203    }
 204
 205    checksum = be32_to_cpu(footer->checksum);
 206    footer->checksum = 0;
 207    if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum)
 208        fprintf(stderr, "block-vpc: The header checksum of '%s' is "
 209            "incorrect.\n", bs->filename);
 210
 211    /* Write 'checksum' back to footer, or else will leave it with zero. */
 212    footer->checksum = cpu_to_be32(checksum);
 213
 214    // The visible size of a image in Virtual PC depends on the geometry
 215    // rather than on the size stored in the footer (the size in the footer
 216    // is too large usually)
 217    bs->total_sectors = (int64_t)
 218        be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
 219
 220    /* Images that have exactly the maximum geometry are probably bigger and
 221     * would be truncated if we adhered to the geometry for them. Rely on
 222     * footer->current_size for them. */
 223    if (bs->total_sectors == VHD_MAX_GEOMETRY) {
 224        bs->total_sectors = be64_to_cpu(footer->current_size) /
 225                            BDRV_SECTOR_SIZE;
 226    }
 227
 228    /* Allow a maximum disk size of approximately 2 TB */
 229    if (bs->total_sectors >= VHD_MAX_SECTORS) {
 230        ret = -EFBIG;
 231        goto fail;
 232    }
 233
 234    if (disk_type == VHD_DYNAMIC) {
 235        ret = bdrv_pread(bs->file->bs, be64_to_cpu(footer->data_offset), buf,
 236                         HEADER_SIZE);
 237        if (ret < 0) {
 238            goto fail;
 239        }
 240
 241        dyndisk_header = (VHDDynDiskHeader *) buf;
 242
 243        if (strncmp(dyndisk_header->magic, "cxsparse", 8)) {
 244            ret = -EINVAL;
 245            goto fail;
 246        }
 247
 248        s->block_size = be32_to_cpu(dyndisk_header->block_size);
 249        if (!is_power_of_2(s->block_size) || s->block_size < BDRV_SECTOR_SIZE) {
 250            error_setg(errp, "Invalid block size %" PRIu32, s->block_size);
 251            ret = -EINVAL;
 252            goto fail;
 253        }
 254        s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;
 255
 256        s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
 257
 258        if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) {
 259            ret = -EINVAL;
 260            goto fail;
 261        }
 262        if (s->max_table_entries > (VHD_MAX_SECTORS * 512) / s->block_size) {
 263            ret = -EINVAL;
 264            goto fail;
 265        }
 266
 267        computed_size = (uint64_t) s->max_table_entries * s->block_size;
 268        if (computed_size < bs->total_sectors * 512) {
 269            ret = -EINVAL;
 270            goto fail;
 271        }
 272
 273        if (s->max_table_entries > SIZE_MAX / 4 ||
 274            s->max_table_entries > (int) INT_MAX / 4) {
 275            error_setg(errp, "Max Table Entries too large (%" PRId32 ")",
 276                        s->max_table_entries);
 277            ret = -EINVAL;
 278            goto fail;
 279        }
 280
 281        pagetable_size = (uint64_t) s->max_table_entries * 4;
 282
 283        s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size);
 284        if (s->pagetable == NULL) {
 285            ret = -ENOMEM;
 286            goto fail;
 287        }
 288
 289        s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
 290
 291        ret = bdrv_pread(bs->file->bs, s->bat_offset, s->pagetable,
 292                         pagetable_size);
 293        if (ret < 0) {
 294            goto fail;
 295        }
 296
 297        s->free_data_block_offset =
 298            ROUND_UP(s->bat_offset + pagetable_size, 512);
 299
 300        for (i = 0; i < s->max_table_entries; i++) {
 301            be32_to_cpus(&s->pagetable[i]);
 302            if (s->pagetable[i] != 0xFFFFFFFF) {
 303                int64_t next = (512 * (int64_t) s->pagetable[i]) +
 304                    s->bitmap_size + s->block_size;
 305
 306                if (next > s->free_data_block_offset) {
 307                    s->free_data_block_offset = next;
 308                }
 309            }
 310        }
 311
 312        if (s->free_data_block_offset > bdrv_getlength(bs->file->bs)) {
 313            error_setg(errp, "block-vpc: free_data_block_offset points after "
 314                             "the end of file. The image has been truncated.");
 315            ret = -EINVAL;
 316            goto fail;
 317        }
 318
 319        s->last_bitmap_offset = (int64_t) -1;
 320
 321#ifdef CACHE
 322        s->pageentry_u8 = g_malloc(512);
 323        s->pageentry_u32 = s->pageentry_u8;
 324        s->pageentry_u16 = s->pageentry_u8;
 325        s->last_pagetable = -1;
 326#endif
 327    }
 328
 329    qemu_co_mutex_init(&s->lock);
 330
 331    /* Disable migration when VHD images are used */
 332    error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
 333               "does not support live migration",
 334               bdrv_get_device_or_node_name(bs));
 335    migrate_add_blocker(s->migration_blocker);
 336
 337    return 0;
 338
 339fail:
 340    qemu_vfree(s->pagetable);
 341#ifdef CACHE
 342    g_free(s->pageentry_u8);
 343#endif
 344    return ret;
 345}
 346
 347static int vpc_reopen_prepare(BDRVReopenState *state,
 348                              BlockReopenQueue *queue, Error **errp)
 349{
 350    return 0;
 351}
 352
 353/*
 354 * Returns the absolute byte offset of the given sector in the image file.
 355 * If the sector is not allocated, -1 is returned instead.
 356 *
 357 * The parameter write must be 1 if the offset will be used for a write
 358 * operation (the block bitmaps is updated then), 0 otherwise.
 359 */
 360static inline int64_t get_sector_offset(BlockDriverState *bs,
 361    int64_t sector_num, int write)
 362{
 363    BDRVVPCState *s = bs->opaque;
 364    uint64_t offset = sector_num * 512;
 365    uint64_t bitmap_offset, block_offset;
 366    uint32_t pagetable_index, pageentry_index;
 367
 368    pagetable_index = offset / s->block_size;
 369    pageentry_index = (offset % s->block_size) / 512;
 370
 371    if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
 372        return -1; // not allocated
 373
 374    bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
 375    block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index);
 376
 377    // We must ensure that we don't write to any sectors which are marked as
 378    // unused in the bitmap. We get away with setting all bits in the block
 379    // bitmap each time we write to a new block. This might cause Virtual PC to
 380    // miss sparse read optimization, but it's not a problem in terms of
 381    // correctness.
 382    if (write && (s->last_bitmap_offset != bitmap_offset)) {
 383        uint8_t bitmap[s->bitmap_size];
 384
 385        s->last_bitmap_offset = bitmap_offset;
 386        memset(bitmap, 0xff, s->bitmap_size);
 387        bdrv_pwrite_sync(bs->file->bs, bitmap_offset, bitmap, s->bitmap_size);
 388    }
 389
 390    return block_offset;
 391}
 392
 393/*
 394 * Writes the footer to the end of the image file. This is needed when the
 395 * file grows as it overwrites the old footer
 396 *
 397 * Returns 0 on success and < 0 on error
 398 */
 399static int rewrite_footer(BlockDriverState* bs)
 400{
 401    int ret;
 402    BDRVVPCState *s = bs->opaque;
 403    int64_t offset = s->free_data_block_offset;
 404
 405    ret = bdrv_pwrite_sync(bs->file->bs, offset, s->footer_buf, HEADER_SIZE);
 406    if (ret < 0)
 407        return ret;
 408
 409    return 0;
 410}
 411
 412/*
 413 * Allocates a new block. This involves writing a new footer and updating
 414 * the Block Allocation Table to use the space at the old end of the image
 415 * file (overwriting the old footer)
 416 *
 417 * Returns the sectors' offset in the image file on success and < 0 on error
 418 */
 419static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
 420{
 421    BDRVVPCState *s = bs->opaque;
 422    int64_t bat_offset;
 423    uint32_t index, bat_value;
 424    int ret;
 425    uint8_t bitmap[s->bitmap_size];
 426
 427    // Check if sector_num is valid
 428    if ((sector_num < 0) || (sector_num > bs->total_sectors))
 429        return -1;
 430
 431    // Write entry into in-memory BAT
 432    index = (sector_num * 512) / s->block_size;
 433    if (s->pagetable[index] != 0xFFFFFFFF)
 434        return -1;
 435
 436    s->pagetable[index] = s->free_data_block_offset / 512;
 437
 438    // Initialize the block's bitmap
 439    memset(bitmap, 0xff, s->bitmap_size);
 440    ret = bdrv_pwrite_sync(bs->file->bs, s->free_data_block_offset, bitmap,
 441        s->bitmap_size);
 442    if (ret < 0) {
 443        return ret;
 444    }
 445
 446    // Write new footer (the old one will be overwritten)
 447    s->free_data_block_offset += s->block_size + s->bitmap_size;
 448    ret = rewrite_footer(bs);
 449    if (ret < 0)
 450        goto fail;
 451
 452    // Write BAT entry to disk
 453    bat_offset = s->bat_offset + (4 * index);
 454    bat_value = cpu_to_be32(s->pagetable[index]);
 455    ret = bdrv_pwrite_sync(bs->file->bs, bat_offset, &bat_value, 4);
 456    if (ret < 0)
 457        goto fail;
 458
 459    return get_sector_offset(bs, sector_num, 0);
 460
 461fail:
 462    s->free_data_block_offset -= (s->block_size + s->bitmap_size);
 463    return -1;
 464}
 465
 466static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 467{
 468    BDRVVPCState *s = (BDRVVPCState *)bs->opaque;
 469    VHDFooter *footer = (VHDFooter *) s->footer_buf;
 470
 471    if (be32_to_cpu(footer->type) != VHD_FIXED) {
 472        bdi->cluster_size = s->block_size;
 473    }
 474
 475    bdi->unallocated_blocks_are_zero = true;
 476    return 0;
 477}
 478
 479static int vpc_read(BlockDriverState *bs, int64_t sector_num,
 480                    uint8_t *buf, int nb_sectors)
 481{
 482    BDRVVPCState *s = bs->opaque;
 483    int ret;
 484    int64_t offset;
 485    int64_t sectors, sectors_per_block;
 486    VHDFooter *footer = (VHDFooter *) s->footer_buf;
 487
 488    if (be32_to_cpu(footer->type) == VHD_FIXED) {
 489        return bdrv_read(bs->file->bs, sector_num, buf, nb_sectors);
 490    }
 491    while (nb_sectors > 0) {
 492        offset = get_sector_offset(bs, sector_num, 0);
 493
 494        sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
 495        sectors = sectors_per_block - (sector_num % sectors_per_block);
 496        if (sectors > nb_sectors) {
 497            sectors = nb_sectors;
 498        }
 499
 500        if (offset == -1) {
 501            memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
 502        } else {
 503            ret = bdrv_pread(bs->file->bs, offset, buf,
 504                sectors * BDRV_SECTOR_SIZE);
 505            if (ret != sectors * BDRV_SECTOR_SIZE) {
 506                return -1;
 507            }
 508        }
 509
 510        nb_sectors -= sectors;
 511        sector_num += sectors;
 512        buf += sectors * BDRV_SECTOR_SIZE;
 513    }
 514    return 0;
 515}
 516
 517static coroutine_fn int vpc_co_read(BlockDriverState *bs, int64_t sector_num,
 518                                    uint8_t *buf, int nb_sectors)
 519{
 520    int ret;
 521    BDRVVPCState *s = bs->opaque;
 522    qemu_co_mutex_lock(&s->lock);
 523    ret = vpc_read(bs, sector_num, buf, nb_sectors);
 524    qemu_co_mutex_unlock(&s->lock);
 525    return ret;
 526}
 527
 528static int vpc_write(BlockDriverState *bs, int64_t sector_num,
 529    const uint8_t *buf, int nb_sectors)
 530{
 531    BDRVVPCState *s = bs->opaque;
 532    int64_t offset;
 533    int64_t sectors, sectors_per_block;
 534    int ret;
 535    VHDFooter *footer =  (VHDFooter *) s->footer_buf;
 536
 537    if (be32_to_cpu(footer->type) == VHD_FIXED) {
 538        return bdrv_write(bs->file->bs, sector_num, buf, nb_sectors);
 539    }
 540    while (nb_sectors > 0) {
 541        offset = get_sector_offset(bs, sector_num, 1);
 542
 543        sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
 544        sectors = sectors_per_block - (sector_num % sectors_per_block);
 545        if (sectors > nb_sectors) {
 546            sectors = nb_sectors;
 547        }
 548
 549        if (offset == -1) {
 550            offset = alloc_block(bs, sector_num);
 551            if (offset < 0)
 552                return -1;
 553        }
 554
 555        ret = bdrv_pwrite(bs->file->bs, offset, buf,
 556                          sectors * BDRV_SECTOR_SIZE);
 557        if (ret != sectors * BDRV_SECTOR_SIZE) {
 558            return -1;
 559        }
 560
 561        nb_sectors -= sectors;
 562        sector_num += sectors;
 563        buf += sectors * BDRV_SECTOR_SIZE;
 564    }
 565
 566    return 0;
 567}
 568
 569static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num,
 570                                     const uint8_t *buf, int nb_sectors)
 571{
 572    int ret;
 573    BDRVVPCState *s = bs->opaque;
 574    qemu_co_mutex_lock(&s->lock);
 575    ret = vpc_write(bs, sector_num, buf, nb_sectors);
 576    qemu_co_mutex_unlock(&s->lock);
 577    return ret;
 578}
 579
 580static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
 581        int64_t sector_num, int nb_sectors, int *pnum)
 582{
 583    BDRVVPCState *s = bs->opaque;
 584    VHDFooter *footer = (VHDFooter*) s->footer_buf;
 585    int64_t start, offset;
 586    bool allocated;
 587    int n;
 588
 589    if (be32_to_cpu(footer->type) == VHD_FIXED) {
 590        *pnum = nb_sectors;
 591        return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
 592               (sector_num << BDRV_SECTOR_BITS);
 593    }
 594
 595    offset = get_sector_offset(bs, sector_num, 0);
 596    start = offset;
 597    allocated = (offset != -1);
 598    *pnum = 0;
 599
 600    do {
 601        /* All sectors in a block are contiguous (without using the bitmap) */
 602        n = ROUND_UP(sector_num + 1, s->block_size / BDRV_SECTOR_SIZE)
 603          - sector_num;
 604        n = MIN(n, nb_sectors);
 605
 606        *pnum += n;
 607        sector_num += n;
 608        nb_sectors -= n;
 609        /* *pnum can't be greater than one block for allocated
 610         * sectors since there is always a bitmap in between. */
 611        if (allocated) {
 612            return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
 613        }
 614        if (nb_sectors == 0) {
 615            break;
 616        }
 617        offset = get_sector_offset(bs, sector_num, 0);
 618    } while (offset == -1);
 619
 620    return 0;
 621}
 622
 623/*
 624 * Calculates the number of cylinders, heads and sectors per cylinder
 625 * based on a given number of sectors. This is the algorithm described
 626 * in the VHD specification.
 627 *
 628 * Note that the geometry doesn't always exactly match total_sectors but
 629 * may round it down.
 630 *
 631 * Returns 0 on success, -EFBIG if the size is larger than ~2 TB. Override
 632 * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB)
 633 * and instead allow up to 255 heads.
 634 */
 635static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
 636    uint8_t* heads, uint8_t* secs_per_cyl)
 637{
 638    uint32_t cyls_times_heads;
 639
 640    total_sectors = MIN(total_sectors, VHD_MAX_GEOMETRY);
 641
 642    if (total_sectors >= 65535LL * 16 * 63) {
 643        *secs_per_cyl = 255;
 644        *heads = 16;
 645        cyls_times_heads = total_sectors / *secs_per_cyl;
 646    } else {
 647        *secs_per_cyl = 17;
 648        cyls_times_heads = total_sectors / *secs_per_cyl;
 649        *heads = (cyls_times_heads + 1023) / 1024;
 650
 651        if (*heads < 4) {
 652            *heads = 4;
 653        }
 654
 655        if (cyls_times_heads >= (*heads * 1024) || *heads > 16) {
 656            *secs_per_cyl = 31;
 657            *heads = 16;
 658            cyls_times_heads = total_sectors / *secs_per_cyl;
 659        }
 660
 661        if (cyls_times_heads >= (*heads * 1024)) {
 662            *secs_per_cyl = 63;
 663            *heads = 16;
 664            cyls_times_heads = total_sectors / *secs_per_cyl;
 665        }
 666    }
 667
 668    *cyls = cyls_times_heads / *heads;
 669
 670    return 0;
 671}
 672
 673static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
 674                               int64_t total_sectors)
 675{
 676    VHDDynDiskHeader *dyndisk_header =
 677        (VHDDynDiskHeader *) buf;
 678    size_t block_size, num_bat_entries;
 679    int i;
 680    int ret;
 681    int64_t offset = 0;
 682
 683    // Write the footer (twice: at the beginning and at the end)
 684    block_size = 0x200000;
 685    num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
 686
 687    ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE);
 688    if (ret) {
 689        goto fail;
 690    }
 691
 692    offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
 693    ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE);
 694    if (ret < 0) {
 695        goto fail;
 696    }
 697
 698    // Write the initial BAT
 699    offset = 3 * 512;
 700
 701    memset(buf, 0xFF, 512);
 702    for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
 703        ret = bdrv_pwrite_sync(bs, offset, buf, 512);
 704        if (ret < 0) {
 705            goto fail;
 706        }
 707        offset += 512;
 708    }
 709
 710    // Prepare the Dynamic Disk Header
 711    memset(buf, 0, 1024);
 712
 713    memcpy(dyndisk_header->magic, "cxsparse", 8);
 714
 715    /*
 716     * Note: The spec is actually wrong here for data_offset, it says
 717     * 0xFFFFFFFF, but MS tools expect all 64 bits to be set.
 718     */
 719    dyndisk_header->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
 720    dyndisk_header->table_offset = cpu_to_be64(3 * 512);
 721    dyndisk_header->version = cpu_to_be32(0x00010000);
 722    dyndisk_header->block_size = cpu_to_be32(block_size);
 723    dyndisk_header->max_table_entries = cpu_to_be32(num_bat_entries);
 724
 725    dyndisk_header->checksum = cpu_to_be32(vpc_checksum(buf, 1024));
 726
 727    // Write the header
 728    offset = 512;
 729
 730    ret = bdrv_pwrite_sync(bs, offset, buf, 1024);
 731    if (ret < 0) {
 732        goto fail;
 733    }
 734
 735 fail:
 736    return ret;
 737}
 738
 739static int create_fixed_disk(BlockDriverState *bs, uint8_t *buf,
 740                             int64_t total_size)
 741{
 742    int ret;
 743
 744    /* Add footer to total size */
 745    total_size += HEADER_SIZE;
 746
 747    ret = bdrv_truncate(bs, total_size);
 748    if (ret < 0) {
 749        return ret;
 750    }
 751
 752    ret = bdrv_pwrite_sync(bs, total_size - HEADER_SIZE, buf, HEADER_SIZE);
 753    if (ret < 0) {
 754        return ret;
 755    }
 756
 757    return ret;
 758}
 759
 760static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
 761{
 762    uint8_t buf[1024];
 763    VHDFooter *footer = (VHDFooter *) buf;
 764    char *disk_type_param;
 765    int i;
 766    uint16_t cyls = 0;
 767    uint8_t heads = 0;
 768    uint8_t secs_per_cyl = 0;
 769    int64_t total_sectors;
 770    int64_t total_size;
 771    int disk_type;
 772    int ret = -EIO;
 773    Error *local_err = NULL;
 774    BlockDriverState *bs = NULL;
 775
 776    /* Read out options */
 777    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
 778                          BDRV_SECTOR_SIZE);
 779    disk_type_param = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
 780    if (disk_type_param) {
 781        if (!strcmp(disk_type_param, "dynamic")) {
 782            disk_type = VHD_DYNAMIC;
 783        } else if (!strcmp(disk_type_param, "fixed")) {
 784            disk_type = VHD_FIXED;
 785        } else {
 786            ret = -EINVAL;
 787            goto out;
 788        }
 789    } else {
 790        disk_type = VHD_DYNAMIC;
 791    }
 792
 793    ret = bdrv_create_file(filename, opts, &local_err);
 794    if (ret < 0) {
 795        error_propagate(errp, local_err);
 796        goto out;
 797    }
 798    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
 799                    &local_err);
 800    if (ret < 0) {
 801        error_propagate(errp, local_err);
 802        goto out;
 803    }
 804
 805    /*
 806     * Calculate matching total_size and geometry. Increase the number of
 807     * sectors requested until we get enough (or fail). This ensures that
 808     * qemu-img convert doesn't truncate images, but rather rounds up.
 809     *
 810     * If the image size can't be represented by a spec conform CHS geometry,
 811     * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use
 812     * the image size from the VHD footer to calculate total_sectors.
 813     */
 814    total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
 815    for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
 816        calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
 817    }
 818
 819    if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) {
 820        total_sectors = total_size / BDRV_SECTOR_SIZE;
 821        /* Allow a maximum disk size of approximately 2 TB */
 822        if (total_sectors > VHD_MAX_SECTORS) {
 823            ret = -EFBIG;
 824            goto out;
 825        }
 826    } else {
 827        total_sectors = (int64_t)cyls * heads * secs_per_cyl;
 828        total_size = total_sectors * BDRV_SECTOR_SIZE;
 829    }
 830
 831    /* Prepare the Hard Disk Footer */
 832    memset(buf, 0, 1024);
 833
 834    memcpy(footer->creator, "conectix", 8);
 835    /* TODO Check if "qemu" creator_app is ok for VPC */
 836    memcpy(footer->creator_app, "qemu", 4);
 837    memcpy(footer->creator_os, "Wi2k", 4);
 838
 839    footer->features = cpu_to_be32(0x02);
 840    footer->version = cpu_to_be32(0x00010000);
 841    if (disk_type == VHD_DYNAMIC) {
 842        footer->data_offset = cpu_to_be64(HEADER_SIZE);
 843    } else {
 844        footer->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
 845    }
 846    footer->timestamp = cpu_to_be32(time(NULL) - VHD_TIMESTAMP_BASE);
 847
 848    /* Version of Virtual PC 2007 */
 849    footer->major = cpu_to_be16(0x0005);
 850    footer->minor = cpu_to_be16(0x0003);
 851    footer->orig_size = cpu_to_be64(total_size);
 852    footer->current_size = cpu_to_be64(total_size);
 853    footer->cyls = cpu_to_be16(cyls);
 854    footer->heads = heads;
 855    footer->secs_per_cyl = secs_per_cyl;
 856
 857    footer->type = cpu_to_be32(disk_type);
 858
 859#if defined(CONFIG_UUID)
 860    uuid_generate(footer->uuid);
 861#endif
 862
 863    footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE));
 864
 865    if (disk_type == VHD_DYNAMIC) {
 866        ret = create_dynamic_disk(bs, buf, total_sectors);
 867    } else {
 868        ret = create_fixed_disk(bs, buf, total_size);
 869    }
 870
 871out:
 872    bdrv_unref(bs);
 873    g_free(disk_type_param);
 874    return ret;
 875}
 876
 877static int vpc_has_zero_init(BlockDriverState *bs)
 878{
 879    BDRVVPCState *s = bs->opaque;
 880    VHDFooter *footer =  (VHDFooter *) s->footer_buf;
 881
 882    if (be32_to_cpu(footer->type) == VHD_FIXED) {
 883        return bdrv_has_zero_init(bs->file->bs);
 884    } else {
 885        return 1;
 886    }
 887}
 888
 889static void vpc_close(BlockDriverState *bs)
 890{
 891    BDRVVPCState *s = bs->opaque;
 892    qemu_vfree(s->pagetable);
 893#ifdef CACHE
 894    g_free(s->pageentry_u8);
 895#endif
 896
 897    migrate_del_blocker(s->migration_blocker);
 898    error_free(s->migration_blocker);
 899}
 900
 901static QemuOptsList vpc_create_opts = {
 902    .name = "vpc-create-opts",
 903    .head = QTAILQ_HEAD_INITIALIZER(vpc_create_opts.head),
 904    .desc = {
 905        {
 906            .name = BLOCK_OPT_SIZE,
 907            .type = QEMU_OPT_SIZE,
 908            .help = "Virtual disk size"
 909        },
 910        {
 911            .name = BLOCK_OPT_SUBFMT,
 912            .type = QEMU_OPT_STRING,
 913            .help =
 914                "Type of virtual hard disk format. Supported formats are "
 915                "{dynamic (default) | fixed} "
 916        },
 917        { /* end of list */ }
 918    }
 919};
 920
 921static BlockDriver bdrv_vpc = {
 922    .format_name    = "vpc",
 923    .instance_size  = sizeof(BDRVVPCState),
 924
 925    .bdrv_probe             = vpc_probe,
 926    .bdrv_open              = vpc_open,
 927    .bdrv_close             = vpc_close,
 928    .bdrv_reopen_prepare    = vpc_reopen_prepare,
 929    .bdrv_create            = vpc_create,
 930
 931    .bdrv_read                  = vpc_co_read,
 932    .bdrv_write                 = vpc_co_write,
 933    .bdrv_co_get_block_status   = vpc_co_get_block_status,
 934
 935    .bdrv_get_info          = vpc_get_info,
 936
 937    .create_opts            = &vpc_create_opts,
 938    .bdrv_has_zero_init     = vpc_has_zero_init,
 939};
 940
 941static void bdrv_vpc_init(void)
 942{
 943    bdrv_register(&bdrv_vpc);
 944}
 945
 946block_init(bdrv_vpc_init);
 947