qemu/block/vpc.c
<<
>>
Prefs
   1/*
   2 * Block driver for Connectix / Microsoft Virtual PC images
   3 *
   4 * Copyright (c) 2005 Alex Beregszaszi
   5 * Copyright (c) 2009 Kevin Wolf <kwolf@suse.de>
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25#include "qemu/osdep.h"
  26#include "qapi/error.h"
  27#include "qemu-common.h"
  28#include "block/block_int.h"
  29#include "sysemu/block-backend.h"
  30#include "qemu/module.h"
  31#include "migration/migration.h"
  32#include "qemu/bswap.h"
  33#include "qemu/uuid.h"
  34
  35/**************************************************************/
  36
  37#define HEADER_SIZE 512
  38
  39//#define CACHE
  40
  41enum vhd_type {
  42    VHD_FIXED           = 2,
  43    VHD_DYNAMIC         = 3,
  44    VHD_DIFFERENCING    = 4,
  45};
  46
  47/* Seconds since Jan 1, 2000 0:00:00 (UTC) */
  48#define VHD_TIMESTAMP_BASE 946684800
  49
  50#define VHD_CHS_MAX_C   65535LL
  51#define VHD_CHS_MAX_H   16
  52#define VHD_CHS_MAX_S   255
  53
  54#define VHD_MAX_SECTORS       0xff000000    /* 2040 GiB max image size */
  55#define VHD_MAX_GEOMETRY      (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S)
  56
  57#define VPC_OPT_FORCE_SIZE "force_size"
  58
  59/* always big-endian */
  60typedef struct vhd_footer {
  61    char        creator[8]; /* "conectix" */
  62    uint32_t    features;
  63    uint32_t    version;
  64
  65    /* Offset of next header structure, 0xFFFFFFFF if none */
  66    uint64_t    data_offset;
  67
  68    /* Seconds since Jan 1, 2000 0:00:00 (UTC) */
  69    uint32_t    timestamp;
  70
  71    char        creator_app[4]; /*  e.g., "vpc " */
  72    uint16_t    major;
  73    uint16_t    minor;
  74    char        creator_os[4]; /* "Wi2k" */
  75
  76    uint64_t    orig_size;
  77    uint64_t    current_size;
  78
  79    uint16_t    cyls;
  80    uint8_t     heads;
  81    uint8_t     secs_per_cyl;
  82
  83    uint32_t    type;
  84
  85    /* Checksum of the Hard Disk Footer ("one's complement of the sum of all
  86       the bytes in the footer without the checksum field") */
  87    uint32_t    checksum;
  88
  89    /* UUID used to identify a parent hard disk (backing file) */
  90    QemuUUID    uuid;
  91
  92    uint8_t     in_saved_state;
  93} QEMU_PACKED VHDFooter;
  94
  95typedef struct vhd_dyndisk_header {
  96    char        magic[8]; /* "cxsparse" */
  97
  98    /* Offset of next header structure, 0xFFFFFFFF if none */
  99    uint64_t    data_offset;
 100
 101    /* Offset of the Block Allocation Table (BAT) */
 102    uint64_t    table_offset;
 103
 104    uint32_t    version;
 105    uint32_t    max_table_entries; /* 32bit/entry */
 106
 107    /* 2 MB by default, must be a power of two */
 108    uint32_t    block_size;
 109
 110    uint32_t    checksum;
 111    uint8_t     parent_uuid[16];
 112    uint32_t    parent_timestamp;
 113    uint32_t    reserved;
 114
 115    /* Backing file name (in UTF-16) */
 116    uint8_t     parent_name[512];
 117
 118    struct {
 119        uint32_t    platform;
 120        uint32_t    data_space;
 121        uint32_t    data_length;
 122        uint32_t    reserved;
 123        uint64_t    data_offset;
 124    } parent_locator[8];
 125} QEMU_PACKED VHDDynDiskHeader;
 126
 127typedef struct BDRVVPCState {
 128    CoMutex lock;
 129    uint8_t footer_buf[HEADER_SIZE];
 130    uint64_t free_data_block_offset;
 131    int max_table_entries;
 132    uint32_t *pagetable;
 133    uint64_t bat_offset;
 134    uint64_t last_bitmap_offset;
 135
 136    uint32_t block_size;
 137    uint32_t bitmap_size;
 138    bool force_use_chs;
 139    bool force_use_sz;
 140
 141#ifdef CACHE
 142    uint8_t *pageentry_u8;
 143    uint32_t *pageentry_u32;
 144    uint16_t *pageentry_u16;
 145
 146    uint64_t last_bitmap;
 147#endif
 148
 149    Error *migration_blocker;
 150} BDRVVPCState;
 151
 152#define VPC_OPT_SIZE_CALC "force_size_calc"
 153static QemuOptsList vpc_runtime_opts = {
 154    .name = "vpc-runtime-opts",
 155    .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head),
 156    .desc = {
 157        {
 158            .name = VPC_OPT_SIZE_CALC,
 159            .type = QEMU_OPT_STRING,
 160            .help = "Force disk size calculation to use either CHS geometry, "
 161                    "or use the disk current_size specified in the VHD footer. "
 162                    "{chs, current_size}"
 163        },
 164        { /* end of list */ }
 165    }
 166};
 167
 168static uint32_t vpc_checksum(uint8_t* buf, size_t size)
 169{
 170    uint32_t res = 0;
 171    int i;
 172
 173    for (i = 0; i < size; i++)
 174        res += buf[i];
 175
 176    return ~res;
 177}
 178
 179
 180static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
 181{
 182    if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8))
 183        return 100;
 184    return 0;
 185}
 186
 187static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts,
 188                              Error **errp)
 189{
 190    BDRVVPCState *s = bs->opaque;
 191    const char *size_calc;
 192
 193    size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC);
 194
 195    if (!size_calc) {
 196       /* no override, use autodetect only */
 197    } else if (!strcmp(size_calc, "current_size")) {
 198        s->force_use_sz = true;
 199    } else if (!strcmp(size_calc, "chs")) {
 200        s->force_use_chs = true;
 201    } else {
 202        error_setg(errp, "Invalid size calculation mode: '%s'", size_calc);
 203    }
 204}
 205
 206static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
 207                    Error **errp)
 208{
 209    BDRVVPCState *s = bs->opaque;
 210    int i;
 211    VHDFooter *footer;
 212    VHDDynDiskHeader *dyndisk_header;
 213    QemuOpts *opts = NULL;
 214    Error *local_err = NULL;
 215    bool use_chs;
 216    uint8_t buf[HEADER_SIZE];
 217    uint32_t checksum;
 218    uint64_t computed_size;
 219    uint64_t pagetable_size;
 220    int disk_type = VHD_DYNAMIC;
 221    int ret;
 222
 223    opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
 224    qemu_opts_absorb_qdict(opts, options, &local_err);
 225    if (local_err) {
 226        error_propagate(errp, local_err);
 227        ret = -EINVAL;
 228        goto fail;
 229    }
 230
 231    vpc_parse_options(bs, opts, &local_err);
 232    if (local_err) {
 233        error_propagate(errp, local_err);
 234        ret = -EINVAL;
 235        goto fail;
 236    }
 237
 238    ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE);
 239    if (ret < 0) {
 240        error_setg(errp, "Unable to read VHD header");
 241        goto fail;
 242    }
 243
 244    footer = (VHDFooter *) s->footer_buf;
 245    if (strncmp(footer->creator, "conectix", 8)) {
 246        int64_t offset = bdrv_getlength(bs->file->bs);
 247        if (offset < 0) {
 248            ret = offset;
 249            error_setg(errp, "Invalid file size");
 250            goto fail;
 251        } else if (offset < HEADER_SIZE) {
 252            ret = -EINVAL;
 253            error_setg(errp, "File too small for a VHD header");
 254            goto fail;
 255        }
 256
 257        /* If a fixed disk, the footer is found only at the end of the file */
 258        ret = bdrv_pread(bs->file, offset-HEADER_SIZE, s->footer_buf,
 259                         HEADER_SIZE);
 260        if (ret < 0) {
 261            goto fail;
 262        }
 263        if (strncmp(footer->creator, "conectix", 8)) {
 264            error_setg(errp, "invalid VPC image");
 265            ret = -EINVAL;
 266            goto fail;
 267        }
 268        disk_type = VHD_FIXED;
 269    }
 270
 271    checksum = be32_to_cpu(footer->checksum);
 272    footer->checksum = 0;
 273    if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum)
 274        fprintf(stderr, "block-vpc: The header checksum of '%s' is "
 275            "incorrect.\n", bs->filename);
 276
 277    /* Write 'checksum' back to footer, or else will leave it with zero. */
 278    footer->checksum = cpu_to_be32(checksum);
 279
 280    /* The visible size of a image in Virtual PC depends on the geometry
 281       rather than on the size stored in the footer (the size in the footer
 282       is too large usually) */
 283    bs->total_sectors = (int64_t)
 284        be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
 285
 286    /* Microsoft Virtual PC and Microsoft Hyper-V produce and read
 287     * VHD image sizes differently.  VPC will rely on CHS geometry,
 288     * while Hyper-V and disk2vhd use the size specified in the footer.
 289     *
 290     * We use a couple of approaches to try and determine the correct method:
 291     * look at the Creator App field, and look for images that have CHS
 292     * geometry that is the maximum value.
 293     *
 294     * If the CHS geometry is the maximum CHS geometry, then we assume that
 295     * the size is the footer->current_size to avoid truncation.  Otherwise,
 296     * we follow the table based on footer->creator_app:
 297     *
 298     *  Known creator apps:
 299     *      'vpc '  :  CHS              Virtual PC (uses disk geometry)
 300     *      'qemu'  :  CHS              QEMU (uses disk geometry)
 301     *      'qem2'  :  current_size     QEMU (uses current_size)
 302     *      'win '  :  current_size     Hyper-V
 303     *      'd2v '  :  current_size     Disk2vhd
 304     *      'tap\0' :  current_size     XenServer
 305     *      'CTXS'  :  current_size     XenConverter
 306     *
 307     *  The user can override the table values via drive options, however
 308     *  even with an override we will still use current_size for images
 309     *  that have CHS geometry of the maximum size.
 310     */
 311    use_chs = (!!strncmp(footer->creator_app, "win ", 4) &&
 312               !!strncmp(footer->creator_app, "qem2", 4) &&
 313               !!strncmp(footer->creator_app, "d2v ", 4) &&
 314               !!strncmp(footer->creator_app, "CTXS", 4) &&
 315               !!memcmp(footer->creator_app, "tap", 4)) || s->force_use_chs;
 316
 317    if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) {
 318        bs->total_sectors = be64_to_cpu(footer->current_size) /
 319                                        BDRV_SECTOR_SIZE;
 320    }
 321
 322    /* Allow a maximum disk size of 2040 GiB */
 323    if (bs->total_sectors > VHD_MAX_SECTORS) {
 324        ret = -EFBIG;
 325        goto fail;
 326    }
 327
 328    if (disk_type == VHD_DYNAMIC) {
 329        ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
 330                         HEADER_SIZE);
 331        if (ret < 0) {
 332            error_setg(errp, "Error reading dynamic VHD header");
 333            goto fail;
 334        }
 335
 336        dyndisk_header = (VHDDynDiskHeader *) buf;
 337
 338        if (strncmp(dyndisk_header->magic, "cxsparse", 8)) {
 339            error_setg(errp, "Invalid header magic");
 340            ret = -EINVAL;
 341            goto fail;
 342        }
 343
 344        s->block_size = be32_to_cpu(dyndisk_header->block_size);
 345        if (!is_power_of_2(s->block_size) || s->block_size < BDRV_SECTOR_SIZE) {
 346            error_setg(errp, "Invalid block size %" PRIu32, s->block_size);
 347            ret = -EINVAL;
 348            goto fail;
 349        }
 350        s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;
 351
 352        s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
 353
 354        if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) {
 355            error_setg(errp, "Too many blocks");
 356            ret = -EINVAL;
 357            goto fail;
 358        }
 359
 360        computed_size = (uint64_t) s->max_table_entries * s->block_size;
 361        if (computed_size < bs->total_sectors * 512) {
 362            error_setg(errp, "Page table too small");
 363            ret = -EINVAL;
 364            goto fail;
 365        }
 366
 367        if (s->max_table_entries > SIZE_MAX / 4 ||
 368            s->max_table_entries > (int) INT_MAX / 4) {
 369            error_setg(errp, "Max Table Entries too large (%" PRId32 ")",
 370                        s->max_table_entries);
 371            ret = -EINVAL;
 372            goto fail;
 373        }
 374
 375        pagetable_size = (uint64_t) s->max_table_entries * 4;
 376
 377        s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size);
 378        if (s->pagetable == NULL) {
 379            error_setg(errp, "Unable to allocate memory for page table");
 380            ret = -ENOMEM;
 381            goto fail;
 382        }
 383
 384        s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
 385
 386        ret = bdrv_pread(bs->file, s->bat_offset, s->pagetable,
 387                         pagetable_size);
 388        if (ret < 0) {
 389            error_setg(errp, "Error reading pagetable");
 390            goto fail;
 391        }
 392
 393        s->free_data_block_offset =
 394            ROUND_UP(s->bat_offset + pagetable_size, 512);
 395
 396        for (i = 0; i < s->max_table_entries; i++) {
 397            be32_to_cpus(&s->pagetable[i]);
 398            if (s->pagetable[i] != 0xFFFFFFFF) {
 399                int64_t next = (512 * (int64_t) s->pagetable[i]) +
 400                    s->bitmap_size + s->block_size;
 401
 402                if (next > s->free_data_block_offset) {
 403                    s->free_data_block_offset = next;
 404                }
 405            }
 406        }
 407
 408        if (s->free_data_block_offset > bdrv_getlength(bs->file->bs)) {
 409            error_setg(errp, "block-vpc: free_data_block_offset points after "
 410                             "the end of file. The image has been truncated.");
 411            ret = -EINVAL;
 412            goto fail;
 413        }
 414
 415        s->last_bitmap_offset = (int64_t) -1;
 416
 417#ifdef CACHE
 418        s->pageentry_u8 = g_malloc(512);
 419        s->pageentry_u32 = s->pageentry_u8;
 420        s->pageentry_u16 = s->pageentry_u8;
 421        s->last_pagetable = -1;
 422#endif
 423    }
 424
 425    qemu_co_mutex_init(&s->lock);
 426
 427    /* Disable migration when VHD images are used */
 428    error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
 429               "does not support live migration",
 430               bdrv_get_device_or_node_name(bs));
 431    migrate_add_blocker(s->migration_blocker);
 432
 433    return 0;
 434
 435fail:
 436    qemu_vfree(s->pagetable);
 437#ifdef CACHE
 438    g_free(s->pageentry_u8);
 439#endif
 440    return ret;
 441}
 442
 443static int vpc_reopen_prepare(BDRVReopenState *state,
 444                              BlockReopenQueue *queue, Error **errp)
 445{
 446    return 0;
 447}
 448
 449/*
 450 * Returns the absolute byte offset of the given sector in the image file.
 451 * If the sector is not allocated, -1 is returned instead.
 452 *
 453 * The parameter write must be 1 if the offset will be used for a write
 454 * operation (the block bitmaps is updated then), 0 otherwise.
 455 */
 456static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
 457                                       bool write)
 458{
 459    BDRVVPCState *s = bs->opaque;
 460    uint64_t bitmap_offset, block_offset;
 461    uint32_t pagetable_index, offset_in_block;
 462
 463    pagetable_index = offset / s->block_size;
 464    offset_in_block = offset % s->block_size;
 465
 466    if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
 467        return -1; /* not allocated */
 468
 469    bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
 470    block_offset = bitmap_offset + s->bitmap_size + offset_in_block;
 471
 472    /* We must ensure that we don't write to any sectors which are marked as
 473       unused in the bitmap. We get away with setting all bits in the block
 474       bitmap each time we write to a new block. This might cause Virtual PC to
 475       miss sparse read optimization, but it's not a problem in terms of
 476       correctness. */
 477    if (write && (s->last_bitmap_offset != bitmap_offset)) {
 478        uint8_t bitmap[s->bitmap_size];
 479
 480        s->last_bitmap_offset = bitmap_offset;
 481        memset(bitmap, 0xff, s->bitmap_size);
 482        bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
 483    }
 484
 485    return block_offset;
 486}
 487
 488static inline int64_t get_sector_offset(BlockDriverState *bs,
 489                                        int64_t sector_num, bool write)
 490{
 491    return get_image_offset(bs, sector_num * BDRV_SECTOR_SIZE, write);
 492}
 493
 494/*
 495 * Writes the footer to the end of the image file. This is needed when the
 496 * file grows as it overwrites the old footer
 497 *
 498 * Returns 0 on success and < 0 on error
 499 */
 500static int rewrite_footer(BlockDriverState* bs)
 501{
 502    int ret;
 503    BDRVVPCState *s = bs->opaque;
 504    int64_t offset = s->free_data_block_offset;
 505
 506    ret = bdrv_pwrite_sync(bs->file, offset, s->footer_buf, HEADER_SIZE);
 507    if (ret < 0)
 508        return ret;
 509
 510    return 0;
 511}
 512
 513/*
 514 * Allocates a new block. This involves writing a new footer and updating
 515 * the Block Allocation Table to use the space at the old end of the image
 516 * file (overwriting the old footer)
 517 *
 518 * Returns the sectors' offset in the image file on success and < 0 on error
 519 */
 520static int64_t alloc_block(BlockDriverState* bs, int64_t offset)
 521{
 522    BDRVVPCState *s = bs->opaque;
 523    int64_t bat_offset;
 524    uint32_t index, bat_value;
 525    int ret;
 526    uint8_t bitmap[s->bitmap_size];
 527
 528    /* Check if sector_num is valid */
 529    if ((offset < 0) || (offset > bs->total_sectors * BDRV_SECTOR_SIZE)) {
 530        return -EINVAL;
 531    }
 532
 533    /* Write entry into in-memory BAT */
 534    index = offset / s->block_size;
 535    assert(s->pagetable[index] == 0xFFFFFFFF);
 536    s->pagetable[index] = s->free_data_block_offset / 512;
 537
 538    /* Initialize the block's bitmap */
 539    memset(bitmap, 0xff, s->bitmap_size);
 540    ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
 541        s->bitmap_size);
 542    if (ret < 0) {
 543        return ret;
 544    }
 545
 546    /* Write new footer (the old one will be overwritten) */
 547    s->free_data_block_offset += s->block_size + s->bitmap_size;
 548    ret = rewrite_footer(bs);
 549    if (ret < 0)
 550        goto fail;
 551
 552    /* Write BAT entry to disk */
 553    bat_offset = s->bat_offset + (4 * index);
 554    bat_value = cpu_to_be32(s->pagetable[index]);
 555    ret = bdrv_pwrite_sync(bs->file, bat_offset, &bat_value, 4);
 556    if (ret < 0)
 557        goto fail;
 558
 559    return get_image_offset(bs, offset, false);
 560
 561fail:
 562    s->free_data_block_offset -= (s->block_size + s->bitmap_size);
 563    return ret;
 564}
 565
 566static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 567{
 568    BDRVVPCState *s = (BDRVVPCState *)bs->opaque;
 569    VHDFooter *footer = (VHDFooter *) s->footer_buf;
 570
 571    if (be32_to_cpu(footer->type) != VHD_FIXED) {
 572        bdi->cluster_size = s->block_size;
 573    }
 574
 575    bdi->unallocated_blocks_are_zero = true;
 576    return 0;
 577}
 578
 579static int coroutine_fn
 580vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
 581              QEMUIOVector *qiov, int flags)
 582{
 583    BDRVVPCState *s = bs->opaque;
 584    int ret;
 585    int64_t image_offset;
 586    int64_t n_bytes;
 587    int64_t bytes_done = 0;
 588    VHDFooter *footer = (VHDFooter *) s->footer_buf;
 589    QEMUIOVector local_qiov;
 590
 591    if (be32_to_cpu(footer->type) == VHD_FIXED) {
 592        return bdrv_co_preadv(bs->file, offset, bytes, qiov, 0);
 593    }
 594
 595    qemu_co_mutex_lock(&s->lock);
 596    qemu_iovec_init(&local_qiov, qiov->niov);
 597
 598    while (bytes > 0) {
 599        image_offset = get_image_offset(bs, offset, false);
 600        n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
 601
 602        if (image_offset == -1) {
 603            qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
 604        } else {
 605            qemu_iovec_reset(&local_qiov);
 606            qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
 607
 608            ret = bdrv_co_preadv(bs->file, image_offset, n_bytes,
 609                                 &local_qiov, 0);
 610            if (ret < 0) {
 611                goto fail;
 612            }
 613        }
 614
 615        bytes -= n_bytes;
 616        offset += n_bytes;
 617        bytes_done += n_bytes;
 618    }
 619
 620    ret = 0;
 621fail:
 622    qemu_iovec_destroy(&local_qiov);
 623    qemu_co_mutex_unlock(&s->lock);
 624
 625    return ret;
 626}
 627
 628static int coroutine_fn
 629vpc_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
 630               QEMUIOVector *qiov, int flags)
 631{
 632    BDRVVPCState *s = bs->opaque;
 633    int64_t image_offset;
 634    int64_t n_bytes;
 635    int64_t bytes_done = 0;
 636    int ret;
 637    VHDFooter *footer =  (VHDFooter *) s->footer_buf;
 638    QEMUIOVector local_qiov;
 639
 640    if (be32_to_cpu(footer->type) == VHD_FIXED) {
 641        return bdrv_co_pwritev(bs->file, offset, bytes, qiov, 0);
 642    }
 643
 644    qemu_co_mutex_lock(&s->lock);
 645    qemu_iovec_init(&local_qiov, qiov->niov);
 646
 647    while (bytes > 0) {
 648        image_offset = get_image_offset(bs, offset, true);
 649        n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
 650
 651        if (image_offset == -1) {
 652            image_offset = alloc_block(bs, offset);
 653            if (image_offset < 0) {
 654                ret = image_offset;
 655                goto fail;
 656            }
 657        }
 658
 659        qemu_iovec_reset(&local_qiov);
 660        qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
 661
 662        ret = bdrv_co_pwritev(bs->file, image_offset, n_bytes,
 663                              &local_qiov, 0);
 664        if (ret < 0) {
 665            goto fail;
 666        }
 667
 668        bytes -= n_bytes;
 669        offset += n_bytes;
 670        bytes_done += n_bytes;
 671    }
 672
 673    ret = 0;
 674fail:
 675    qemu_iovec_destroy(&local_qiov);
 676    qemu_co_mutex_unlock(&s->lock);
 677
 678    return ret;
 679}
 680
 681static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
 682        int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
 683{
 684    BDRVVPCState *s = bs->opaque;
 685    VHDFooter *footer = (VHDFooter*) s->footer_buf;
 686    int64_t start, offset;
 687    bool allocated;
 688    int n;
 689
 690    if (be32_to_cpu(footer->type) == VHD_FIXED) {
 691        *pnum = nb_sectors;
 692        *file = bs->file->bs;
 693        return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
 694               (sector_num << BDRV_SECTOR_BITS);
 695    }
 696
 697    offset = get_sector_offset(bs, sector_num, 0);
 698    start = offset;
 699    allocated = (offset != -1);
 700    *pnum = 0;
 701
 702    do {
 703        /* All sectors in a block are contiguous (without using the bitmap) */
 704        n = ROUND_UP(sector_num + 1, s->block_size / BDRV_SECTOR_SIZE)
 705          - sector_num;
 706        n = MIN(n, nb_sectors);
 707
 708        *pnum += n;
 709        sector_num += n;
 710        nb_sectors -= n;
 711        /* *pnum can't be greater than one block for allocated
 712         * sectors since there is always a bitmap in between. */
 713        if (allocated) {
 714            *file = bs->file->bs;
 715            return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
 716        }
 717        if (nb_sectors == 0) {
 718            break;
 719        }
 720        offset = get_sector_offset(bs, sector_num, 0);
 721    } while (offset == -1);
 722
 723    return 0;
 724}
 725
 726/*
 727 * Calculates the number of cylinders, heads and sectors per cylinder
 728 * based on a given number of sectors. This is the algorithm described
 729 * in the VHD specification.
 730 *
 731 * Note that the geometry doesn't always exactly match total_sectors but
 732 * may round it down.
 733 *
 734 * Returns 0 on success, -EFBIG if the size is larger than 2040 GiB. Override
 735 * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB)
 736 * and instead allow up to 255 heads.
 737 */
 738static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
 739    uint8_t* heads, uint8_t* secs_per_cyl)
 740{
 741    uint32_t cyls_times_heads;
 742
 743    total_sectors = MIN(total_sectors, VHD_MAX_GEOMETRY);
 744
 745    if (total_sectors >= 65535LL * 16 * 63) {
 746        *secs_per_cyl = 255;
 747        *heads = 16;
 748        cyls_times_heads = total_sectors / *secs_per_cyl;
 749    } else {
 750        *secs_per_cyl = 17;
 751        cyls_times_heads = total_sectors / *secs_per_cyl;
 752        *heads = (cyls_times_heads + 1023) / 1024;
 753
 754        if (*heads < 4) {
 755            *heads = 4;
 756        }
 757
 758        if (cyls_times_heads >= (*heads * 1024) || *heads > 16) {
 759            *secs_per_cyl = 31;
 760            *heads = 16;
 761            cyls_times_heads = total_sectors / *secs_per_cyl;
 762        }
 763
 764        if (cyls_times_heads >= (*heads * 1024)) {
 765            *secs_per_cyl = 63;
 766            *heads = 16;
 767            cyls_times_heads = total_sectors / *secs_per_cyl;
 768        }
 769    }
 770
 771    *cyls = cyls_times_heads / *heads;
 772
 773    return 0;
 774}
 775
 776static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
 777                               int64_t total_sectors)
 778{
 779    VHDDynDiskHeader *dyndisk_header =
 780        (VHDDynDiskHeader *) buf;
 781    size_t block_size, num_bat_entries;
 782    int i;
 783    int ret;
 784    int64_t offset = 0;
 785
 786    /* Write the footer (twice: at the beginning and at the end) */
 787    block_size = 0x200000;
 788    num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
 789
 790    ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
 791    if (ret < 0) {
 792        goto fail;
 793    }
 794
 795    offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
 796    ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
 797    if (ret < 0) {
 798        goto fail;
 799    }
 800
 801    /* Write the initial BAT */
 802    offset = 3 * 512;
 803
 804    memset(buf, 0xFF, 512);
 805    for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
 806        ret = blk_pwrite(blk, offset, buf, 512, 0);
 807        if (ret < 0) {
 808            goto fail;
 809        }
 810        offset += 512;
 811    }
 812
 813    /* Prepare the Dynamic Disk Header */
 814    memset(buf, 0, 1024);
 815
 816    memcpy(dyndisk_header->magic, "cxsparse", 8);
 817
 818    /*
 819     * Note: The spec is actually wrong here for data_offset, it says
 820     * 0xFFFFFFFF, but MS tools expect all 64 bits to be set.
 821     */
 822    dyndisk_header->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
 823    dyndisk_header->table_offset = cpu_to_be64(3 * 512);
 824    dyndisk_header->version = cpu_to_be32(0x00010000);
 825    dyndisk_header->block_size = cpu_to_be32(block_size);
 826    dyndisk_header->max_table_entries = cpu_to_be32(num_bat_entries);
 827
 828    dyndisk_header->checksum = cpu_to_be32(vpc_checksum(buf, 1024));
 829
 830    /* Write the header */
 831    offset = 512;
 832
 833    ret = blk_pwrite(blk, offset, buf, 1024, 0);
 834    if (ret < 0) {
 835        goto fail;
 836    }
 837
 838 fail:
 839    return ret;
 840}
 841
 842static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
 843                             int64_t total_size)
 844{
 845    int ret;
 846
 847    /* Add footer to total size */
 848    total_size += HEADER_SIZE;
 849
 850    ret = blk_truncate(blk, total_size);
 851    if (ret < 0) {
 852        return ret;
 853    }
 854
 855    ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE, 0);
 856    if (ret < 0) {
 857        return ret;
 858    }
 859
 860    return ret;
 861}
 862
 863static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
 864{
 865    uint8_t buf[1024];
 866    VHDFooter *footer = (VHDFooter *) buf;
 867    char *disk_type_param;
 868    int i;
 869    uint16_t cyls = 0;
 870    uint8_t heads = 0;
 871    uint8_t secs_per_cyl = 0;
 872    int64_t total_sectors;
 873    int64_t total_size;
 874    int disk_type;
 875    int ret = -EIO;
 876    bool force_size;
 877    Error *local_err = NULL;
 878    BlockBackend *blk = NULL;
 879
 880    /* Read out options */
 881    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
 882                          BDRV_SECTOR_SIZE);
 883    disk_type_param = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
 884    if (disk_type_param) {
 885        if (!strcmp(disk_type_param, "dynamic")) {
 886            disk_type = VHD_DYNAMIC;
 887        } else if (!strcmp(disk_type_param, "fixed")) {
 888            disk_type = VHD_FIXED;
 889        } else {
 890            error_setg(errp, "Invalid disk type, %s", disk_type_param);
 891            ret = -EINVAL;
 892            goto out;
 893        }
 894    } else {
 895        disk_type = VHD_DYNAMIC;
 896    }
 897
 898    force_size = qemu_opt_get_bool_del(opts, VPC_OPT_FORCE_SIZE, false);
 899
 900    ret = bdrv_create_file(filename, opts, &local_err);
 901    if (ret < 0) {
 902        error_propagate(errp, local_err);
 903        goto out;
 904    }
 905
 906    blk = blk_new_open(filename, NULL, NULL,
 907                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
 908    if (blk == NULL) {
 909        error_propagate(errp, local_err);
 910        ret = -EIO;
 911        goto out;
 912    }
 913
 914    blk_set_allow_write_beyond_eof(blk, true);
 915
 916    /*
 917     * Calculate matching total_size and geometry. Increase the number of
 918     * sectors requested until we get enough (or fail). This ensures that
 919     * qemu-img convert doesn't truncate images, but rather rounds up.
 920     *
 921     * If the image size can't be represented by a spec conformant CHS geometry,
 922     * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use
 923     * the image size from the VHD footer to calculate total_sectors.
 924     */
 925    if (force_size) {
 926        /* This will force the use of total_size for sector count, below */
 927        cyls         = VHD_CHS_MAX_C;
 928        heads        = VHD_CHS_MAX_H;
 929        secs_per_cyl = VHD_CHS_MAX_S;
 930    } else {
 931        total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
 932        for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
 933            calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
 934        }
 935    }
 936
 937    if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) {
 938        total_sectors = total_size / BDRV_SECTOR_SIZE;
 939        /* Allow a maximum disk size of 2040 GiB */
 940        if (total_sectors > VHD_MAX_SECTORS) {
 941            error_setg(errp, "Disk size is too large, max size is 2040 GiB");
 942            ret = -EFBIG;
 943            goto out;
 944        }
 945    } else {
 946        total_sectors = (int64_t)cyls * heads * secs_per_cyl;
 947        total_size = total_sectors * BDRV_SECTOR_SIZE;
 948    }
 949
 950    /* Prepare the Hard Disk Footer */
 951    memset(buf, 0, 1024);
 952
 953    memcpy(footer->creator, "conectix", 8);
 954    if (force_size) {
 955        memcpy(footer->creator_app, "qem2", 4);
 956    } else {
 957        memcpy(footer->creator_app, "qemu", 4);
 958    }
 959    memcpy(footer->creator_os, "Wi2k", 4);
 960
 961    footer->features = cpu_to_be32(0x02);
 962    footer->version = cpu_to_be32(0x00010000);
 963    if (disk_type == VHD_DYNAMIC) {
 964        footer->data_offset = cpu_to_be64(HEADER_SIZE);
 965    } else {
 966        footer->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
 967    }
 968    footer->timestamp = cpu_to_be32(time(NULL) - VHD_TIMESTAMP_BASE);
 969
 970    /* Version of Virtual PC 2007 */
 971    footer->major = cpu_to_be16(0x0005);
 972    footer->minor = cpu_to_be16(0x0003);
 973    footer->orig_size = cpu_to_be64(total_size);
 974    footer->current_size = cpu_to_be64(total_size);
 975    footer->cyls = cpu_to_be16(cyls);
 976    footer->heads = heads;
 977    footer->secs_per_cyl = secs_per_cyl;
 978
 979    footer->type = cpu_to_be32(disk_type);
 980
 981    qemu_uuid_generate(&footer->uuid);
 982
 983    footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE));
 984
 985    if (disk_type == VHD_DYNAMIC) {
 986        ret = create_dynamic_disk(blk, buf, total_sectors);
 987    } else {
 988        ret = create_fixed_disk(blk, buf, total_size);
 989    }
 990    if (ret < 0) {
 991        error_setg(errp, "Unable to create or write VHD header");
 992    }
 993
 994out:
 995    blk_unref(blk);
 996    g_free(disk_type_param);
 997    return ret;
 998}
 999
1000static int vpc_has_zero_init(BlockDriverState *bs)
1001{
1002    BDRVVPCState *s = bs->opaque;
1003    VHDFooter *footer =  (VHDFooter *) s->footer_buf;
1004
1005    if (be32_to_cpu(footer->type) == VHD_FIXED) {
1006        return bdrv_has_zero_init(bs->file->bs);
1007    } else {
1008        return 1;
1009    }
1010}
1011
1012static void vpc_close(BlockDriverState *bs)
1013{
1014    BDRVVPCState *s = bs->opaque;
1015    qemu_vfree(s->pagetable);
1016#ifdef CACHE
1017    g_free(s->pageentry_u8);
1018#endif
1019
1020    migrate_del_blocker(s->migration_blocker);
1021    error_free(s->migration_blocker);
1022}
1023
1024static QemuOptsList vpc_create_opts = {
1025    .name = "vpc-create-opts",
1026    .head = QTAILQ_HEAD_INITIALIZER(vpc_create_opts.head),
1027    .desc = {
1028        {
1029            .name = BLOCK_OPT_SIZE,
1030            .type = QEMU_OPT_SIZE,
1031            .help = "Virtual disk size"
1032        },
1033        {
1034            .name = BLOCK_OPT_SUBFMT,
1035            .type = QEMU_OPT_STRING,
1036            .help =
1037                "Type of virtual hard disk format. Supported formats are "
1038                "{dynamic (default) | fixed} "
1039        },
1040        {
1041            .name = VPC_OPT_FORCE_SIZE,
1042            .type = QEMU_OPT_BOOL,
1043            .help = "Force disk size calculation to use the actual size "
1044                    "specified, rather than using the nearest CHS-based "
1045                    "calculation"
1046        },
1047        { /* end of list */ }
1048    }
1049};
1050
1051static BlockDriver bdrv_vpc = {
1052    .format_name    = "vpc",
1053    .instance_size  = sizeof(BDRVVPCState),
1054
1055    .bdrv_probe             = vpc_probe,
1056    .bdrv_open              = vpc_open,
1057    .bdrv_close             = vpc_close,
1058    .bdrv_reopen_prepare    = vpc_reopen_prepare,
1059    .bdrv_create            = vpc_create,
1060
1061    .bdrv_co_preadv             = vpc_co_preadv,
1062    .bdrv_co_pwritev            = vpc_co_pwritev,
1063    .bdrv_co_get_block_status   = vpc_co_get_block_status,
1064
1065    .bdrv_get_info          = vpc_get_info,
1066
1067    .create_opts            = &vpc_create_opts,
1068    .bdrv_has_zero_init     = vpc_has_zero_init,
1069};
1070
1071static void bdrv_vpc_init(void)
1072{
1073    bdrv_register(&bdrv_vpc);
1074}
1075
1076block_init(bdrv_vpc_init);
1077