qemu/block/vpc.c
<<
>>
Prefs
   1/*
   2 * Block driver for Connectix / Microsoft Virtual PC images
   3 *
   4 * Copyright (c) 2005 Alex Beregszaszi
   5 * Copyright (c) 2009 Kevin Wolf <kwolf@suse.de>
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25#include "qemu/osdep.h"
  26#include "qapi/error.h"
  27#include "qemu-common.h"
  28#include "block/block_int.h"
  29#include "sysemu/block-backend.h"
  30#include "qemu/module.h"
  31#include "migration/migration.h"
  32#include "qemu/bswap.h"
  33#if defined(CONFIG_UUID)
  34#include <uuid/uuid.h>
  35#endif
  36
  37/**************************************************************/
  38
  39#define HEADER_SIZE 512
  40
  41//#define CACHE
  42
  43enum vhd_type {
  44    VHD_FIXED           = 2,
  45    VHD_DYNAMIC         = 3,
  46    VHD_DIFFERENCING    = 4,
  47};
  48
  49/* Seconds since Jan 1, 2000 0:00:00 (UTC) */
  50#define VHD_TIMESTAMP_BASE 946684800
  51
  52#define VHD_CHS_MAX_C   65535LL
  53#define VHD_CHS_MAX_H   16
  54#define VHD_CHS_MAX_S   255
  55
  56#define VHD_MAX_SECTORS       0xff000000    /* 2040 GiB max image size */
  57#define VHD_MAX_GEOMETRY      (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S)
  58
  59#define VPC_OPT_FORCE_SIZE "force_size"
  60
  61/* always big-endian */
  62typedef struct vhd_footer {
  63    char        creator[8]; /* "conectix" */
  64    uint32_t    features;
  65    uint32_t    version;
  66
  67    /* Offset of next header structure, 0xFFFFFFFF if none */
  68    uint64_t    data_offset;
  69
  70    /* Seconds since Jan 1, 2000 0:00:00 (UTC) */
  71    uint32_t    timestamp;
  72
  73    char        creator_app[4]; /*  e.g., "vpc " */
  74    uint16_t    major;
  75    uint16_t    minor;
  76    char        creator_os[4]; /* "Wi2k" */
  77
  78    uint64_t    orig_size;
  79    uint64_t    current_size;
  80
  81    uint16_t    cyls;
  82    uint8_t     heads;
  83    uint8_t     secs_per_cyl;
  84
  85    uint32_t    type;
  86
  87    /* Checksum of the Hard Disk Footer ("one's complement of the sum of all
  88       the bytes in the footer without the checksum field") */
  89    uint32_t    checksum;
  90
  91    /* UUID used to identify a parent hard disk (backing file) */
  92    uint8_t     uuid[16];
  93
  94    uint8_t     in_saved_state;
  95} QEMU_PACKED VHDFooter;
  96
  97typedef struct vhd_dyndisk_header {
  98    char        magic[8]; /* "cxsparse" */
  99
 100    /* Offset of next header structure, 0xFFFFFFFF if none */
 101    uint64_t    data_offset;
 102
 103    /* Offset of the Block Allocation Table (BAT) */
 104    uint64_t    table_offset;
 105
 106    uint32_t    version;
 107    uint32_t    max_table_entries; /* 32bit/entry */
 108
 109    /* 2 MB by default, must be a power of two */
 110    uint32_t    block_size;
 111
 112    uint32_t    checksum;
 113    uint8_t     parent_uuid[16];
 114    uint32_t    parent_timestamp;
 115    uint32_t    reserved;
 116
 117    /* Backing file name (in UTF-16) */
 118    uint8_t     parent_name[512];
 119
 120    struct {
 121        uint32_t    platform;
 122        uint32_t    data_space;
 123        uint32_t    data_length;
 124        uint32_t    reserved;
 125        uint64_t    data_offset;
 126    } parent_locator[8];
 127} QEMU_PACKED VHDDynDiskHeader;
 128
 129typedef struct BDRVVPCState {
 130    CoMutex lock;
 131    uint8_t footer_buf[HEADER_SIZE];
 132    uint64_t free_data_block_offset;
 133    int max_table_entries;
 134    uint32_t *pagetable;
 135    uint64_t bat_offset;
 136    uint64_t last_bitmap_offset;
 137
 138    uint32_t block_size;
 139    uint32_t bitmap_size;
 140    bool force_use_chs;
 141    bool force_use_sz;
 142
 143#ifdef CACHE
 144    uint8_t *pageentry_u8;
 145    uint32_t *pageentry_u32;
 146    uint16_t *pageentry_u16;
 147
 148    uint64_t last_bitmap;
 149#endif
 150
 151    Error *migration_blocker;
 152} BDRVVPCState;
 153
 154#define VPC_OPT_SIZE_CALC "force_size_calc"
 155static QemuOptsList vpc_runtime_opts = {
 156    .name = "vpc-runtime-opts",
 157    .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head),
 158    .desc = {
 159        {
 160            .name = VPC_OPT_SIZE_CALC,
 161            .type = QEMU_OPT_STRING,
 162            .help = "Force disk size calculation to use either CHS geometry, "
 163                    "or use the disk current_size specified in the VHD footer. "
 164                    "{chs, current_size}"
 165        },
 166        { /* end of list */ }
 167    }
 168};
 169
 170static uint32_t vpc_checksum(uint8_t* buf, size_t size)
 171{
 172    uint32_t res = 0;
 173    int i;
 174
 175    for (i = 0; i < size; i++)
 176        res += buf[i];
 177
 178    return ~res;
 179}
 180
 181
 182static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
 183{
 184    if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8))
 185        return 100;
 186    return 0;
 187}
 188
 189static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts,
 190                              Error **errp)
 191{
 192    BDRVVPCState *s = bs->opaque;
 193    const char *size_calc;
 194
 195    size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC);
 196
 197    if (!size_calc) {
 198       /* no override, use autodetect only */
 199    } else if (!strcmp(size_calc, "current_size")) {
 200        s->force_use_sz = true;
 201    } else if (!strcmp(size_calc, "chs")) {
 202        s->force_use_chs = true;
 203    } else {
 204        error_setg(errp, "Invalid size calculation mode: '%s'", size_calc);
 205    }
 206}
 207
 208static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
 209                    Error **errp)
 210{
 211    BDRVVPCState *s = bs->opaque;
 212    int i;
 213    VHDFooter *footer;
 214    VHDDynDiskHeader *dyndisk_header;
 215    QemuOpts *opts = NULL;
 216    Error *local_err = NULL;
 217    bool use_chs;
 218    uint8_t buf[HEADER_SIZE];
 219    uint32_t checksum;
 220    uint64_t computed_size;
 221    uint64_t pagetable_size;
 222    int disk_type = VHD_DYNAMIC;
 223    int ret;
 224
 225    opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
 226    qemu_opts_absorb_qdict(opts, options, &local_err);
 227    if (local_err) {
 228        error_propagate(errp, local_err);
 229        ret = -EINVAL;
 230        goto fail;
 231    }
 232
 233    vpc_parse_options(bs, opts, &local_err);
 234    if (local_err) {
 235        error_propagate(errp, local_err);
 236        ret = -EINVAL;
 237        goto fail;
 238    }
 239
 240    ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE);
 241    if (ret < 0) {
 242        error_setg(errp, "Unable to read VHD header");
 243        goto fail;
 244    }
 245
 246    footer = (VHDFooter *) s->footer_buf;
 247    if (strncmp(footer->creator, "conectix", 8)) {
 248        int64_t offset = bdrv_getlength(bs->file->bs);
 249        if (offset < 0) {
 250            ret = offset;
 251            error_setg(errp, "Invalid file size");
 252            goto fail;
 253        } else if (offset < HEADER_SIZE) {
 254            ret = -EINVAL;
 255            error_setg(errp, "File too small for a VHD header");
 256            goto fail;
 257        }
 258
 259        /* If a fixed disk, the footer is found only at the end of the file */
 260        ret = bdrv_pread(bs->file, offset-HEADER_SIZE, s->footer_buf,
 261                         HEADER_SIZE);
 262        if (ret < 0) {
 263            goto fail;
 264        }
 265        if (strncmp(footer->creator, "conectix", 8)) {
 266            error_setg(errp, "invalid VPC image");
 267            ret = -EINVAL;
 268            goto fail;
 269        }
 270        disk_type = VHD_FIXED;
 271    }
 272
 273    checksum = be32_to_cpu(footer->checksum);
 274    footer->checksum = 0;
 275    if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum)
 276        fprintf(stderr, "block-vpc: The header checksum of '%s' is "
 277            "incorrect.\n", bs->filename);
 278
 279    /* Write 'checksum' back to footer, or else will leave it with zero. */
 280    footer->checksum = cpu_to_be32(checksum);
 281
 282    /* The visible size of a image in Virtual PC depends on the geometry
 283       rather than on the size stored in the footer (the size in the footer
 284       is too large usually) */
 285    bs->total_sectors = (int64_t)
 286        be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
 287
 288    /* Microsoft Virtual PC and Microsoft Hyper-V produce and read
 289     * VHD image sizes differently.  VPC will rely on CHS geometry,
 290     * while Hyper-V and disk2vhd use the size specified in the footer.
 291     *
 292     * We use a couple of approaches to try and determine the correct method:
 293     * look at the Creator App field, and look for images that have CHS
 294     * geometry that is the maximum value.
 295     *
 296     * If the CHS geometry is the maximum CHS geometry, then we assume that
 297     * the size is the footer->current_size to avoid truncation.  Otherwise,
 298     * we follow the table based on footer->creator_app:
 299     *
 300     *  Known creator apps:
 301     *      'vpc '  :  CHS              Virtual PC (uses disk geometry)
 302     *      'qemu'  :  CHS              QEMU (uses disk geometry)
 303     *      'qem2'  :  current_size     QEMU (uses current_size)
 304     *      'win '  :  current_size     Hyper-V
 305     *      'd2v '  :  current_size     Disk2vhd
 306     *      'tap\0' :  current_size     XenServer
 307     *      'CTXS'  :  current_size     XenConverter
 308     *
 309     *  The user can override the table values via drive options, however
 310     *  even with an override we will still use current_size for images
 311     *  that have CHS geometry of the maximum size.
 312     */
 313    use_chs = (!!strncmp(footer->creator_app, "win ", 4) &&
 314               !!strncmp(footer->creator_app, "qem2", 4) &&
 315               !!strncmp(footer->creator_app, "d2v ", 4) &&
 316               !!strncmp(footer->creator_app, "CTXS", 4) &&
 317               !!memcmp(footer->creator_app, "tap", 4)) || s->force_use_chs;
 318
 319    if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) {
 320        bs->total_sectors = be64_to_cpu(footer->current_size) /
 321                                        BDRV_SECTOR_SIZE;
 322    }
 323
 324    /* Allow a maximum disk size of 2040 GiB */
 325    if (bs->total_sectors > VHD_MAX_SECTORS) {
 326        ret = -EFBIG;
 327        goto fail;
 328    }
 329
 330    if (disk_type == VHD_DYNAMIC) {
 331        ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
 332                         HEADER_SIZE);
 333        if (ret < 0) {
 334            error_setg(errp, "Error reading dynamic VHD header");
 335            goto fail;
 336        }
 337
 338        dyndisk_header = (VHDDynDiskHeader *) buf;
 339
 340        if (strncmp(dyndisk_header->magic, "cxsparse", 8)) {
 341            error_setg(errp, "Invalid header magic");
 342            ret = -EINVAL;
 343            goto fail;
 344        }
 345
 346        s->block_size = be32_to_cpu(dyndisk_header->block_size);
 347        if (!is_power_of_2(s->block_size) || s->block_size < BDRV_SECTOR_SIZE) {
 348            error_setg(errp, "Invalid block size %" PRIu32, s->block_size);
 349            ret = -EINVAL;
 350            goto fail;
 351        }
 352        s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;
 353
 354        s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
 355
 356        if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) {
 357            error_setg(errp, "Too many blocks");
 358            ret = -EINVAL;
 359            goto fail;
 360        }
 361
 362        computed_size = (uint64_t) s->max_table_entries * s->block_size;
 363        if (computed_size < bs->total_sectors * 512) {
 364            error_setg(errp, "Page table too small");
 365            ret = -EINVAL;
 366            goto fail;
 367        }
 368
 369        if (s->max_table_entries > SIZE_MAX / 4 ||
 370            s->max_table_entries > (int) INT_MAX / 4) {
 371            error_setg(errp, "Max Table Entries too large (%" PRId32 ")",
 372                        s->max_table_entries);
 373            ret = -EINVAL;
 374            goto fail;
 375        }
 376
 377        pagetable_size = (uint64_t) s->max_table_entries * 4;
 378
 379        s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size);
 380        if (s->pagetable == NULL) {
 381            error_setg(errp, "Unable to allocate memory for page table");
 382            ret = -ENOMEM;
 383            goto fail;
 384        }
 385
 386        s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
 387
 388        ret = bdrv_pread(bs->file, s->bat_offset, s->pagetable,
 389                         pagetable_size);
 390        if (ret < 0) {
 391            error_setg(errp, "Error reading pagetable");
 392            goto fail;
 393        }
 394
 395        s->free_data_block_offset =
 396            ROUND_UP(s->bat_offset + pagetable_size, 512);
 397
 398        for (i = 0; i < s->max_table_entries; i++) {
 399            be32_to_cpus(&s->pagetable[i]);
 400            if (s->pagetable[i] != 0xFFFFFFFF) {
 401                int64_t next = (512 * (int64_t) s->pagetable[i]) +
 402                    s->bitmap_size + s->block_size;
 403
 404                if (next > s->free_data_block_offset) {
 405                    s->free_data_block_offset = next;
 406                }
 407            }
 408        }
 409
 410        if (s->free_data_block_offset > bdrv_getlength(bs->file->bs)) {
 411            error_setg(errp, "block-vpc: free_data_block_offset points after "
 412                             "the end of file. The image has been truncated.");
 413            ret = -EINVAL;
 414            goto fail;
 415        }
 416
 417        s->last_bitmap_offset = (int64_t) -1;
 418
 419#ifdef CACHE
 420        s->pageentry_u8 = g_malloc(512);
 421        s->pageentry_u32 = s->pageentry_u8;
 422        s->pageentry_u16 = s->pageentry_u8;
 423        s->last_pagetable = -1;
 424#endif
 425    }
 426
 427    qemu_co_mutex_init(&s->lock);
 428
 429    /* Disable migration when VHD images are used */
 430    error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
 431               "does not support live migration",
 432               bdrv_get_device_or_node_name(bs));
 433    migrate_add_blocker(s->migration_blocker);
 434
 435    return 0;
 436
 437fail:
 438    qemu_vfree(s->pagetable);
 439#ifdef CACHE
 440    g_free(s->pageentry_u8);
 441#endif
 442    return ret;
 443}
 444
 445static int vpc_reopen_prepare(BDRVReopenState *state,
 446                              BlockReopenQueue *queue, Error **errp)
 447{
 448    return 0;
 449}
 450
 451/*
 452 * Returns the absolute byte offset of the given sector in the image file.
 453 * If the sector is not allocated, -1 is returned instead.
 454 *
 455 * The parameter write must be 1 if the offset will be used for a write
 456 * operation (the block bitmaps is updated then), 0 otherwise.
 457 */
 458static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
 459                                       bool write)
 460{
 461    BDRVVPCState *s = bs->opaque;
 462    uint64_t bitmap_offset, block_offset;
 463    uint32_t pagetable_index, offset_in_block;
 464
 465    pagetable_index = offset / s->block_size;
 466    offset_in_block = offset % s->block_size;
 467
 468    if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
 469        return -1; /* not allocated */
 470
 471    bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
 472    block_offset = bitmap_offset + s->bitmap_size + offset_in_block;
 473
 474    /* We must ensure that we don't write to any sectors which are marked as
 475       unused in the bitmap. We get away with setting all bits in the block
 476       bitmap each time we write to a new block. This might cause Virtual PC to
 477       miss sparse read optimization, but it's not a problem in terms of
 478       correctness. */
 479    if (write && (s->last_bitmap_offset != bitmap_offset)) {
 480        uint8_t bitmap[s->bitmap_size];
 481
 482        s->last_bitmap_offset = bitmap_offset;
 483        memset(bitmap, 0xff, s->bitmap_size);
 484        bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
 485    }
 486
 487    return block_offset;
 488}
 489
 490static inline int64_t get_sector_offset(BlockDriverState *bs,
 491                                        int64_t sector_num, bool write)
 492{
 493    return get_image_offset(bs, sector_num * BDRV_SECTOR_SIZE, write);
 494}
 495
 496/*
 497 * Writes the footer to the end of the image file. This is needed when the
 498 * file grows as it overwrites the old footer
 499 *
 500 * Returns 0 on success and < 0 on error
 501 */
 502static int rewrite_footer(BlockDriverState* bs)
 503{
 504    int ret;
 505    BDRVVPCState *s = bs->opaque;
 506    int64_t offset = s->free_data_block_offset;
 507
 508    ret = bdrv_pwrite_sync(bs->file, offset, s->footer_buf, HEADER_SIZE);
 509    if (ret < 0)
 510        return ret;
 511
 512    return 0;
 513}
 514
 515/*
 516 * Allocates a new block. This involves writing a new footer and updating
 517 * the Block Allocation Table to use the space at the old end of the image
 518 * file (overwriting the old footer)
 519 *
 520 * Returns the sectors' offset in the image file on success and < 0 on error
 521 */
 522static int64_t alloc_block(BlockDriverState* bs, int64_t offset)
 523{
 524    BDRVVPCState *s = bs->opaque;
 525    int64_t bat_offset;
 526    uint32_t index, bat_value;
 527    int ret;
 528    uint8_t bitmap[s->bitmap_size];
 529
 530    /* Check if sector_num is valid */
 531    if ((offset < 0) || (offset > bs->total_sectors * BDRV_SECTOR_SIZE)) {
 532        return -EINVAL;
 533    }
 534
 535    /* Write entry into in-memory BAT */
 536    index = offset / s->block_size;
 537    assert(s->pagetable[index] == 0xFFFFFFFF);
 538    s->pagetable[index] = s->free_data_block_offset / 512;
 539
 540    /* Initialize the block's bitmap */
 541    memset(bitmap, 0xff, s->bitmap_size);
 542    ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
 543        s->bitmap_size);
 544    if (ret < 0) {
 545        return ret;
 546    }
 547
 548    /* Write new footer (the old one will be overwritten) */
 549    s->free_data_block_offset += s->block_size + s->bitmap_size;
 550    ret = rewrite_footer(bs);
 551    if (ret < 0)
 552        goto fail;
 553
 554    /* Write BAT entry to disk */
 555    bat_offset = s->bat_offset + (4 * index);
 556    bat_value = cpu_to_be32(s->pagetable[index]);
 557    ret = bdrv_pwrite_sync(bs->file, bat_offset, &bat_value, 4);
 558    if (ret < 0)
 559        goto fail;
 560
 561    return get_image_offset(bs, offset, false);
 562
 563fail:
 564    s->free_data_block_offset -= (s->block_size + s->bitmap_size);
 565    return ret;
 566}
 567
 568static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 569{
 570    BDRVVPCState *s = (BDRVVPCState *)bs->opaque;
 571    VHDFooter *footer = (VHDFooter *) s->footer_buf;
 572
 573    if (be32_to_cpu(footer->type) != VHD_FIXED) {
 574        bdi->cluster_size = s->block_size;
 575    }
 576
 577    bdi->unallocated_blocks_are_zero = true;
 578    return 0;
 579}
 580
 581static int coroutine_fn
 582vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
 583              QEMUIOVector *qiov, int flags)
 584{
 585    BDRVVPCState *s = bs->opaque;
 586    int ret;
 587    int64_t image_offset;
 588    int64_t n_bytes;
 589    int64_t bytes_done = 0;
 590    VHDFooter *footer = (VHDFooter *) s->footer_buf;
 591    QEMUIOVector local_qiov;
 592
 593    if (be32_to_cpu(footer->type) == VHD_FIXED) {
 594        return bdrv_co_preadv(bs->file, offset, bytes, qiov, 0);
 595    }
 596
 597    qemu_co_mutex_lock(&s->lock);
 598    qemu_iovec_init(&local_qiov, qiov->niov);
 599
 600    while (bytes > 0) {
 601        image_offset = get_image_offset(bs, offset, false);
 602        n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
 603
 604        if (image_offset == -1) {
 605            qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
 606        } else {
 607            qemu_iovec_reset(&local_qiov);
 608            qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
 609
 610            ret = bdrv_co_preadv(bs->file, image_offset, n_bytes,
 611                                 &local_qiov, 0);
 612            if (ret < 0) {
 613                goto fail;
 614            }
 615        }
 616
 617        bytes -= n_bytes;
 618        offset += n_bytes;
 619        bytes_done += n_bytes;
 620    }
 621
 622    ret = 0;
 623fail:
 624    qemu_iovec_destroy(&local_qiov);
 625    qemu_co_mutex_unlock(&s->lock);
 626
 627    return ret;
 628}
 629
 630static int coroutine_fn
 631vpc_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
 632               QEMUIOVector *qiov, int flags)
 633{
 634    BDRVVPCState *s = bs->opaque;
 635    int64_t image_offset;
 636    int64_t n_bytes;
 637    int64_t bytes_done = 0;
 638    int ret;
 639    VHDFooter *footer =  (VHDFooter *) s->footer_buf;
 640    QEMUIOVector local_qiov;
 641
 642    if (be32_to_cpu(footer->type) == VHD_FIXED) {
 643        return bdrv_co_pwritev(bs->file, offset, bytes, qiov, 0);
 644    }
 645
 646    qemu_co_mutex_lock(&s->lock);
 647    qemu_iovec_init(&local_qiov, qiov->niov);
 648
 649    while (bytes > 0) {
 650        image_offset = get_image_offset(bs, offset, true);
 651        n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
 652
 653        if (image_offset == -1) {
 654            image_offset = alloc_block(bs, offset);
 655            if (image_offset < 0) {
 656                ret = image_offset;
 657                goto fail;
 658            }
 659        }
 660
 661        qemu_iovec_reset(&local_qiov);
 662        qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
 663
 664        ret = bdrv_co_pwritev(bs->file, image_offset, n_bytes,
 665                              &local_qiov, 0);
 666        if (ret < 0) {
 667            goto fail;
 668        }
 669
 670        bytes -= n_bytes;
 671        offset += n_bytes;
 672        bytes_done += n_bytes;
 673    }
 674
 675    ret = 0;
 676fail:
 677    qemu_iovec_destroy(&local_qiov);
 678    qemu_co_mutex_unlock(&s->lock);
 679
 680    return ret;
 681}
 682
 683static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
 684        int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
 685{
 686    BDRVVPCState *s = bs->opaque;
 687    VHDFooter *footer = (VHDFooter*) s->footer_buf;
 688    int64_t start, offset;
 689    bool allocated;
 690    int n;
 691
 692    if (be32_to_cpu(footer->type) == VHD_FIXED) {
 693        *pnum = nb_sectors;
 694        *file = bs->file->bs;
 695        return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
 696               (sector_num << BDRV_SECTOR_BITS);
 697    }
 698
 699    offset = get_sector_offset(bs, sector_num, 0);
 700    start = offset;
 701    allocated = (offset != -1);
 702    *pnum = 0;
 703
 704    do {
 705        /* All sectors in a block are contiguous (without using the bitmap) */
 706        n = ROUND_UP(sector_num + 1, s->block_size / BDRV_SECTOR_SIZE)
 707          - sector_num;
 708        n = MIN(n, nb_sectors);
 709
 710        *pnum += n;
 711        sector_num += n;
 712        nb_sectors -= n;
 713        /* *pnum can't be greater than one block for allocated
 714         * sectors since there is always a bitmap in between. */
 715        if (allocated) {
 716            *file = bs->file->bs;
 717            return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
 718        }
 719        if (nb_sectors == 0) {
 720            break;
 721        }
 722        offset = get_sector_offset(bs, sector_num, 0);
 723    } while (offset == -1);
 724
 725    return 0;
 726}
 727
 728/*
 729 * Calculates the number of cylinders, heads and sectors per cylinder
 730 * based on a given number of sectors. This is the algorithm described
 731 * in the VHD specification.
 732 *
 733 * Note that the geometry doesn't always exactly match total_sectors but
 734 * may round it down.
 735 *
 736 * Returns 0 on success, -EFBIG if the size is larger than 2040 GiB. Override
 737 * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB)
 738 * and instead allow up to 255 heads.
 739 */
 740static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
 741    uint8_t* heads, uint8_t* secs_per_cyl)
 742{
 743    uint32_t cyls_times_heads;
 744
 745    total_sectors = MIN(total_sectors, VHD_MAX_GEOMETRY);
 746
 747    if (total_sectors >= 65535LL * 16 * 63) {
 748        *secs_per_cyl = 255;
 749        *heads = 16;
 750        cyls_times_heads = total_sectors / *secs_per_cyl;
 751    } else {
 752        *secs_per_cyl = 17;
 753        cyls_times_heads = total_sectors / *secs_per_cyl;
 754        *heads = (cyls_times_heads + 1023) / 1024;
 755
 756        if (*heads < 4) {
 757            *heads = 4;
 758        }
 759
 760        if (cyls_times_heads >= (*heads * 1024) || *heads > 16) {
 761            *secs_per_cyl = 31;
 762            *heads = 16;
 763            cyls_times_heads = total_sectors / *secs_per_cyl;
 764        }
 765
 766        if (cyls_times_heads >= (*heads * 1024)) {
 767            *secs_per_cyl = 63;
 768            *heads = 16;
 769            cyls_times_heads = total_sectors / *secs_per_cyl;
 770        }
 771    }
 772
 773    *cyls = cyls_times_heads / *heads;
 774
 775    return 0;
 776}
 777
 778static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
 779                               int64_t total_sectors)
 780{
 781    VHDDynDiskHeader *dyndisk_header =
 782        (VHDDynDiskHeader *) buf;
 783    size_t block_size, num_bat_entries;
 784    int i;
 785    int ret;
 786    int64_t offset = 0;
 787
 788    /* Write the footer (twice: at the beginning and at the end) */
 789    block_size = 0x200000;
 790    num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
 791
 792    ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
 793    if (ret < 0) {
 794        goto fail;
 795    }
 796
 797    offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
 798    ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
 799    if (ret < 0) {
 800        goto fail;
 801    }
 802
 803    /* Write the initial BAT */
 804    offset = 3 * 512;
 805
 806    memset(buf, 0xFF, 512);
 807    for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
 808        ret = blk_pwrite(blk, offset, buf, 512, 0);
 809        if (ret < 0) {
 810            goto fail;
 811        }
 812        offset += 512;
 813    }
 814
 815    /* Prepare the Dynamic Disk Header */
 816    memset(buf, 0, 1024);
 817
 818    memcpy(dyndisk_header->magic, "cxsparse", 8);
 819
 820    /*
 821     * Note: The spec is actually wrong here for data_offset, it says
 822     * 0xFFFFFFFF, but MS tools expect all 64 bits to be set.
 823     */
 824    dyndisk_header->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
 825    dyndisk_header->table_offset = cpu_to_be64(3 * 512);
 826    dyndisk_header->version = cpu_to_be32(0x00010000);
 827    dyndisk_header->block_size = cpu_to_be32(block_size);
 828    dyndisk_header->max_table_entries = cpu_to_be32(num_bat_entries);
 829
 830    dyndisk_header->checksum = cpu_to_be32(vpc_checksum(buf, 1024));
 831
 832    /* Write the header */
 833    offset = 512;
 834
 835    ret = blk_pwrite(blk, offset, buf, 1024, 0);
 836    if (ret < 0) {
 837        goto fail;
 838    }
 839
 840 fail:
 841    return ret;
 842}
 843
 844static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
 845                             int64_t total_size)
 846{
 847    int ret;
 848
 849    /* Add footer to total size */
 850    total_size += HEADER_SIZE;
 851
 852    ret = blk_truncate(blk, total_size);
 853    if (ret < 0) {
 854        return ret;
 855    }
 856
 857    ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE, 0);
 858    if (ret < 0) {
 859        return ret;
 860    }
 861
 862    return ret;
 863}
 864
 865static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
 866{
 867    uint8_t buf[1024];
 868    VHDFooter *footer = (VHDFooter *) buf;
 869    char *disk_type_param;
 870    int i;
 871    uint16_t cyls = 0;
 872    uint8_t heads = 0;
 873    uint8_t secs_per_cyl = 0;
 874    int64_t total_sectors;
 875    int64_t total_size;
 876    int disk_type;
 877    int ret = -EIO;
 878    bool force_size;
 879    Error *local_err = NULL;
 880    BlockBackend *blk = NULL;
 881
 882    /* Read out options */
 883    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
 884                          BDRV_SECTOR_SIZE);
 885    disk_type_param = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
 886    if (disk_type_param) {
 887        if (!strcmp(disk_type_param, "dynamic")) {
 888            disk_type = VHD_DYNAMIC;
 889        } else if (!strcmp(disk_type_param, "fixed")) {
 890            disk_type = VHD_FIXED;
 891        } else {
 892            error_setg(errp, "Invalid disk type, %s", disk_type_param);
 893            ret = -EINVAL;
 894            goto out;
 895        }
 896    } else {
 897        disk_type = VHD_DYNAMIC;
 898    }
 899
 900    force_size = qemu_opt_get_bool_del(opts, VPC_OPT_FORCE_SIZE, false);
 901
 902    ret = bdrv_create_file(filename, opts, &local_err);
 903    if (ret < 0) {
 904        error_propagate(errp, local_err);
 905        goto out;
 906    }
 907
 908    blk = blk_new_open(filename, NULL, NULL,
 909                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
 910    if (blk == NULL) {
 911        error_propagate(errp, local_err);
 912        ret = -EIO;
 913        goto out;
 914    }
 915
 916    blk_set_allow_write_beyond_eof(blk, true);
 917
 918    /*
 919     * Calculate matching total_size and geometry. Increase the number of
 920     * sectors requested until we get enough (or fail). This ensures that
 921     * qemu-img convert doesn't truncate images, but rather rounds up.
 922     *
 923     * If the image size can't be represented by a spec conformant CHS geometry,
 924     * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use
 925     * the image size from the VHD footer to calculate total_sectors.
 926     */
 927    if (force_size) {
 928        /* This will force the use of total_size for sector count, below */
 929        cyls         = VHD_CHS_MAX_C;
 930        heads        = VHD_CHS_MAX_H;
 931        secs_per_cyl = VHD_CHS_MAX_S;
 932    } else {
 933        total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
 934        for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
 935            calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
 936        }
 937    }
 938
 939    if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) {
 940        total_sectors = total_size / BDRV_SECTOR_SIZE;
 941        /* Allow a maximum disk size of 2040 GiB */
 942        if (total_sectors > VHD_MAX_SECTORS) {
 943            error_setg(errp, "Disk size is too large, max size is 2040 GiB");
 944            ret = -EFBIG;
 945            goto out;
 946        }
 947    } else {
 948        total_sectors = (int64_t)cyls * heads * secs_per_cyl;
 949        total_size = total_sectors * BDRV_SECTOR_SIZE;
 950    }
 951
 952    /* Prepare the Hard Disk Footer */
 953    memset(buf, 0, 1024);
 954
 955    memcpy(footer->creator, "conectix", 8);
 956    if (force_size) {
 957        memcpy(footer->creator_app, "qem2", 4);
 958    } else {
 959        memcpy(footer->creator_app, "qemu", 4);
 960    }
 961    memcpy(footer->creator_os, "Wi2k", 4);
 962
 963    footer->features = cpu_to_be32(0x02);
 964    footer->version = cpu_to_be32(0x00010000);
 965    if (disk_type == VHD_DYNAMIC) {
 966        footer->data_offset = cpu_to_be64(HEADER_SIZE);
 967    } else {
 968        footer->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
 969    }
 970    footer->timestamp = cpu_to_be32(time(NULL) - VHD_TIMESTAMP_BASE);
 971
 972    /* Version of Virtual PC 2007 */
 973    footer->major = cpu_to_be16(0x0005);
 974    footer->minor = cpu_to_be16(0x0003);
 975    footer->orig_size = cpu_to_be64(total_size);
 976    footer->current_size = cpu_to_be64(total_size);
 977    footer->cyls = cpu_to_be16(cyls);
 978    footer->heads = heads;
 979    footer->secs_per_cyl = secs_per_cyl;
 980
 981    footer->type = cpu_to_be32(disk_type);
 982
 983#if defined(CONFIG_UUID)
 984    uuid_generate(footer->uuid);
 985#endif
 986
 987    footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE));
 988
 989    if (disk_type == VHD_DYNAMIC) {
 990        ret = create_dynamic_disk(blk, buf, total_sectors);
 991    } else {
 992        ret = create_fixed_disk(blk, buf, total_size);
 993    }
 994    if (ret < 0) {
 995        error_setg(errp, "Unable to create or write VHD header");
 996    }
 997
 998out:
 999    blk_unref(blk);
1000    g_free(disk_type_param);
1001    return ret;
1002}
1003
1004static int vpc_has_zero_init(BlockDriverState *bs)
1005{
1006    BDRVVPCState *s = bs->opaque;
1007    VHDFooter *footer =  (VHDFooter *) s->footer_buf;
1008
1009    if (be32_to_cpu(footer->type) == VHD_FIXED) {
1010        return bdrv_has_zero_init(bs->file->bs);
1011    } else {
1012        return 1;
1013    }
1014}
1015
1016static void vpc_close(BlockDriverState *bs)
1017{
1018    BDRVVPCState *s = bs->opaque;
1019    qemu_vfree(s->pagetable);
1020#ifdef CACHE
1021    g_free(s->pageentry_u8);
1022#endif
1023
1024    migrate_del_blocker(s->migration_blocker);
1025    error_free(s->migration_blocker);
1026}
1027
1028static QemuOptsList vpc_create_opts = {
1029    .name = "vpc-create-opts",
1030    .head = QTAILQ_HEAD_INITIALIZER(vpc_create_opts.head),
1031    .desc = {
1032        {
1033            .name = BLOCK_OPT_SIZE,
1034            .type = QEMU_OPT_SIZE,
1035            .help = "Virtual disk size"
1036        },
1037        {
1038            .name = BLOCK_OPT_SUBFMT,
1039            .type = QEMU_OPT_STRING,
1040            .help =
1041                "Type of virtual hard disk format. Supported formats are "
1042                "{dynamic (default) | fixed} "
1043        },
1044        {
1045            .name = VPC_OPT_FORCE_SIZE,
1046            .type = QEMU_OPT_BOOL,
1047            .help = "Force disk size calculation to use the actual size "
1048                    "specified, rather than using the nearest CHS-based "
1049                    "calculation"
1050        },
1051        { /* end of list */ }
1052    }
1053};
1054
1055static BlockDriver bdrv_vpc = {
1056    .format_name    = "vpc",
1057    .instance_size  = sizeof(BDRVVPCState),
1058
1059    .bdrv_probe             = vpc_probe,
1060    .bdrv_open              = vpc_open,
1061    .bdrv_close             = vpc_close,
1062    .bdrv_reopen_prepare    = vpc_reopen_prepare,
1063    .bdrv_create            = vpc_create,
1064
1065    .bdrv_co_preadv             = vpc_co_preadv,
1066    .bdrv_co_pwritev            = vpc_co_pwritev,
1067    .bdrv_co_get_block_status   = vpc_co_get_block_status,
1068
1069    .bdrv_get_info          = vpc_get_info,
1070
1071    .create_opts            = &vpc_create_opts,
1072    .bdrv_has_zero_init     = vpc_has_zero_init,
1073};
1074
1075static void bdrv_vpc_init(void)
1076{
1077    bdrv_register(&bdrv_vpc);
1078}
1079
1080block_init(bdrv_vpc_init);
1081