qemu/block/vpc.c
<<
>>
Prefs
   1/*
   2 * Block driver for Connectix / Microsoft Virtual PC images
   3 *
   4 * Copyright (c) 2005 Alex Beregszaszi
   5 * Copyright (c) 2009 Kevin Wolf <kwolf@suse.de>
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25#include "qemu/osdep.h"
  26#include "qapi/error.h"
  27#include "qemu-common.h"
  28#include "block/block_int.h"
  29#include "sysemu/block-backend.h"
  30#include "qemu/module.h"
  31#include "migration/migration.h"
  32#if defined(CONFIG_UUID)
  33#include <uuid/uuid.h>
  34#endif
  35
  36/**************************************************************/
  37
  38#define HEADER_SIZE 512
  39
  40//#define CACHE
  41
  42enum vhd_type {
  43    VHD_FIXED           = 2,
  44    VHD_DYNAMIC         = 3,
  45    VHD_DIFFERENCING    = 4,
  46};
  47
  48/* Seconds since Jan 1, 2000 0:00:00 (UTC) */
  49#define VHD_TIMESTAMP_BASE 946684800
  50
  51#define VHD_CHS_MAX_C   65535LL
  52#define VHD_CHS_MAX_H   16
  53#define VHD_CHS_MAX_S   255
  54
  55#define VHD_MAX_SECTORS       0xff000000    /* 2040 GiB max image size */
  56#define VHD_MAX_GEOMETRY      (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S)
  57
  58#define VPC_OPT_FORCE_SIZE "force_size"
  59
  60/* always big-endian */
  61typedef struct vhd_footer {
  62    char        creator[8]; /* "conectix" */
  63    uint32_t    features;
  64    uint32_t    version;
  65
  66    /* Offset of next header structure, 0xFFFFFFFF if none */
  67    uint64_t    data_offset;
  68
  69    /* Seconds since Jan 1, 2000 0:00:00 (UTC) */
  70    uint32_t    timestamp;
  71
  72    char        creator_app[4]; /*  e.g., "vpc " */
  73    uint16_t    major;
  74    uint16_t    minor;
  75    char        creator_os[4]; /* "Wi2k" */
  76
  77    uint64_t    orig_size;
  78    uint64_t    current_size;
  79
  80    uint16_t    cyls;
  81    uint8_t     heads;
  82    uint8_t     secs_per_cyl;
  83
  84    uint32_t    type;
  85
  86    /* Checksum of the Hard Disk Footer ("one's complement of the sum of all
  87       the bytes in the footer without the checksum field") */
  88    uint32_t    checksum;
  89
  90    /* UUID used to identify a parent hard disk (backing file) */
  91    uint8_t     uuid[16];
  92
  93    uint8_t     in_saved_state;
  94} QEMU_PACKED VHDFooter;
  95
  96typedef struct vhd_dyndisk_header {
  97    char        magic[8]; /* "cxsparse" */
  98
  99    /* Offset of next header structure, 0xFFFFFFFF if none */
 100    uint64_t    data_offset;
 101
 102    /* Offset of the Block Allocation Table (BAT) */
 103    uint64_t    table_offset;
 104
 105    uint32_t    version;
 106    uint32_t    max_table_entries; /* 32bit/entry */
 107
 108    /* 2 MB by default, must be a power of two */
 109    uint32_t    block_size;
 110
 111    uint32_t    checksum;
 112    uint8_t     parent_uuid[16];
 113    uint32_t    parent_timestamp;
 114    uint32_t    reserved;
 115
 116    /* Backing file name (in UTF-16) */
 117    uint8_t     parent_name[512];
 118
 119    struct {
 120        uint32_t    platform;
 121        uint32_t    data_space;
 122        uint32_t    data_length;
 123        uint32_t    reserved;
 124        uint64_t    data_offset;
 125    } parent_locator[8];
 126} QEMU_PACKED VHDDynDiskHeader;
 127
 128typedef struct BDRVVPCState {
 129    CoMutex lock;
 130    uint8_t footer_buf[HEADER_SIZE];
 131    uint64_t free_data_block_offset;
 132    int max_table_entries;
 133    uint32_t *pagetable;
 134    uint64_t bat_offset;
 135    uint64_t last_bitmap_offset;
 136
 137    uint32_t block_size;
 138    uint32_t bitmap_size;
 139    bool force_use_chs;
 140    bool force_use_sz;
 141
 142#ifdef CACHE
 143    uint8_t *pageentry_u8;
 144    uint32_t *pageentry_u32;
 145    uint16_t *pageentry_u16;
 146
 147    uint64_t last_bitmap;
 148#endif
 149
 150    Error *migration_blocker;
 151} BDRVVPCState;
 152
 153#define VPC_OPT_SIZE_CALC "force_size_calc"
 154static QemuOptsList vpc_runtime_opts = {
 155    .name = "vpc-runtime-opts",
 156    .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head),
 157    .desc = {
 158        {
 159            .name = VPC_OPT_SIZE_CALC,
 160            .type = QEMU_OPT_STRING,
 161            .help = "Force disk size calculation to use either CHS geometry, "
 162                    "or use the disk current_size specified in the VHD footer. "
 163                    "{chs, current_size}"
 164        },
 165        { /* end of list */ }
 166    }
 167};
 168
 169static uint32_t vpc_checksum(uint8_t* buf, size_t size)
 170{
 171    uint32_t res = 0;
 172    int i;
 173
 174    for (i = 0; i < size; i++)
 175        res += buf[i];
 176
 177    return ~res;
 178}
 179
 180
 181static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
 182{
 183    if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8))
 184        return 100;
 185    return 0;
 186}
 187
 188static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts,
 189                              Error **errp)
 190{
 191    BDRVVPCState *s = bs->opaque;
 192    const char *size_calc;
 193
 194    size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC);
 195
 196    if (!size_calc) {
 197       /* no override, use autodetect only */
 198    } else if (!strcmp(size_calc, "current_size")) {
 199        s->force_use_sz = true;
 200    } else if (!strcmp(size_calc, "chs")) {
 201        s->force_use_chs = true;
 202    } else {
 203        error_setg(errp, "Invalid size calculation mode: '%s'", size_calc);
 204    }
 205}
 206
 207static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
 208                    Error **errp)
 209{
 210    BDRVVPCState *s = bs->opaque;
 211    int i;
 212    VHDFooter *footer;
 213    VHDDynDiskHeader *dyndisk_header;
 214    QemuOpts *opts = NULL;
 215    Error *local_err = NULL;
 216    bool use_chs;
 217    uint8_t buf[HEADER_SIZE];
 218    uint32_t checksum;
 219    uint64_t computed_size;
 220    uint64_t pagetable_size;
 221    int disk_type = VHD_DYNAMIC;
 222    int ret;
 223
 224    opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
 225    qemu_opts_absorb_qdict(opts, options, &local_err);
 226    if (local_err) {
 227        error_propagate(errp, local_err);
 228        ret = -EINVAL;
 229        goto fail;
 230    }
 231
 232    vpc_parse_options(bs, opts, &local_err);
 233    if (local_err) {
 234        error_propagate(errp, local_err);
 235        ret = -EINVAL;
 236        goto fail;
 237    }
 238
 239    ret = bdrv_pread(bs->file->bs, 0, s->footer_buf, HEADER_SIZE);
 240    if (ret < 0) {
 241        error_setg(errp, "Unable to read VHD header");
 242        goto fail;
 243    }
 244
 245    footer = (VHDFooter *) s->footer_buf;
 246    if (strncmp(footer->creator, "conectix", 8)) {
 247        int64_t offset = bdrv_getlength(bs->file->bs);
 248        if (offset < 0) {
 249            ret = offset;
 250            error_setg(errp, "Invalid file size");
 251            goto fail;
 252        } else if (offset < HEADER_SIZE) {
 253            ret = -EINVAL;
 254            error_setg(errp, "File too small for a VHD header");
 255            goto fail;
 256        }
 257
 258        /* If a fixed disk, the footer is found only at the end of the file */
 259        ret = bdrv_pread(bs->file->bs, offset-HEADER_SIZE, s->footer_buf,
 260                         HEADER_SIZE);
 261        if (ret < 0) {
 262            goto fail;
 263        }
 264        if (strncmp(footer->creator, "conectix", 8)) {
 265            error_setg(errp, "invalid VPC image");
 266            ret = -EINVAL;
 267            goto fail;
 268        }
 269        disk_type = VHD_FIXED;
 270    }
 271
 272    checksum = be32_to_cpu(footer->checksum);
 273    footer->checksum = 0;
 274    if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum)
 275        fprintf(stderr, "block-vpc: The header checksum of '%s' is "
 276            "incorrect.\n", bs->filename);
 277
 278    /* Write 'checksum' back to footer, or else will leave it with zero. */
 279    footer->checksum = cpu_to_be32(checksum);
 280
 281    /* The visible size of a image in Virtual PC depends on the geometry
 282       rather than on the size stored in the footer (the size in the footer
 283       is too large usually) */
 284    bs->total_sectors = (int64_t)
 285        be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
 286
 287    /* Microsoft Virtual PC and Microsoft Hyper-V produce and read
 288     * VHD image sizes differently.  VPC will rely on CHS geometry,
 289     * while Hyper-V and disk2vhd use the size specified in the footer.
 290     *
 291     * We use a couple of approaches to try and determine the correct method:
 292     * look at the Creator App field, and look for images that have CHS
 293     * geometry that is the maximum value.
 294     *
 295     * If the CHS geometry is the maximum CHS geometry, then we assume that
 296     * the size is the footer->current_size to avoid truncation.  Otherwise,
 297     * we follow the table based on footer->creator_app:
 298     *
 299     *  Known creator apps:
 300     *      'vpc '  :  CHS              Virtual PC (uses disk geometry)
 301     *      'qemu'  :  CHS              QEMU (uses disk geometry)
 302     *      'qem2'  :  current_size     QEMU (uses current_size)
 303     *      'win '  :  current_size     Hyper-V
 304     *      'd2v '  :  current_size     Disk2vhd
 305     *      'tap\0' :  current_size     XenServer
 306     *      'CTXS'  :  current_size     XenConverter
 307     *
 308     *  The user can override the table values via drive options, however
 309     *  even with an override we will still use current_size for images
 310     *  that have CHS geometry of the maximum size.
 311     */
 312    use_chs = (!!strncmp(footer->creator_app, "win ", 4) &&
 313               !!strncmp(footer->creator_app, "qem2", 4) &&
 314               !!strncmp(footer->creator_app, "d2v ", 4) &&
 315               !!strncmp(footer->creator_app, "CTXS", 4) &&
 316               !!memcmp(footer->creator_app, "tap", 4)) || s->force_use_chs;
 317
 318    if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) {
 319        bs->total_sectors = be64_to_cpu(footer->current_size) /
 320                                        BDRV_SECTOR_SIZE;
 321    }
 322
 323    /* Allow a maximum disk size of 2040 GiB */
 324    if (bs->total_sectors > VHD_MAX_SECTORS) {
 325        ret = -EFBIG;
 326        goto fail;
 327    }
 328
 329    if (disk_type == VHD_DYNAMIC) {
 330        ret = bdrv_pread(bs->file->bs, be64_to_cpu(footer->data_offset), buf,
 331                         HEADER_SIZE);
 332        if (ret < 0) {
 333            error_setg(errp, "Error reading dynamic VHD header");
 334            goto fail;
 335        }
 336
 337        dyndisk_header = (VHDDynDiskHeader *) buf;
 338
 339        if (strncmp(dyndisk_header->magic, "cxsparse", 8)) {
 340            error_setg(errp, "Invalid header magic");
 341            ret = -EINVAL;
 342            goto fail;
 343        }
 344
 345        s->block_size = be32_to_cpu(dyndisk_header->block_size);
 346        if (!is_power_of_2(s->block_size) || s->block_size < BDRV_SECTOR_SIZE) {
 347            error_setg(errp, "Invalid block size %" PRIu32, s->block_size);
 348            ret = -EINVAL;
 349            goto fail;
 350        }
 351        s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;
 352
 353        s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
 354
 355        if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) {
 356            error_setg(errp, "Too many blocks");
 357            ret = -EINVAL;
 358            goto fail;
 359        }
 360
 361        computed_size = (uint64_t) s->max_table_entries * s->block_size;
 362        if (computed_size < bs->total_sectors * 512) {
 363            error_setg(errp, "Page table too small");
 364            ret = -EINVAL;
 365            goto fail;
 366        }
 367
 368        if (s->max_table_entries > SIZE_MAX / 4 ||
 369            s->max_table_entries > (int) INT_MAX / 4) {
 370            error_setg(errp, "Max Table Entries too large (%" PRId32 ")",
 371                        s->max_table_entries);
 372            ret = -EINVAL;
 373            goto fail;
 374        }
 375
 376        pagetable_size = (uint64_t) s->max_table_entries * 4;
 377
 378        s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size);
 379        if (s->pagetable == NULL) {
 380            error_setg(errp, "Unable to allocate memory for page table");
 381            ret = -ENOMEM;
 382            goto fail;
 383        }
 384
 385        s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
 386
 387        ret = bdrv_pread(bs->file->bs, s->bat_offset, s->pagetable,
 388                         pagetable_size);
 389        if (ret < 0) {
 390            error_setg(errp, "Error reading pagetable");
 391            goto fail;
 392        }
 393
 394        s->free_data_block_offset =
 395            ROUND_UP(s->bat_offset + pagetable_size, 512);
 396
 397        for (i = 0; i < s->max_table_entries; i++) {
 398            be32_to_cpus(&s->pagetable[i]);
 399            if (s->pagetable[i] != 0xFFFFFFFF) {
 400                int64_t next = (512 * (int64_t) s->pagetable[i]) +
 401                    s->bitmap_size + s->block_size;
 402
 403                if (next > s->free_data_block_offset) {
 404                    s->free_data_block_offset = next;
 405                }
 406            }
 407        }
 408
 409        if (s->free_data_block_offset > bdrv_getlength(bs->file->bs)) {
 410            error_setg(errp, "block-vpc: free_data_block_offset points after "
 411                             "the end of file. The image has been truncated.");
 412            ret = -EINVAL;
 413            goto fail;
 414        }
 415
 416        s->last_bitmap_offset = (int64_t) -1;
 417
 418#ifdef CACHE
 419        s->pageentry_u8 = g_malloc(512);
 420        s->pageentry_u32 = s->pageentry_u8;
 421        s->pageentry_u16 = s->pageentry_u8;
 422        s->last_pagetable = -1;
 423#endif
 424    }
 425
 426    qemu_co_mutex_init(&s->lock);
 427
 428    /* Disable migration when VHD images are used */
 429    error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
 430               "does not support live migration",
 431               bdrv_get_device_or_node_name(bs));
 432    migrate_add_blocker(s->migration_blocker);
 433
 434    return 0;
 435
 436fail:
 437    qemu_vfree(s->pagetable);
 438#ifdef CACHE
 439    g_free(s->pageentry_u8);
 440#endif
 441    return ret;
 442}
 443
 444static int vpc_reopen_prepare(BDRVReopenState *state,
 445                              BlockReopenQueue *queue, Error **errp)
 446{
 447    return 0;
 448}
 449
 450/*
 451 * Returns the absolute byte offset of the given sector in the image file.
 452 * If the sector is not allocated, -1 is returned instead.
 453 *
 454 * The parameter write must be 1 if the offset will be used for a write
 455 * operation (the block bitmaps is updated then), 0 otherwise.
 456 */
 457static inline int64_t get_sector_offset(BlockDriverState *bs,
 458    int64_t sector_num, int write)
 459{
 460    BDRVVPCState *s = bs->opaque;
 461    uint64_t offset = sector_num * 512;
 462    uint64_t bitmap_offset, block_offset;
 463    uint32_t pagetable_index, pageentry_index;
 464
 465    pagetable_index = offset / s->block_size;
 466    pageentry_index = (offset % s->block_size) / 512;
 467
 468    if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
 469        return -1; /* not allocated */
 470
 471    bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
 472    block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index);
 473
 474    /* We must ensure that we don't write to any sectors which are marked as
 475       unused in the bitmap. We get away with setting all bits in the block
 476       bitmap each time we write to a new block. This might cause Virtual PC to
 477       miss sparse read optimization, but it's not a problem in terms of
 478       correctness. */
 479    if (write && (s->last_bitmap_offset != bitmap_offset)) {
 480        uint8_t bitmap[s->bitmap_size];
 481
 482        s->last_bitmap_offset = bitmap_offset;
 483        memset(bitmap, 0xff, s->bitmap_size);
 484        bdrv_pwrite_sync(bs->file->bs, bitmap_offset, bitmap, s->bitmap_size);
 485    }
 486
 487    return block_offset;
 488}
 489
 490/*
 491 * Writes the footer to the end of the image file. This is needed when the
 492 * file grows as it overwrites the old footer
 493 *
 494 * Returns 0 on success and < 0 on error
 495 */
 496static int rewrite_footer(BlockDriverState* bs)
 497{
 498    int ret;
 499    BDRVVPCState *s = bs->opaque;
 500    int64_t offset = s->free_data_block_offset;
 501
 502    ret = bdrv_pwrite_sync(bs->file->bs, offset, s->footer_buf, HEADER_SIZE);
 503    if (ret < 0)
 504        return ret;
 505
 506    return 0;
 507}
 508
 509/*
 510 * Allocates a new block. This involves writing a new footer and updating
 511 * the Block Allocation Table to use the space at the old end of the image
 512 * file (overwriting the old footer)
 513 *
 514 * Returns the sectors' offset in the image file on success and < 0 on error
 515 */
 516static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
 517{
 518    BDRVVPCState *s = bs->opaque;
 519    int64_t bat_offset;
 520    uint32_t index, bat_value;
 521    int ret;
 522    uint8_t bitmap[s->bitmap_size];
 523
 524    /* Check if sector_num is valid */
 525    if ((sector_num < 0) || (sector_num > bs->total_sectors))
 526        return -1;
 527
 528    /* Write entry into in-memory BAT */
 529    index = (sector_num * 512) / s->block_size;
 530    if (s->pagetable[index] != 0xFFFFFFFF)
 531        return -1;
 532
 533    s->pagetable[index] = s->free_data_block_offset / 512;
 534
 535    /* Initialize the block's bitmap */
 536    memset(bitmap, 0xff, s->bitmap_size);
 537    ret = bdrv_pwrite_sync(bs->file->bs, s->free_data_block_offset, bitmap,
 538        s->bitmap_size);
 539    if (ret < 0) {
 540        return ret;
 541    }
 542
 543    /* Write new footer (the old one will be overwritten) */
 544    s->free_data_block_offset += s->block_size + s->bitmap_size;
 545    ret = rewrite_footer(bs);
 546    if (ret < 0)
 547        goto fail;
 548
 549    /* Write BAT entry to disk */
 550    bat_offset = s->bat_offset + (4 * index);
 551    bat_value = cpu_to_be32(s->pagetable[index]);
 552    ret = bdrv_pwrite_sync(bs->file->bs, bat_offset, &bat_value, 4);
 553    if (ret < 0)
 554        goto fail;
 555
 556    return get_sector_offset(bs, sector_num, 0);
 557
 558fail:
 559    s->free_data_block_offset -= (s->block_size + s->bitmap_size);
 560    return -1;
 561}
 562
 563static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 564{
 565    BDRVVPCState *s = (BDRVVPCState *)bs->opaque;
 566    VHDFooter *footer = (VHDFooter *) s->footer_buf;
 567
 568    if (be32_to_cpu(footer->type) != VHD_FIXED) {
 569        bdi->cluster_size = s->block_size;
 570    }
 571
 572    bdi->unallocated_blocks_are_zero = true;
 573    return 0;
 574}
 575
 576static int vpc_read(BlockDriverState *bs, int64_t sector_num,
 577                    uint8_t *buf, int nb_sectors)
 578{
 579    BDRVVPCState *s = bs->opaque;
 580    int ret;
 581    int64_t offset;
 582    int64_t sectors, sectors_per_block;
 583    VHDFooter *footer = (VHDFooter *) s->footer_buf;
 584
 585    if (be32_to_cpu(footer->type) == VHD_FIXED) {
 586        return bdrv_read(bs->file->bs, sector_num, buf, nb_sectors);
 587    }
 588    while (nb_sectors > 0) {
 589        offset = get_sector_offset(bs, sector_num, 0);
 590
 591        sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
 592        sectors = sectors_per_block - (sector_num % sectors_per_block);
 593        if (sectors > nb_sectors) {
 594            sectors = nb_sectors;
 595        }
 596
 597        if (offset == -1) {
 598            memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
 599        } else {
 600            ret = bdrv_pread(bs->file->bs, offset, buf,
 601                sectors * BDRV_SECTOR_SIZE);
 602            if (ret != sectors * BDRV_SECTOR_SIZE) {
 603                return -1;
 604            }
 605        }
 606
 607        nb_sectors -= sectors;
 608        sector_num += sectors;
 609        buf += sectors * BDRV_SECTOR_SIZE;
 610    }
 611    return 0;
 612}
 613
 614static coroutine_fn int vpc_co_read(BlockDriverState *bs, int64_t sector_num,
 615                                    uint8_t *buf, int nb_sectors)
 616{
 617    int ret;
 618    BDRVVPCState *s = bs->opaque;
 619    qemu_co_mutex_lock(&s->lock);
 620    ret = vpc_read(bs, sector_num, buf, nb_sectors);
 621    qemu_co_mutex_unlock(&s->lock);
 622    return ret;
 623}
 624
 625static int vpc_write(BlockDriverState *bs, int64_t sector_num,
 626    const uint8_t *buf, int nb_sectors)
 627{
 628    BDRVVPCState *s = bs->opaque;
 629    int64_t offset;
 630    int64_t sectors, sectors_per_block;
 631    int ret;
 632    VHDFooter *footer =  (VHDFooter *) s->footer_buf;
 633
 634    if (be32_to_cpu(footer->type) == VHD_FIXED) {
 635        return bdrv_write(bs->file->bs, sector_num, buf, nb_sectors);
 636    }
 637    while (nb_sectors > 0) {
 638        offset = get_sector_offset(bs, sector_num, 1);
 639
 640        sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
 641        sectors = sectors_per_block - (sector_num % sectors_per_block);
 642        if (sectors > nb_sectors) {
 643            sectors = nb_sectors;
 644        }
 645
 646        if (offset == -1) {
 647            offset = alloc_block(bs, sector_num);
 648            if (offset < 0)
 649                return -1;
 650        }
 651
 652        ret = bdrv_pwrite(bs->file->bs, offset, buf,
 653                          sectors * BDRV_SECTOR_SIZE);
 654        if (ret != sectors * BDRV_SECTOR_SIZE) {
 655            return -1;
 656        }
 657
 658        nb_sectors -= sectors;
 659        sector_num += sectors;
 660        buf += sectors * BDRV_SECTOR_SIZE;
 661    }
 662
 663    return 0;
 664}
 665
 666static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num,
 667                                     const uint8_t *buf, int nb_sectors)
 668{
 669    int ret;
 670    BDRVVPCState *s = bs->opaque;
 671    qemu_co_mutex_lock(&s->lock);
 672    ret = vpc_write(bs, sector_num, buf, nb_sectors);
 673    qemu_co_mutex_unlock(&s->lock);
 674    return ret;
 675}
 676
 677static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
 678        int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
 679{
 680    BDRVVPCState *s = bs->opaque;
 681    VHDFooter *footer = (VHDFooter*) s->footer_buf;
 682    int64_t start, offset;
 683    bool allocated;
 684    int n;
 685
 686    if (be32_to_cpu(footer->type) == VHD_FIXED) {
 687        *pnum = nb_sectors;
 688        *file = bs->file->bs;
 689        return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
 690               (sector_num << BDRV_SECTOR_BITS);
 691    }
 692
 693    offset = get_sector_offset(bs, sector_num, 0);
 694    start = offset;
 695    allocated = (offset != -1);
 696    *pnum = 0;
 697
 698    do {
 699        /* All sectors in a block are contiguous (without using the bitmap) */
 700        n = ROUND_UP(sector_num + 1, s->block_size / BDRV_SECTOR_SIZE)
 701          - sector_num;
 702        n = MIN(n, nb_sectors);
 703
 704        *pnum += n;
 705        sector_num += n;
 706        nb_sectors -= n;
 707        /* *pnum can't be greater than one block for allocated
 708         * sectors since there is always a bitmap in between. */
 709        if (allocated) {
 710            *file = bs->file->bs;
 711            return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
 712        }
 713        if (nb_sectors == 0) {
 714            break;
 715        }
 716        offset = get_sector_offset(bs, sector_num, 0);
 717    } while (offset == -1);
 718
 719    return 0;
 720}
 721
 722/*
 723 * Calculates the number of cylinders, heads and sectors per cylinder
 724 * based on a given number of sectors. This is the algorithm described
 725 * in the VHD specification.
 726 *
 727 * Note that the geometry doesn't always exactly match total_sectors but
 728 * may round it down.
 729 *
 730 * Returns 0 on success, -EFBIG if the size is larger than 2040 GiB. Override
 731 * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB)
 732 * and instead allow up to 255 heads.
 733 */
 734static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
 735    uint8_t* heads, uint8_t* secs_per_cyl)
 736{
 737    uint32_t cyls_times_heads;
 738
 739    total_sectors = MIN(total_sectors, VHD_MAX_GEOMETRY);
 740
 741    if (total_sectors >= 65535LL * 16 * 63) {
 742        *secs_per_cyl = 255;
 743        *heads = 16;
 744        cyls_times_heads = total_sectors / *secs_per_cyl;
 745    } else {
 746        *secs_per_cyl = 17;
 747        cyls_times_heads = total_sectors / *secs_per_cyl;
 748        *heads = (cyls_times_heads + 1023) / 1024;
 749
 750        if (*heads < 4) {
 751            *heads = 4;
 752        }
 753
 754        if (cyls_times_heads >= (*heads * 1024) || *heads > 16) {
 755            *secs_per_cyl = 31;
 756            *heads = 16;
 757            cyls_times_heads = total_sectors / *secs_per_cyl;
 758        }
 759
 760        if (cyls_times_heads >= (*heads * 1024)) {
 761            *secs_per_cyl = 63;
 762            *heads = 16;
 763            cyls_times_heads = total_sectors / *secs_per_cyl;
 764        }
 765    }
 766
 767    *cyls = cyls_times_heads / *heads;
 768
 769    return 0;
 770}
 771
 772static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
 773                               int64_t total_sectors)
 774{
 775    VHDDynDiskHeader *dyndisk_header =
 776        (VHDDynDiskHeader *) buf;
 777    size_t block_size, num_bat_entries;
 778    int i;
 779    int ret;
 780    int64_t offset = 0;
 781
 782    /* Write the footer (twice: at the beginning and at the end) */
 783    block_size = 0x200000;
 784    num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
 785
 786    ret = blk_pwrite(blk, offset, buf, HEADER_SIZE);
 787    if (ret < 0) {
 788        goto fail;
 789    }
 790
 791    offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
 792    ret = blk_pwrite(blk, offset, buf, HEADER_SIZE);
 793    if (ret < 0) {
 794        goto fail;
 795    }
 796
 797    /* Write the initial BAT */
 798    offset = 3 * 512;
 799
 800    memset(buf, 0xFF, 512);
 801    for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
 802        ret = blk_pwrite(blk, offset, buf, 512);
 803        if (ret < 0) {
 804            goto fail;
 805        }
 806        offset += 512;
 807    }
 808
 809    /* Prepare the Dynamic Disk Header */
 810    memset(buf, 0, 1024);
 811
 812    memcpy(dyndisk_header->magic, "cxsparse", 8);
 813
 814    /*
 815     * Note: The spec is actually wrong here for data_offset, it says
 816     * 0xFFFFFFFF, but MS tools expect all 64 bits to be set.
 817     */
 818    dyndisk_header->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
 819    dyndisk_header->table_offset = cpu_to_be64(3 * 512);
 820    dyndisk_header->version = cpu_to_be32(0x00010000);
 821    dyndisk_header->block_size = cpu_to_be32(block_size);
 822    dyndisk_header->max_table_entries = cpu_to_be32(num_bat_entries);
 823
 824    dyndisk_header->checksum = cpu_to_be32(vpc_checksum(buf, 1024));
 825
 826    /* Write the header */
 827    offset = 512;
 828
 829    ret = blk_pwrite(blk, offset, buf, 1024);
 830    if (ret < 0) {
 831        goto fail;
 832    }
 833
 834 fail:
 835    return ret;
 836}
 837
 838static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
 839                             int64_t total_size)
 840{
 841    int ret;
 842
 843    /* Add footer to total size */
 844    total_size += HEADER_SIZE;
 845
 846    ret = blk_truncate(blk, total_size);
 847    if (ret < 0) {
 848        return ret;
 849    }
 850
 851    ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE);
 852    if (ret < 0) {
 853        return ret;
 854    }
 855
 856    return ret;
 857}
 858
 859static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
 860{
 861    uint8_t buf[1024];
 862    VHDFooter *footer = (VHDFooter *) buf;
 863    char *disk_type_param;
 864    int i;
 865    uint16_t cyls = 0;
 866    uint8_t heads = 0;
 867    uint8_t secs_per_cyl = 0;
 868    int64_t total_sectors;
 869    int64_t total_size;
 870    int disk_type;
 871    int ret = -EIO;
 872    bool force_size;
 873    Error *local_err = NULL;
 874    BlockBackend *blk = NULL;
 875
 876    /* Read out options */
 877    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
 878                          BDRV_SECTOR_SIZE);
 879    disk_type_param = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
 880    if (disk_type_param) {
 881        if (!strcmp(disk_type_param, "dynamic")) {
 882            disk_type = VHD_DYNAMIC;
 883        } else if (!strcmp(disk_type_param, "fixed")) {
 884            disk_type = VHD_FIXED;
 885        } else {
 886            error_setg(errp, "Invalid disk type, %s", disk_type_param);
 887            ret = -EINVAL;
 888            goto out;
 889        }
 890    } else {
 891        disk_type = VHD_DYNAMIC;
 892    }
 893
 894    force_size = qemu_opt_get_bool_del(opts, VPC_OPT_FORCE_SIZE, false);
 895
 896    ret = bdrv_create_file(filename, opts, &local_err);
 897    if (ret < 0) {
 898        error_propagate(errp, local_err);
 899        goto out;
 900    }
 901
 902    blk = blk_new_open(filename, NULL, NULL,
 903                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
 904    if (blk == NULL) {
 905        error_propagate(errp, local_err);
 906        ret = -EIO;
 907        goto out;
 908    }
 909
 910    blk_set_allow_write_beyond_eof(blk, true);
 911
 912    /*
 913     * Calculate matching total_size and geometry. Increase the number of
 914     * sectors requested until we get enough (or fail). This ensures that
 915     * qemu-img convert doesn't truncate images, but rather rounds up.
 916     *
 917     * If the image size can't be represented by a spec conformant CHS geometry,
 918     * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use
 919     * the image size from the VHD footer to calculate total_sectors.
 920     */
 921    if (force_size) {
 922        /* This will force the use of total_size for sector count, below */
 923        cyls         = VHD_CHS_MAX_C;
 924        heads        = VHD_CHS_MAX_H;
 925        secs_per_cyl = VHD_CHS_MAX_S;
 926    } else {
 927        total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
 928        for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
 929            calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
 930        }
 931    }
 932
 933    if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) {
 934        total_sectors = total_size / BDRV_SECTOR_SIZE;
 935        /* Allow a maximum disk size of 2040 GiB */
 936        if (total_sectors > VHD_MAX_SECTORS) {
 937            error_setg(errp, "Disk size is too large, max size is 2040 GiB");
 938            ret = -EFBIG;
 939            goto out;
 940        }
 941    } else {
 942        total_sectors = (int64_t)cyls * heads * secs_per_cyl;
 943        total_size = total_sectors * BDRV_SECTOR_SIZE;
 944    }
 945
 946    /* Prepare the Hard Disk Footer */
 947    memset(buf, 0, 1024);
 948
 949    memcpy(footer->creator, "conectix", 8);
 950    if (force_size) {
 951        memcpy(footer->creator_app, "qem2", 4);
 952    } else {
 953        memcpy(footer->creator_app, "qemu", 4);
 954    }
 955    memcpy(footer->creator_os, "Wi2k", 4);
 956
 957    footer->features = cpu_to_be32(0x02);
 958    footer->version = cpu_to_be32(0x00010000);
 959    if (disk_type == VHD_DYNAMIC) {
 960        footer->data_offset = cpu_to_be64(HEADER_SIZE);
 961    } else {
 962        footer->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
 963    }
 964    footer->timestamp = cpu_to_be32(time(NULL) - VHD_TIMESTAMP_BASE);
 965
 966    /* Version of Virtual PC 2007 */
 967    footer->major = cpu_to_be16(0x0005);
 968    footer->minor = cpu_to_be16(0x0003);
 969    footer->orig_size = cpu_to_be64(total_size);
 970    footer->current_size = cpu_to_be64(total_size);
 971    footer->cyls = cpu_to_be16(cyls);
 972    footer->heads = heads;
 973    footer->secs_per_cyl = secs_per_cyl;
 974
 975    footer->type = cpu_to_be32(disk_type);
 976
 977#if defined(CONFIG_UUID)
 978    uuid_generate(footer->uuid);
 979#endif
 980
 981    footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE));
 982
 983    if (disk_type == VHD_DYNAMIC) {
 984        ret = create_dynamic_disk(blk, buf, total_sectors);
 985    } else {
 986        ret = create_fixed_disk(blk, buf, total_size);
 987    }
 988    if (ret < 0) {
 989        error_setg(errp, "Unable to create or write VHD header");
 990    }
 991
 992out:
 993    blk_unref(blk);
 994    g_free(disk_type_param);
 995    return ret;
 996}
 997
 998static int vpc_has_zero_init(BlockDriverState *bs)
 999{
1000    BDRVVPCState *s = bs->opaque;
1001    VHDFooter *footer =  (VHDFooter *) s->footer_buf;
1002
1003    if (be32_to_cpu(footer->type) == VHD_FIXED) {
1004        return bdrv_has_zero_init(bs->file->bs);
1005    } else {
1006        return 1;
1007    }
1008}
1009
1010static void vpc_close(BlockDriverState *bs)
1011{
1012    BDRVVPCState *s = bs->opaque;
1013    qemu_vfree(s->pagetable);
1014#ifdef CACHE
1015    g_free(s->pageentry_u8);
1016#endif
1017
1018    migrate_del_blocker(s->migration_blocker);
1019    error_free(s->migration_blocker);
1020}
1021
1022static QemuOptsList vpc_create_opts = {
1023    .name = "vpc-create-opts",
1024    .head = QTAILQ_HEAD_INITIALIZER(vpc_create_opts.head),
1025    .desc = {
1026        {
1027            .name = BLOCK_OPT_SIZE,
1028            .type = QEMU_OPT_SIZE,
1029            .help = "Virtual disk size"
1030        },
1031        {
1032            .name = BLOCK_OPT_SUBFMT,
1033            .type = QEMU_OPT_STRING,
1034            .help =
1035                "Type of virtual hard disk format. Supported formats are "
1036                "{dynamic (default) | fixed} "
1037        },
1038        {
1039            .name = VPC_OPT_FORCE_SIZE,
1040            .type = QEMU_OPT_BOOL,
1041            .help = "Force disk size calculation to use the actual size "
1042                    "specified, rather than using the nearest CHS-based "
1043                    "calculation"
1044        },
1045        { /* end of list */ }
1046    }
1047};
1048
1049static BlockDriver bdrv_vpc = {
1050    .format_name    = "vpc",
1051    .instance_size  = sizeof(BDRVVPCState),
1052
1053    .bdrv_probe             = vpc_probe,
1054    .bdrv_open              = vpc_open,
1055    .bdrv_close             = vpc_close,
1056    .bdrv_reopen_prepare    = vpc_reopen_prepare,
1057    .bdrv_create            = vpc_create,
1058
1059    .bdrv_read                  = vpc_co_read,
1060    .bdrv_write                 = vpc_co_write,
1061    .bdrv_co_get_block_status   = vpc_co_get_block_status,
1062
1063    .bdrv_get_info          = vpc_get_info,
1064
1065    .create_opts            = &vpc_create_opts,
1066    .bdrv_has_zero_init     = vpc_has_zero_init,
1067};
1068
1069static void bdrv_vpc_init(void)
1070{
1071    bdrv_register(&bdrv_vpc);
1072}
1073
1074block_init(bdrv_vpc_init);
1075