qemu/block/vpc.c
<<
>>
Prefs
   1/*
   2 * Block driver for Connectix / Microsoft Virtual PC images
   3 *
   4 * Copyright (c) 2005 Alex Beregszaszi
   5 * Copyright (c) 2009 Kevin Wolf <kwolf@suse.de>
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25#include "qemu/osdep.h"
  26#include "qapi/error.h"
  27#include "qemu-common.h"
  28#include "block/block_int.h"
  29#include "sysemu/block-backend.h"
  30#include "qemu/module.h"
  31#include "migration/blocker.h"
  32#include "qemu/bswap.h"
  33#include "qemu/uuid.h"
  34
  35/**************************************************************/
  36
  37#define HEADER_SIZE 512
  38
  39//#define CACHE
  40
  41enum vhd_type {
  42    VHD_FIXED           = 2,
  43    VHD_DYNAMIC         = 3,
  44    VHD_DIFFERENCING    = 4,
  45};
  46
  47/* Seconds since Jan 1, 2000 0:00:00 (UTC) */
  48#define VHD_TIMESTAMP_BASE 946684800
  49
  50#define VHD_CHS_MAX_C   65535LL
  51#define VHD_CHS_MAX_H   16
  52#define VHD_CHS_MAX_S   255
  53
  54#define VHD_MAX_SECTORS       0xff000000    /* 2040 GiB max image size */
  55#define VHD_MAX_GEOMETRY      (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S)
  56
  57#define VPC_OPT_FORCE_SIZE "force_size"
  58
  59/* always big-endian */
  60typedef struct vhd_footer {
  61    char        creator[8]; /* "conectix" */
  62    uint32_t    features;
  63    uint32_t    version;
  64
  65    /* Offset of next header structure, 0xFFFFFFFF if none */
  66    uint64_t    data_offset;
  67
  68    /* Seconds since Jan 1, 2000 0:00:00 (UTC) */
  69    uint32_t    timestamp;
  70
  71    char        creator_app[4]; /*  e.g., "vpc " */
  72    uint16_t    major;
  73    uint16_t    minor;
  74    char        creator_os[4]; /* "Wi2k" */
  75
  76    uint64_t    orig_size;
  77    uint64_t    current_size;
  78
  79    uint16_t    cyls;
  80    uint8_t     heads;
  81    uint8_t     secs_per_cyl;
  82
  83    uint32_t    type;
  84
  85    /* Checksum of the Hard Disk Footer ("one's complement of the sum of all
  86       the bytes in the footer without the checksum field") */
  87    uint32_t    checksum;
  88
  89    /* UUID used to identify a parent hard disk (backing file) */
  90    QemuUUID    uuid;
  91
  92    uint8_t     in_saved_state;
  93} QEMU_PACKED VHDFooter;
  94
  95typedef struct vhd_dyndisk_header {
  96    char        magic[8]; /* "cxsparse" */
  97
  98    /* Offset of next header structure, 0xFFFFFFFF if none */
  99    uint64_t    data_offset;
 100
 101    /* Offset of the Block Allocation Table (BAT) */
 102    uint64_t    table_offset;
 103
 104    uint32_t    version;
 105    uint32_t    max_table_entries; /* 32bit/entry */
 106
 107    /* 2 MB by default, must be a power of two */
 108    uint32_t    block_size;
 109
 110    uint32_t    checksum;
 111    uint8_t     parent_uuid[16];
 112    uint32_t    parent_timestamp;
 113    uint32_t    reserved;
 114
 115    /* Backing file name (in UTF-16) */
 116    uint8_t     parent_name[512];
 117
 118    struct {
 119        uint32_t    platform;
 120        uint32_t    data_space;
 121        uint32_t    data_length;
 122        uint32_t    reserved;
 123        uint64_t    data_offset;
 124    } parent_locator[8];
 125} QEMU_PACKED VHDDynDiskHeader;
 126
 127typedef struct BDRVVPCState {
 128    CoMutex lock;
 129    uint8_t footer_buf[HEADER_SIZE];
 130    uint64_t free_data_block_offset;
 131    int max_table_entries;
 132    uint32_t *pagetable;
 133    uint64_t bat_offset;
 134    uint64_t last_bitmap_offset;
 135
 136    uint32_t block_size;
 137    uint32_t bitmap_size;
 138    bool force_use_chs;
 139    bool force_use_sz;
 140
 141#ifdef CACHE
 142    uint8_t *pageentry_u8;
 143    uint32_t *pageentry_u32;
 144    uint16_t *pageentry_u16;
 145
 146    uint64_t last_bitmap;
 147#endif
 148
 149    Error *migration_blocker;
 150} BDRVVPCState;
 151
 152#define VPC_OPT_SIZE_CALC "force_size_calc"
 153static QemuOptsList vpc_runtime_opts = {
 154    .name = "vpc-runtime-opts",
 155    .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head),
 156    .desc = {
 157        {
 158            .name = VPC_OPT_SIZE_CALC,
 159            .type = QEMU_OPT_STRING,
 160            .help = "Force disk size calculation to use either CHS geometry, "
 161                    "or use the disk current_size specified in the VHD footer. "
 162                    "{chs, current_size}"
 163        },
 164        { /* end of list */ }
 165    }
 166};
 167
 168static uint32_t vpc_checksum(uint8_t* buf, size_t size)
 169{
 170    uint32_t res = 0;
 171    int i;
 172
 173    for (i = 0; i < size; i++)
 174        res += buf[i];
 175
 176    return ~res;
 177}
 178
 179
 180static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
 181{
 182    if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8))
 183        return 100;
 184    return 0;
 185}
 186
 187static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts,
 188                              Error **errp)
 189{
 190    BDRVVPCState *s = bs->opaque;
 191    const char *size_calc;
 192
 193    size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC);
 194
 195    if (!size_calc) {
 196       /* no override, use autodetect only */
 197    } else if (!strcmp(size_calc, "current_size")) {
 198        s->force_use_sz = true;
 199    } else if (!strcmp(size_calc, "chs")) {
 200        s->force_use_chs = true;
 201    } else {
 202        error_setg(errp, "Invalid size calculation mode: '%s'", size_calc);
 203    }
 204}
 205
 206static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
 207                    Error **errp)
 208{
 209    BDRVVPCState *s = bs->opaque;
 210    int i;
 211    VHDFooter *footer;
 212    VHDDynDiskHeader *dyndisk_header;
 213    QemuOpts *opts = NULL;
 214    Error *local_err = NULL;
 215    bool use_chs;
 216    uint8_t buf[HEADER_SIZE];
 217    uint32_t checksum;
 218    uint64_t computed_size;
 219    uint64_t pagetable_size;
 220    int disk_type = VHD_DYNAMIC;
 221    int ret;
 222    int64_t bs_size;
 223
 224    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
 225                               false, errp);
 226    if (!bs->file) {
 227        return -EINVAL;
 228    }
 229
 230    opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
 231    qemu_opts_absorb_qdict(opts, options, &local_err);
 232    if (local_err) {
 233        error_propagate(errp, local_err);
 234        ret = -EINVAL;
 235        goto fail;
 236    }
 237
 238    vpc_parse_options(bs, opts, &local_err);
 239    if (local_err) {
 240        error_propagate(errp, local_err);
 241        ret = -EINVAL;
 242        goto fail;
 243    }
 244
 245    ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE);
 246    if (ret < 0) {
 247        error_setg(errp, "Unable to read VHD header");
 248        goto fail;
 249    }
 250
 251    footer = (VHDFooter *) s->footer_buf;
 252    if (strncmp(footer->creator, "conectix", 8)) {
 253        int64_t offset = bdrv_getlength(bs->file->bs);
 254        if (offset < 0) {
 255            ret = offset;
 256            error_setg(errp, "Invalid file size");
 257            goto fail;
 258        } else if (offset < HEADER_SIZE) {
 259            ret = -EINVAL;
 260            error_setg(errp, "File too small for a VHD header");
 261            goto fail;
 262        }
 263
 264        /* If a fixed disk, the footer is found only at the end of the file */
 265        ret = bdrv_pread(bs->file, offset-HEADER_SIZE, s->footer_buf,
 266                         HEADER_SIZE);
 267        if (ret < 0) {
 268            goto fail;
 269        }
 270        if (strncmp(footer->creator, "conectix", 8)) {
 271            error_setg(errp, "invalid VPC image");
 272            ret = -EINVAL;
 273            goto fail;
 274        }
 275        disk_type = VHD_FIXED;
 276    }
 277
 278    checksum = be32_to_cpu(footer->checksum);
 279    footer->checksum = 0;
 280    if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum)
 281        fprintf(stderr, "block-vpc: The header checksum of '%s' is "
 282            "incorrect.\n", bs->filename);
 283
 284    /* Write 'checksum' back to footer, or else will leave it with zero. */
 285    footer->checksum = cpu_to_be32(checksum);
 286
 287    /* The visible size of a image in Virtual PC depends on the geometry
 288       rather than on the size stored in the footer (the size in the footer
 289       is too large usually) */
 290    bs->total_sectors = (int64_t)
 291        be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
 292
 293    /* Microsoft Virtual PC and Microsoft Hyper-V produce and read
 294     * VHD image sizes differently.  VPC will rely on CHS geometry,
 295     * while Hyper-V and disk2vhd use the size specified in the footer.
 296     *
 297     * We use a couple of approaches to try and determine the correct method:
 298     * look at the Creator App field, and look for images that have CHS
 299     * geometry that is the maximum value.
 300     *
 301     * If the CHS geometry is the maximum CHS geometry, then we assume that
 302     * the size is the footer->current_size to avoid truncation.  Otherwise,
 303     * we follow the table based on footer->creator_app:
 304     *
 305     *  Known creator apps:
 306     *      'vpc '  :  CHS              Virtual PC (uses disk geometry)
 307     *      'qemu'  :  CHS              QEMU (uses disk geometry)
 308     *      'qem2'  :  current_size     QEMU (uses current_size)
 309     *      'win '  :  current_size     Hyper-V
 310     *      'd2v '  :  current_size     Disk2vhd
 311     *      'tap\0' :  current_size     XenServer
 312     *      'CTXS'  :  current_size     XenConverter
 313     *
 314     *  The user can override the table values via drive options, however
 315     *  even with an override we will still use current_size for images
 316     *  that have CHS geometry of the maximum size.
 317     */
 318    use_chs = (!!strncmp(footer->creator_app, "win ", 4) &&
 319               !!strncmp(footer->creator_app, "qem2", 4) &&
 320               !!strncmp(footer->creator_app, "d2v ", 4) &&
 321               !!strncmp(footer->creator_app, "CTXS", 4) &&
 322               !!memcmp(footer->creator_app, "tap", 4)) || s->force_use_chs;
 323
 324    if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) {
 325        bs->total_sectors = be64_to_cpu(footer->current_size) /
 326                                        BDRV_SECTOR_SIZE;
 327    }
 328
 329    /* Allow a maximum disk size of 2040 GiB */
 330    if (bs->total_sectors > VHD_MAX_SECTORS) {
 331        ret = -EFBIG;
 332        goto fail;
 333    }
 334
 335    if (disk_type == VHD_DYNAMIC) {
 336        ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
 337                         HEADER_SIZE);
 338        if (ret < 0) {
 339            error_setg(errp, "Error reading dynamic VHD header");
 340            goto fail;
 341        }
 342
 343        dyndisk_header = (VHDDynDiskHeader *) buf;
 344
 345        if (strncmp(dyndisk_header->magic, "cxsparse", 8)) {
 346            error_setg(errp, "Invalid header magic");
 347            ret = -EINVAL;
 348            goto fail;
 349        }
 350
 351        s->block_size = be32_to_cpu(dyndisk_header->block_size);
 352        if (!is_power_of_2(s->block_size) || s->block_size < BDRV_SECTOR_SIZE) {
 353            error_setg(errp, "Invalid block size %" PRIu32, s->block_size);
 354            ret = -EINVAL;
 355            goto fail;
 356        }
 357        s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;
 358
 359        s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
 360
 361        if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) {
 362            error_setg(errp, "Too many blocks");
 363            ret = -EINVAL;
 364            goto fail;
 365        }
 366
 367        computed_size = (uint64_t) s->max_table_entries * s->block_size;
 368        if (computed_size < bs->total_sectors * 512) {
 369            error_setg(errp, "Page table too small");
 370            ret = -EINVAL;
 371            goto fail;
 372        }
 373
 374        if (s->max_table_entries > SIZE_MAX / 4 ||
 375            s->max_table_entries > (int) INT_MAX / 4) {
 376            error_setg(errp, "Max Table Entries too large (%" PRId32 ")",
 377                        s->max_table_entries);
 378            ret = -EINVAL;
 379            goto fail;
 380        }
 381
 382        pagetable_size = (uint64_t) s->max_table_entries * 4;
 383
 384        s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size);
 385        if (s->pagetable == NULL) {
 386            error_setg(errp, "Unable to allocate memory for page table");
 387            ret = -ENOMEM;
 388            goto fail;
 389        }
 390
 391        s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
 392
 393        ret = bdrv_pread(bs->file, s->bat_offset, s->pagetable,
 394                         pagetable_size);
 395        if (ret < 0) {
 396            error_setg(errp, "Error reading pagetable");
 397            goto fail;
 398        }
 399
 400        s->free_data_block_offset =
 401            ROUND_UP(s->bat_offset + pagetable_size, 512);
 402
 403        for (i = 0; i < s->max_table_entries; i++) {
 404            be32_to_cpus(&s->pagetable[i]);
 405            if (s->pagetable[i] != 0xFFFFFFFF) {
 406                int64_t next = (512 * (int64_t) s->pagetable[i]) +
 407                    s->bitmap_size + s->block_size;
 408
 409                if (next > s->free_data_block_offset) {
 410                    s->free_data_block_offset = next;
 411                }
 412            }
 413        }
 414
 415        bs_size = bdrv_getlength(bs->file->bs);
 416        if (bs_size < 0) {
 417            error_setg_errno(errp, -bs_size, "Unable to learn image size");
 418            ret = bs_size;
 419            goto fail;
 420        }
 421        if (s->free_data_block_offset > bs_size) {
 422            error_setg(errp, "block-vpc: free_data_block_offset points after "
 423                             "the end of file. The image has been truncated.");
 424            ret = -EINVAL;
 425            goto fail;
 426        }
 427
 428        s->last_bitmap_offset = (int64_t) -1;
 429
 430#ifdef CACHE
 431        s->pageentry_u8 = g_malloc(512);
 432        s->pageentry_u32 = s->pageentry_u8;
 433        s->pageentry_u16 = s->pageentry_u8;
 434        s->last_pagetable = -1;
 435#endif
 436    }
 437
 438    /* Disable migration when VHD images are used */
 439    error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
 440               "does not support live migration",
 441               bdrv_get_device_or_node_name(bs));
 442    ret = migrate_add_blocker(s->migration_blocker, &local_err);
 443    if (local_err) {
 444        error_propagate(errp, local_err);
 445        error_free(s->migration_blocker);
 446        goto fail;
 447    }
 448
 449    qemu_co_mutex_init(&s->lock);
 450
 451    return 0;
 452
 453fail:
 454    qemu_vfree(s->pagetable);
 455#ifdef CACHE
 456    g_free(s->pageentry_u8);
 457#endif
 458    return ret;
 459}
 460
 461static int vpc_reopen_prepare(BDRVReopenState *state,
 462                              BlockReopenQueue *queue, Error **errp)
 463{
 464    return 0;
 465}
 466
 467/*
 468 * Returns the absolute byte offset of the given sector in the image file.
 469 * If the sector is not allocated, -1 is returned instead.
 470 * If an error occurred trying to write an updated block bitmap back to
 471 * the file, -2 is returned, and the error value is written to *err.
 472 * This can only happen for a write operation.
 473 *
 474 * The parameter write must be 1 if the offset will be used for a write
 475 * operation (the block bitmaps is updated then), 0 otherwise.
 476 * If write is true then err must not be NULL.
 477 */
 478static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
 479                                       bool write, int *err)
 480{
 481    BDRVVPCState *s = bs->opaque;
 482    uint64_t bitmap_offset, block_offset;
 483    uint32_t pagetable_index, offset_in_block;
 484
 485    assert(!(write && err == NULL));
 486
 487    pagetable_index = offset / s->block_size;
 488    offset_in_block = offset % s->block_size;
 489
 490    if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
 491        return -1; /* not allocated */
 492
 493    bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
 494    block_offset = bitmap_offset + s->bitmap_size + offset_in_block;
 495
 496    /* We must ensure that we don't write to any sectors which are marked as
 497       unused in the bitmap. We get away with setting all bits in the block
 498       bitmap each time we write to a new block. This might cause Virtual PC to
 499       miss sparse read optimization, but it's not a problem in terms of
 500       correctness. */
 501    if (write && (s->last_bitmap_offset != bitmap_offset)) {
 502        uint8_t bitmap[s->bitmap_size];
 503        int r;
 504
 505        s->last_bitmap_offset = bitmap_offset;
 506        memset(bitmap, 0xff, s->bitmap_size);
 507        r = bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
 508        if (r < 0) {
 509            *err = r;
 510            return -2;
 511        }
 512    }
 513
 514    return block_offset;
 515}
 516
 517/*
 518 * Writes the footer to the end of the image file. This is needed when the
 519 * file grows as it overwrites the old footer
 520 *
 521 * Returns 0 on success and < 0 on error
 522 */
 523static int rewrite_footer(BlockDriverState* bs)
 524{
 525    int ret;
 526    BDRVVPCState *s = bs->opaque;
 527    int64_t offset = s->free_data_block_offset;
 528
 529    ret = bdrv_pwrite_sync(bs->file, offset, s->footer_buf, HEADER_SIZE);
 530    if (ret < 0)
 531        return ret;
 532
 533    return 0;
 534}
 535
 536/*
 537 * Allocates a new block. This involves writing a new footer and updating
 538 * the Block Allocation Table to use the space at the old end of the image
 539 * file (overwriting the old footer)
 540 *
 541 * Returns the sectors' offset in the image file on success and < 0 on error
 542 */
 543static int64_t alloc_block(BlockDriverState* bs, int64_t offset)
 544{
 545    BDRVVPCState *s = bs->opaque;
 546    int64_t bat_offset;
 547    uint32_t index, bat_value;
 548    int ret;
 549    uint8_t bitmap[s->bitmap_size];
 550
 551    /* Check if sector_num is valid */
 552    if ((offset < 0) || (offset > bs->total_sectors * BDRV_SECTOR_SIZE)) {
 553        return -EINVAL;
 554    }
 555
 556    /* Write entry into in-memory BAT */
 557    index = offset / s->block_size;
 558    assert(s->pagetable[index] == 0xFFFFFFFF);
 559    s->pagetable[index] = s->free_data_block_offset / 512;
 560
 561    /* Initialize the block's bitmap */
 562    memset(bitmap, 0xff, s->bitmap_size);
 563    ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
 564        s->bitmap_size);
 565    if (ret < 0) {
 566        return ret;
 567    }
 568
 569    /* Write new footer (the old one will be overwritten) */
 570    s->free_data_block_offset += s->block_size + s->bitmap_size;
 571    ret = rewrite_footer(bs);
 572    if (ret < 0)
 573        goto fail;
 574
 575    /* Write BAT entry to disk */
 576    bat_offset = s->bat_offset + (4 * index);
 577    bat_value = cpu_to_be32(s->pagetable[index]);
 578    ret = bdrv_pwrite_sync(bs->file, bat_offset, &bat_value, 4);
 579    if (ret < 0)
 580        goto fail;
 581
 582    return get_image_offset(bs, offset, false, NULL);
 583
 584fail:
 585    s->free_data_block_offset -= (s->block_size + s->bitmap_size);
 586    return ret;
 587}
 588
 589static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 590{
 591    BDRVVPCState *s = (BDRVVPCState *)bs->opaque;
 592    VHDFooter *footer = (VHDFooter *) s->footer_buf;
 593
 594    if (be32_to_cpu(footer->type) != VHD_FIXED) {
 595        bdi->cluster_size = s->block_size;
 596    }
 597
 598    bdi->unallocated_blocks_are_zero = true;
 599    return 0;
 600}
 601
 602static int coroutine_fn
 603vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
 604              QEMUIOVector *qiov, int flags)
 605{
 606    BDRVVPCState *s = bs->opaque;
 607    int ret;
 608    int64_t image_offset;
 609    int64_t n_bytes;
 610    int64_t bytes_done = 0;
 611    VHDFooter *footer = (VHDFooter *) s->footer_buf;
 612    QEMUIOVector local_qiov;
 613
 614    if (be32_to_cpu(footer->type) == VHD_FIXED) {
 615        return bdrv_co_preadv(bs->file, offset, bytes, qiov, 0);
 616    }
 617
 618    qemu_co_mutex_lock(&s->lock);
 619    qemu_iovec_init(&local_qiov, qiov->niov);
 620
 621    while (bytes > 0) {
 622        image_offset = get_image_offset(bs, offset, false, NULL);
 623        n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
 624
 625        if (image_offset == -1) {
 626            qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
 627        } else {
 628            qemu_iovec_reset(&local_qiov);
 629            qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
 630
 631            ret = bdrv_co_preadv(bs->file, image_offset, n_bytes,
 632                                 &local_qiov, 0);
 633            if (ret < 0) {
 634                goto fail;
 635            }
 636        }
 637
 638        bytes -= n_bytes;
 639        offset += n_bytes;
 640        bytes_done += n_bytes;
 641    }
 642
 643    ret = 0;
 644fail:
 645    qemu_iovec_destroy(&local_qiov);
 646    qemu_co_mutex_unlock(&s->lock);
 647
 648    return ret;
 649}
 650
 651static int coroutine_fn
 652vpc_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
 653               QEMUIOVector *qiov, int flags)
 654{
 655    BDRVVPCState *s = bs->opaque;
 656    int64_t image_offset;
 657    int64_t n_bytes;
 658    int64_t bytes_done = 0;
 659    int ret = 0;
 660    VHDFooter *footer =  (VHDFooter *) s->footer_buf;
 661    QEMUIOVector local_qiov;
 662
 663    if (be32_to_cpu(footer->type) == VHD_FIXED) {
 664        return bdrv_co_pwritev(bs->file, offset, bytes, qiov, 0);
 665    }
 666
 667    qemu_co_mutex_lock(&s->lock);
 668    qemu_iovec_init(&local_qiov, qiov->niov);
 669
 670    while (bytes > 0) {
 671        image_offset = get_image_offset(bs, offset, true, &ret);
 672        if (image_offset == -2) {
 673            /* Failed to write block bitmap: can't proceed with write */
 674            goto fail;
 675        }
 676        n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
 677
 678        if (image_offset == -1) {
 679            image_offset = alloc_block(bs, offset);
 680            if (image_offset < 0) {
 681                ret = image_offset;
 682                goto fail;
 683            }
 684        }
 685
 686        qemu_iovec_reset(&local_qiov);
 687        qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
 688
 689        ret = bdrv_co_pwritev(bs->file, image_offset, n_bytes,
 690                              &local_qiov, 0);
 691        if (ret < 0) {
 692            goto fail;
 693        }
 694
 695        bytes -= n_bytes;
 696        offset += n_bytes;
 697        bytes_done += n_bytes;
 698    }
 699
 700    ret = 0;
 701fail:
 702    qemu_iovec_destroy(&local_qiov);
 703    qemu_co_mutex_unlock(&s->lock);
 704
 705    return ret;
 706}
 707
 708static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
 709        int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
 710{
 711    BDRVVPCState *s = bs->opaque;
 712    VHDFooter *footer = (VHDFooter*) s->footer_buf;
 713    int64_t start, offset;
 714    bool allocated;
 715    int64_t ret;
 716    int n;
 717
 718    if (be32_to_cpu(footer->type) == VHD_FIXED) {
 719        *pnum = nb_sectors;
 720        *file = bs->file->bs;
 721        return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
 722               (sector_num << BDRV_SECTOR_BITS);
 723    }
 724
 725    qemu_co_mutex_lock(&s->lock);
 726
 727    offset = get_image_offset(bs, sector_num << BDRV_SECTOR_BITS, false, NULL);
 728    start = offset;
 729    allocated = (offset != -1);
 730    *pnum = 0;
 731    ret = 0;
 732
 733    do {
 734        /* All sectors in a block are contiguous (without using the bitmap) */
 735        n = ROUND_UP(sector_num + 1, s->block_size / BDRV_SECTOR_SIZE)
 736          - sector_num;
 737        n = MIN(n, nb_sectors);
 738
 739        *pnum += n;
 740        sector_num += n;
 741        nb_sectors -= n;
 742        /* *pnum can't be greater than one block for allocated
 743         * sectors since there is always a bitmap in between. */
 744        if (allocated) {
 745            *file = bs->file->bs;
 746            ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
 747            break;
 748        }
 749        if (nb_sectors == 0) {
 750            break;
 751        }
 752        offset = get_image_offset(bs, sector_num << BDRV_SECTOR_BITS, false,
 753                                  NULL);
 754    } while (offset == -1);
 755
 756    qemu_co_mutex_unlock(&s->lock);
 757    return ret;
 758}
 759
 760/*
 761 * Calculates the number of cylinders, heads and sectors per cylinder
 762 * based on a given number of sectors. This is the algorithm described
 763 * in the VHD specification.
 764 *
 765 * Note that the geometry doesn't always exactly match total_sectors but
 766 * may round it down.
 767 *
 768 * Returns 0 on success, -EFBIG if the size is larger than 2040 GiB. Override
 769 * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB)
 770 * and instead allow up to 255 heads.
 771 */
 772static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
 773    uint8_t* heads, uint8_t* secs_per_cyl)
 774{
 775    uint32_t cyls_times_heads;
 776
 777    total_sectors = MIN(total_sectors, VHD_MAX_GEOMETRY);
 778
 779    if (total_sectors >= 65535LL * 16 * 63) {
 780        *secs_per_cyl = 255;
 781        *heads = 16;
 782        cyls_times_heads = total_sectors / *secs_per_cyl;
 783    } else {
 784        *secs_per_cyl = 17;
 785        cyls_times_heads = total_sectors / *secs_per_cyl;
 786        *heads = DIV_ROUND_UP(cyls_times_heads, 1024);
 787
 788        if (*heads < 4) {
 789            *heads = 4;
 790        }
 791
 792        if (cyls_times_heads >= (*heads * 1024) || *heads > 16) {
 793            *secs_per_cyl = 31;
 794            *heads = 16;
 795            cyls_times_heads = total_sectors / *secs_per_cyl;
 796        }
 797
 798        if (cyls_times_heads >= (*heads * 1024)) {
 799            *secs_per_cyl = 63;
 800            *heads = 16;
 801            cyls_times_heads = total_sectors / *secs_per_cyl;
 802        }
 803    }
 804
 805    *cyls = cyls_times_heads / *heads;
 806
 807    return 0;
 808}
 809
 810static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
 811                               int64_t total_sectors)
 812{
 813    VHDDynDiskHeader *dyndisk_header =
 814        (VHDDynDiskHeader *) buf;
 815    size_t block_size, num_bat_entries;
 816    int i;
 817    int ret;
 818    int64_t offset = 0;
 819
 820    /* Write the footer (twice: at the beginning and at the end) */
 821    block_size = 0x200000;
 822    num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
 823
 824    ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
 825    if (ret < 0) {
 826        goto fail;
 827    }
 828
 829    offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
 830    ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
 831    if (ret < 0) {
 832        goto fail;
 833    }
 834
 835    /* Write the initial BAT */
 836    offset = 3 * 512;
 837
 838    memset(buf, 0xFF, 512);
 839    for (i = 0; i < DIV_ROUND_UP(num_bat_entries * 4, 512); i++) {
 840        ret = blk_pwrite(blk, offset, buf, 512, 0);
 841        if (ret < 0) {
 842            goto fail;
 843        }
 844        offset += 512;
 845    }
 846
 847    /* Prepare the Dynamic Disk Header */
 848    memset(buf, 0, 1024);
 849
 850    memcpy(dyndisk_header->magic, "cxsparse", 8);
 851
 852    /*
 853     * Note: The spec is actually wrong here for data_offset, it says
 854     * 0xFFFFFFFF, but MS tools expect all 64 bits to be set.
 855     */
 856    dyndisk_header->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
 857    dyndisk_header->table_offset = cpu_to_be64(3 * 512);
 858    dyndisk_header->version = cpu_to_be32(0x00010000);
 859    dyndisk_header->block_size = cpu_to_be32(block_size);
 860    dyndisk_header->max_table_entries = cpu_to_be32(num_bat_entries);
 861
 862    dyndisk_header->checksum = cpu_to_be32(vpc_checksum(buf, 1024));
 863
 864    /* Write the header */
 865    offset = 512;
 866
 867    ret = blk_pwrite(blk, offset, buf, 1024, 0);
 868    if (ret < 0) {
 869        goto fail;
 870    }
 871
 872 fail:
 873    return ret;
 874}
 875
 876static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
 877                             int64_t total_size, Error **errp)
 878{
 879    int ret;
 880
 881    /* Add footer to total size */
 882    total_size += HEADER_SIZE;
 883
 884    ret = blk_truncate(blk, total_size, PREALLOC_MODE_OFF, errp);
 885    if (ret < 0) {
 886        return ret;
 887    }
 888
 889    ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE, 0);
 890    if (ret < 0) {
 891        error_setg_errno(errp, -ret, "Unable to write VHD header");
 892        return ret;
 893    }
 894
 895    return ret;
 896}
 897
 898static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
 899{
 900    uint8_t buf[1024];
 901    VHDFooter *footer = (VHDFooter *) buf;
 902    char *disk_type_param;
 903    int i;
 904    uint16_t cyls = 0;
 905    uint8_t heads = 0;
 906    uint8_t secs_per_cyl = 0;
 907    int64_t total_sectors;
 908    int64_t total_size;
 909    int disk_type;
 910    int ret = -EIO;
 911    bool force_size;
 912    Error *local_err = NULL;
 913    BlockBackend *blk = NULL;
 914
 915    /* Read out options */
 916    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
 917                          BDRV_SECTOR_SIZE);
 918    disk_type_param = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
 919    if (disk_type_param) {
 920        if (!strcmp(disk_type_param, "dynamic")) {
 921            disk_type = VHD_DYNAMIC;
 922        } else if (!strcmp(disk_type_param, "fixed")) {
 923            disk_type = VHD_FIXED;
 924        } else {
 925            error_setg(errp, "Invalid disk type, %s", disk_type_param);
 926            ret = -EINVAL;
 927            goto out;
 928        }
 929    } else {
 930        disk_type = VHD_DYNAMIC;
 931    }
 932
 933    force_size = qemu_opt_get_bool_del(opts, VPC_OPT_FORCE_SIZE, false);
 934
 935    ret = bdrv_create_file(filename, opts, &local_err);
 936    if (ret < 0) {
 937        error_propagate(errp, local_err);
 938        goto out;
 939    }
 940
 941    blk = blk_new_open(filename, NULL, NULL,
 942                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
 943                       &local_err);
 944    if (blk == NULL) {
 945        error_propagate(errp, local_err);
 946        ret = -EIO;
 947        goto out;
 948    }
 949
 950    blk_set_allow_write_beyond_eof(blk, true);
 951
 952    /*
 953     * Calculate matching total_size and geometry. Increase the number of
 954     * sectors requested until we get enough (or fail). This ensures that
 955     * qemu-img convert doesn't truncate images, but rather rounds up.
 956     *
 957     * If the image size can't be represented by a spec conformant CHS geometry,
 958     * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use
 959     * the image size from the VHD footer to calculate total_sectors.
 960     */
 961    if (force_size) {
 962        /* This will force the use of total_size for sector count, below */
 963        cyls         = VHD_CHS_MAX_C;
 964        heads        = VHD_CHS_MAX_H;
 965        secs_per_cyl = VHD_CHS_MAX_S;
 966    } else {
 967        total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
 968        for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
 969            calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
 970        }
 971    }
 972
 973    if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) {
 974        total_sectors = total_size / BDRV_SECTOR_SIZE;
 975        /* Allow a maximum disk size of 2040 GiB */
 976        if (total_sectors > VHD_MAX_SECTORS) {
 977            error_setg(errp, "Disk size is too large, max size is 2040 GiB");
 978            ret = -EFBIG;
 979            goto out;
 980        }
 981    } else {
 982        total_sectors = (int64_t)cyls * heads * secs_per_cyl;
 983        total_size = total_sectors * BDRV_SECTOR_SIZE;
 984    }
 985
 986    /* Prepare the Hard Disk Footer */
 987    memset(buf, 0, 1024);
 988
 989    memcpy(footer->creator, "conectix", 8);
 990    if (force_size) {
 991        memcpy(footer->creator_app, "qem2", 4);
 992    } else {
 993        memcpy(footer->creator_app, "qemu", 4);
 994    }
 995    memcpy(footer->creator_os, "Wi2k", 4);
 996
 997    footer->features = cpu_to_be32(0x02);
 998    footer->version = cpu_to_be32(0x00010000);
 999    if (disk_type == VHD_DYNAMIC) {
1000        footer->data_offset = cpu_to_be64(HEADER_SIZE);
1001    } else {
1002        footer->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
1003    }
1004    footer->timestamp = cpu_to_be32(time(NULL) - VHD_TIMESTAMP_BASE);
1005
1006    /* Version of Virtual PC 2007 */
1007    footer->major = cpu_to_be16(0x0005);
1008    footer->minor = cpu_to_be16(0x0003);
1009    footer->orig_size = cpu_to_be64(total_size);
1010    footer->current_size = cpu_to_be64(total_size);
1011    footer->cyls = cpu_to_be16(cyls);
1012    footer->heads = heads;
1013    footer->secs_per_cyl = secs_per_cyl;
1014
1015    footer->type = cpu_to_be32(disk_type);
1016
1017    qemu_uuid_generate(&footer->uuid);
1018
1019    footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE));
1020
1021    if (disk_type == VHD_DYNAMIC) {
1022        ret = create_dynamic_disk(blk, buf, total_sectors);
1023        if (ret < 0) {
1024            error_setg(errp, "Unable to create or write VHD header");
1025        }
1026    } else {
1027        ret = create_fixed_disk(blk, buf, total_size, errp);
1028    }
1029
1030out:
1031    blk_unref(blk);
1032    g_free(disk_type_param);
1033    return ret;
1034}
1035
1036static int vpc_has_zero_init(BlockDriverState *bs)
1037{
1038    BDRVVPCState *s = bs->opaque;
1039    VHDFooter *footer =  (VHDFooter *) s->footer_buf;
1040
1041    if (be32_to_cpu(footer->type) == VHD_FIXED) {
1042        return bdrv_has_zero_init(bs->file->bs);
1043    } else {
1044        return 1;
1045    }
1046}
1047
1048static void vpc_close(BlockDriverState *bs)
1049{
1050    BDRVVPCState *s = bs->opaque;
1051    qemu_vfree(s->pagetable);
1052#ifdef CACHE
1053    g_free(s->pageentry_u8);
1054#endif
1055
1056    migrate_del_blocker(s->migration_blocker);
1057    error_free(s->migration_blocker);
1058}
1059
1060static QemuOptsList vpc_create_opts = {
1061    .name = "vpc-create-opts",
1062    .head = QTAILQ_HEAD_INITIALIZER(vpc_create_opts.head),
1063    .desc = {
1064        {
1065            .name = BLOCK_OPT_SIZE,
1066            .type = QEMU_OPT_SIZE,
1067            .help = "Virtual disk size"
1068        },
1069        {
1070            .name = BLOCK_OPT_SUBFMT,
1071            .type = QEMU_OPT_STRING,
1072            .help =
1073                "Type of virtual hard disk format. Supported formats are "
1074                "{dynamic (default) | fixed} "
1075        },
1076        {
1077            .name = VPC_OPT_FORCE_SIZE,
1078            .type = QEMU_OPT_BOOL,
1079            .help = "Force disk size calculation to use the actual size "
1080                    "specified, rather than using the nearest CHS-based "
1081                    "calculation"
1082        },
1083        { /* end of list */ }
1084    }
1085};
1086
1087static BlockDriver bdrv_vpc = {
1088    .format_name    = "vpc",
1089    .instance_size  = sizeof(BDRVVPCState),
1090
1091    .bdrv_probe             = vpc_probe,
1092    .bdrv_open              = vpc_open,
1093    .bdrv_close             = vpc_close,
1094    .bdrv_reopen_prepare    = vpc_reopen_prepare,
1095    .bdrv_child_perm        = bdrv_format_default_perms,
1096    .bdrv_create            = vpc_create,
1097
1098    .bdrv_co_preadv             = vpc_co_preadv,
1099    .bdrv_co_pwritev            = vpc_co_pwritev,
1100    .bdrv_co_get_block_status   = vpc_co_get_block_status,
1101
1102    .bdrv_get_info          = vpc_get_info,
1103
1104    .create_opts            = &vpc_create_opts,
1105    .bdrv_has_zero_init     = vpc_has_zero_init,
1106};
1107
1108static void bdrv_vpc_init(void)
1109{
1110    bdrv_register(&bdrv_vpc);
1111}
1112
1113block_init(bdrv_vpc_init);
1114