qemu/block/vpc.c
<<
>>
Prefs
   1/*
   2 * Block driver for Connectix / Microsoft Virtual PC images
   3 *
   4 * Copyright (c) 2005 Alex Beregszaszi
   5 * Copyright (c) 2009 Kevin Wolf <kwolf@suse.de>
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25
  26#include "qemu/osdep.h"
  27#include "qapi/error.h"
  28#include "block/block_int.h"
  29#include "sysemu/block-backend.h"
  30#include "qemu/module.h"
  31#include "qemu/option.h"
  32#include "migration/blocker.h"
  33#include "qemu/bswap.h"
  34#include "qemu/uuid.h"
  35#include "qapi/qmp/qdict.h"
  36#include "qapi/qobject-input-visitor.h"
  37#include "qapi/qapi-visit-block-core.h"
  38
  39/**************************************************************/
  40
/* Size in bytes of the buffers this driver uses for the VHD footer and for
 * reading the dynamic disk header. */
#define HEADER_SIZE 512

/* Uncomment to compile the CACHE-guarded fields and code further down. */
//#define CACHE

/* Disk type values; compared against the (big-endian) footer 'type' field. */
enum vhd_type {
    VHD_FIXED           = 2,
    VHD_DYNAMIC         = 3,
    VHD_DIFFERENCING    = 4,
};

/* Unix time of Jan 1, 2000 0:00:00 (UTC), the epoch of VHD timestamps */
#define VHD_TIMESTAMP_BASE 946684800

/* Largest cylinder / head / sectors-per-track values of a VHD CHS geometry */
#define VHD_CHS_MAX_C   65535LL
#define VHD_CHS_MAX_H   16
#define VHD_CHS_MAX_S   255

#define VHD_MAX_SECTORS       0xff000000    /* 2040 GiB max image size */
/* Sector count of the largest geometry; images reporting exactly this
 * geometry take their size from the footer's current_size instead. */
#define VHD_MAX_GEOMETRY      (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S)

/* Name of the "force_size" option string. */
#define VPC_OPT_FORCE_SIZE "force_size"
  62
/*
 * Hard disk footer as stored in the image file. The driver looks for it at
 * offset 0 first and, failing that, in the last HEADER_SIZE bytes of the file.
 */
/* always big-endian */
typedef struct vhd_footer {
    char        creator[8]; /* "conectix" */
    uint32_t    features;
    uint32_t    version;

    /* Offset of next header structure, 0xFFFFFFFF if none */
    uint64_t    data_offset;

    /* Seconds since Jan 1, 2000 0:00:00 (UTC) */
    uint32_t    timestamp;

    /* Application that created the image; vpc_open() uses it to choose
     * between CHS geometry and current_size for the visible disk size. */
    char        creator_app[4]; /*  e.g., "vpc " */
    uint16_t    major;
    uint16_t    minor;
    char        creator_os[4]; /* "Wi2k" */

    /* Disk sizes in bytes (current_size is divided by the sector size
     * when it is used as the visible disk size). */
    uint64_t    orig_size;
    uint64_t    current_size;

    /* CHS geometry; cyls * heads * secs_per_cyl gives the sector count */
    uint16_t    cyls;
    uint8_t     heads;
    uint8_t     secs_per_cyl;

    /* One of enum vhd_type */
    uint32_t    type;

    /* Checksum of the Hard Disk Footer ("one's complement of the sum of all
       the bytes in the footer without the checksum field") */
    uint32_t    checksum;

    /* UUID used to identify a parent hard disk (backing file) */
    QemuUUID    uuid;

    uint8_t     in_saved_state;
} QEMU_PACKED VHDFooter;
  98
/*
 * Dynamic disk header as stored in the image file. vpc_open() reads it from
 * the offset given by the footer's data_offset and converts the fields it
 * uses from big-endian.
 */
typedef struct vhd_dyndisk_header {
    char        magic[8]; /* "cxsparse" */

    /* Offset of next header structure, 0xFFFFFFFF if none */
    uint64_t    data_offset;

    /* Offset of the Block Allocation Table (BAT) */
    uint64_t    table_offset;

    uint32_t    version;
    /* Number of BAT entries */
    uint32_t    max_table_entries; /* 32bit/entry */

    /* 2 MB by default, must be a power of two */
    uint32_t    block_size;

    uint32_t    checksum;
    uint8_t     parent_uuid[16];
    uint32_t    parent_timestamp;
    uint32_t    reserved;

    /* Backing file name (in UTF-16) */
    uint8_t     parent_name[512];

    struct {
        uint32_t    platform;
        uint32_t    data_space;
        uint32_t    data_length;
        uint32_t    reserved;
        uint64_t    data_offset;
    } parent_locator[8];
} QEMU_PACKED VHDDynDiskHeader;
 130
/* Per-image driver state (bs->opaque). */
typedef struct BDRVVPCState {
    /* Held around dynamic-disk reads, writes and block status queries */
    CoMutex lock;
    /* Raw footer as read from the image (fields remain big-endian) */
    uint8_t footer_buf[HEADER_SIZE];
    /* Byte offset in the image file at which the next data block will be
     * allocated; the footer is rewritten at this offset after allocation */
    uint64_t free_data_block_offset;
    /* Number of entries in pagetable */
    int max_table_entries;
    /* In-memory BAT in host byte order. Each entry is the 512-byte sector
     * number of a block's bitmap, or 0xFFFFFFFF if the block is unallocated */
    uint32_t *pagetable;
    /* Byte offset of the BAT in the image file */
    uint64_t bat_offset;
    /* Offset of the block bitmap most recently filled with 0xff by
     * get_image_offset(); initialised to (uint64_t)-1 on open */
    uint64_t last_bitmap_offset;

    /* Size in bytes of one data block (power of two, checked on open) */
    uint32_t block_size;
    /* Size in bytes of one block's sector bitmap, rounded up to 512 */
    uint32_t bitmap_size;
    /* Set from the "force_size_calc" runtime option ("chs") */
    bool force_use_chs;
    /* Set from the "force_size_calc" runtime option ("current_size") */
    bool force_use_sz;

#ifdef CACHE
    uint8_t *pageentry_u8;
    uint32_t *pageentry_u32;
    uint16_t *pageentry_u16;

    uint64_t last_bitmap;
#endif

    /* Migration blocker registered by vpc_open() */
    Error *migration_blocker;
} BDRVVPCState;
 155
/* Name of the runtime option that overrides how the disk size is derived. */
#define VPC_OPT_SIZE_CALC "force_size_calc"
/*
 * Runtime (open-time) options. The single string option accepts "chs" or
 * "current_size"; vpc_parse_options() rejects any other value.
 */
static QemuOptsList vpc_runtime_opts = {
    .name = "vpc-runtime-opts",
    .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head),
    .desc = {
        {
            .name = VPC_OPT_SIZE_CALC,
            .type = QEMU_OPT_STRING,
            .help = "Force disk size calculation to use either CHS geometry, "
                    "or use the disk current_size specified in the VHD footer. "
                    "{chs, current_size}"
        },
        { /* end of list */ }
    }
};

/* Tentative definition; the initialised definition is not in this part of
 * the file. */
static QemuOptsList vpc_create_opts;
 173
 174static uint32_t vpc_checksum(uint8_t* buf, size_t size)
 175{
 176    uint32_t res = 0;
 177    int i;
 178
 179    for (i = 0; i < size; i++)
 180        res += buf[i];
 181
 182    return ~res;
 183}
 184
 185
 186static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
 187{
 188    if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8))
 189        return 100;
 190    return 0;
 191}
 192
 193static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts,
 194                              Error **errp)
 195{
 196    BDRVVPCState *s = bs->opaque;
 197    const char *size_calc;
 198
 199    size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC);
 200
 201    if (!size_calc) {
 202       /* no override, use autodetect only */
 203    } else if (!strcmp(size_calc, "current_size")) {
 204        s->force_use_sz = true;
 205    } else if (!strcmp(size_calc, "chs")) {
 206        s->force_use_chs = true;
 207    } else {
 208        error_setg(errp, "Invalid size calculation mode: '%s'", size_calc);
 209    }
 210}
 211
 212static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
 213                    Error **errp)
 214{
 215    BDRVVPCState *s = bs->opaque;
 216    int i;
 217    VHDFooter *footer;
 218    VHDDynDiskHeader *dyndisk_header;
 219    QemuOpts *opts = NULL;
 220    Error *local_err = NULL;
 221    bool use_chs;
 222    uint8_t buf[HEADER_SIZE];
 223    uint32_t checksum;
 224    uint64_t computed_size;
 225    uint64_t pagetable_size;
 226    int disk_type = VHD_DYNAMIC;
 227    int ret;
 228    int64_t bs_size;
 229
 230    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
 231                               false, errp);
 232    if (!bs->file) {
 233        return -EINVAL;
 234    }
 235
 236    opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
 237    qemu_opts_absorb_qdict(opts, options, &local_err);
 238    if (local_err) {
 239        error_propagate(errp, local_err);
 240        ret = -EINVAL;
 241        goto fail;
 242    }
 243
 244    vpc_parse_options(bs, opts, &local_err);
 245    if (local_err) {
 246        error_propagate(errp, local_err);
 247        ret = -EINVAL;
 248        goto fail;
 249    }
 250
 251    ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE);
 252    if (ret < 0) {
 253        error_setg(errp, "Unable to read VHD header");
 254        goto fail;
 255    }
 256
 257    footer = (VHDFooter *) s->footer_buf;
 258    if (strncmp(footer->creator, "conectix", 8)) {
 259        int64_t offset = bdrv_getlength(bs->file->bs);
 260        if (offset < 0) {
 261            ret = offset;
 262            error_setg(errp, "Invalid file size");
 263            goto fail;
 264        } else if (offset < HEADER_SIZE) {
 265            ret = -EINVAL;
 266            error_setg(errp, "File too small for a VHD header");
 267            goto fail;
 268        }
 269
 270        /* If a fixed disk, the footer is found only at the end of the file */
 271        ret = bdrv_pread(bs->file, offset-HEADER_SIZE, s->footer_buf,
 272                         HEADER_SIZE);
 273        if (ret < 0) {
 274            goto fail;
 275        }
 276        if (strncmp(footer->creator, "conectix", 8)) {
 277            error_setg(errp, "invalid VPC image");
 278            ret = -EINVAL;
 279            goto fail;
 280        }
 281        disk_type = VHD_FIXED;
 282    }
 283
 284    checksum = be32_to_cpu(footer->checksum);
 285    footer->checksum = 0;
 286    if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum)
 287        fprintf(stderr, "block-vpc: The header checksum of '%s' is "
 288            "incorrect.\n", bs->filename);
 289
 290    /* Write 'checksum' back to footer, or else will leave it with zero. */
 291    footer->checksum = cpu_to_be32(checksum);
 292
 293    /* The visible size of a image in Virtual PC depends on the geometry
 294       rather than on the size stored in the footer (the size in the footer
 295       is too large usually) */
 296    bs->total_sectors = (int64_t)
 297        be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
 298
 299    /* Microsoft Virtual PC and Microsoft Hyper-V produce and read
 300     * VHD image sizes differently.  VPC will rely on CHS geometry,
 301     * while Hyper-V and disk2vhd use the size specified in the footer.
 302     *
 303     * We use a couple of approaches to try and determine the correct method:
 304     * look at the Creator App field, and look for images that have CHS
 305     * geometry that is the maximum value.
 306     *
 307     * If the CHS geometry is the maximum CHS geometry, then we assume that
 308     * the size is the footer->current_size to avoid truncation.  Otherwise,
 309     * we follow the table based on footer->creator_app:
 310     *
 311     *  Known creator apps:
 312     *      'vpc '  :  CHS              Virtual PC (uses disk geometry)
 313     *      'qemu'  :  CHS              QEMU (uses disk geometry)
 314     *      'qem2'  :  current_size     QEMU (uses current_size)
 315     *      'win '  :  current_size     Hyper-V
 316     *      'd2v '  :  current_size     Disk2vhd
 317     *      'tap\0' :  current_size     XenServer
 318     *      'CTXS'  :  current_size     XenConverter
 319     *
 320     *  The user can override the table values via drive options, however
 321     *  even with an override we will still use current_size for images
 322     *  that have CHS geometry of the maximum size.
 323     */
 324    use_chs = (!!strncmp(footer->creator_app, "win ", 4) &&
 325               !!strncmp(footer->creator_app, "qem2", 4) &&
 326               !!strncmp(footer->creator_app, "d2v ", 4) &&
 327               !!strncmp(footer->creator_app, "CTXS", 4) &&
 328               !!memcmp(footer->creator_app, "tap", 4)) || s->force_use_chs;
 329
 330    if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) {
 331        bs->total_sectors = be64_to_cpu(footer->current_size) /
 332                                        BDRV_SECTOR_SIZE;
 333    }
 334
 335    /* Allow a maximum disk size of 2040 GiB */
 336    if (bs->total_sectors > VHD_MAX_SECTORS) {
 337        ret = -EFBIG;
 338        goto fail;
 339    }
 340
 341    if (disk_type == VHD_DYNAMIC) {
 342        ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
 343                         HEADER_SIZE);
 344        if (ret < 0) {
 345            error_setg(errp, "Error reading dynamic VHD header");
 346            goto fail;
 347        }
 348
 349        dyndisk_header = (VHDDynDiskHeader *) buf;
 350
 351        if (strncmp(dyndisk_header->magic, "cxsparse", 8)) {
 352            error_setg(errp, "Invalid header magic");
 353            ret = -EINVAL;
 354            goto fail;
 355        }
 356
 357        s->block_size = be32_to_cpu(dyndisk_header->block_size);
 358        if (!is_power_of_2(s->block_size) || s->block_size < BDRV_SECTOR_SIZE) {
 359            error_setg(errp, "Invalid block size %" PRIu32, s->block_size);
 360            ret = -EINVAL;
 361            goto fail;
 362        }
 363        s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;
 364
 365        s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
 366
 367        if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) {
 368            error_setg(errp, "Too many blocks");
 369            ret = -EINVAL;
 370            goto fail;
 371        }
 372
 373        computed_size = (uint64_t) s->max_table_entries * s->block_size;
 374        if (computed_size < bs->total_sectors * 512) {
 375            error_setg(errp, "Page table too small");
 376            ret = -EINVAL;
 377            goto fail;
 378        }
 379
 380        if (s->max_table_entries > SIZE_MAX / 4 ||
 381            s->max_table_entries > (int) INT_MAX / 4) {
 382            error_setg(errp, "Max Table Entries too large (%" PRId32 ")",
 383                        s->max_table_entries);
 384            ret = -EINVAL;
 385            goto fail;
 386        }
 387
 388        pagetable_size = (uint64_t) s->max_table_entries * 4;
 389
 390        s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size);
 391        if (s->pagetable == NULL) {
 392            error_setg(errp, "Unable to allocate memory for page table");
 393            ret = -ENOMEM;
 394            goto fail;
 395        }
 396
 397        s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
 398
 399        ret = bdrv_pread(bs->file, s->bat_offset, s->pagetable,
 400                         pagetable_size);
 401        if (ret < 0) {
 402            error_setg(errp, "Error reading pagetable");
 403            goto fail;
 404        }
 405
 406        s->free_data_block_offset =
 407            ROUND_UP(s->bat_offset + pagetable_size, 512);
 408
 409        for (i = 0; i < s->max_table_entries; i++) {
 410            be32_to_cpus(&s->pagetable[i]);
 411            if (s->pagetable[i] != 0xFFFFFFFF) {
 412                int64_t next = (512 * (int64_t) s->pagetable[i]) +
 413                    s->bitmap_size + s->block_size;
 414
 415                if (next > s->free_data_block_offset) {
 416                    s->free_data_block_offset = next;
 417                }
 418            }
 419        }
 420
 421        bs_size = bdrv_getlength(bs->file->bs);
 422        if (bs_size < 0) {
 423            error_setg_errno(errp, -bs_size, "Unable to learn image size");
 424            ret = bs_size;
 425            goto fail;
 426        }
 427        if (s->free_data_block_offset > bs_size) {
 428            error_setg(errp, "block-vpc: free_data_block_offset points after "
 429                             "the end of file. The image has been truncated.");
 430            ret = -EINVAL;
 431            goto fail;
 432        }
 433
 434        s->last_bitmap_offset = (int64_t) -1;
 435
 436#ifdef CACHE
 437        s->pageentry_u8 = g_malloc(512);
 438        s->pageentry_u32 = s->pageentry_u8;
 439        s->pageentry_u16 = s->pageentry_u8;
 440        s->last_pagetable = -1;
 441#endif
 442    }
 443
 444    /* Disable migration when VHD images are used */
 445    error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
 446               "does not support live migration",
 447               bdrv_get_device_or_node_name(bs));
 448    ret = migrate_add_blocker(s->migration_blocker, &local_err);
 449    if (local_err) {
 450        error_propagate(errp, local_err);
 451        error_free(s->migration_blocker);
 452        goto fail;
 453    }
 454
 455    qemu_co_mutex_init(&s->lock);
 456
 457    return 0;
 458
 459fail:
 460    qemu_vfree(s->pagetable);
 461#ifdef CACHE
 462    g_free(s->pageentry_u8);
 463#endif
 464    return ret;
 465}
 466
/*
 * Reopen hook: performs no checks and no work, and always reports success
 * by returning 0. None of the arguments are used.
 */
static int vpc_reopen_prepare(BDRVReopenState *state,
                              BlockReopenQueue *queue, Error **errp)
{
    return 0;
}
 472
 473/*
 474 * Returns the absolute byte offset of the given sector in the image file.
 475 * If the sector is not allocated, -1 is returned instead.
 476 * If an error occurred trying to write an updated block bitmap back to
 477 * the file, -2 is returned, and the error value is written to *err.
 478 * This can only happen for a write operation.
 479 *
 480 * The parameter write must be 1 if the offset will be used for a write
 481 * operation (the block bitmaps is updated then), 0 otherwise.
 482 * If write is true then err must not be NULL.
 483 */
 484static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
 485                                       bool write, int *err)
 486{
 487    BDRVVPCState *s = bs->opaque;
 488    uint64_t bitmap_offset, block_offset;
 489    uint32_t pagetable_index, offset_in_block;
 490
 491    assert(!(write && err == NULL));
 492
 493    pagetable_index = offset / s->block_size;
 494    offset_in_block = offset % s->block_size;
 495
 496    if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
 497        return -1; /* not allocated */
 498
 499    bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
 500    block_offset = bitmap_offset + s->bitmap_size + offset_in_block;
 501
 502    /* We must ensure that we don't write to any sectors which are marked as
 503       unused in the bitmap. We get away with setting all bits in the block
 504       bitmap each time we write to a new block. This might cause Virtual PC to
 505       miss sparse read optimization, but it's not a problem in terms of
 506       correctness. */
 507    if (write && (s->last_bitmap_offset != bitmap_offset)) {
 508        uint8_t bitmap[s->bitmap_size];
 509        int r;
 510
 511        s->last_bitmap_offset = bitmap_offset;
 512        memset(bitmap, 0xff, s->bitmap_size);
 513        r = bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
 514        if (r < 0) {
 515            *err = r;
 516            return -2;
 517        }
 518    }
 519
 520    return block_offset;
 521}
 522
 523/*
 524 * Writes the footer to the end of the image file. This is needed when the
 525 * file grows as it overwrites the old footer
 526 *
 527 * Returns 0 on success and < 0 on error
 528 */
 529static int rewrite_footer(BlockDriverState* bs)
 530{
 531    int ret;
 532    BDRVVPCState *s = bs->opaque;
 533    int64_t offset = s->free_data_block_offset;
 534
 535    ret = bdrv_pwrite_sync(bs->file, offset, s->footer_buf, HEADER_SIZE);
 536    if (ret < 0)
 537        return ret;
 538
 539    return 0;
 540}
 541
 542/*
 543 * Allocates a new block. This involves writing a new footer and updating
 544 * the Block Allocation Table to use the space at the old end of the image
 545 * file (overwriting the old footer)
 546 *
 547 * Returns the sectors' offset in the image file on success and < 0 on error
 548 */
 549static int64_t alloc_block(BlockDriverState* bs, int64_t offset)
 550{
 551    BDRVVPCState *s = bs->opaque;
 552    int64_t bat_offset;
 553    uint32_t index, bat_value;
 554    int ret;
 555    uint8_t bitmap[s->bitmap_size];
 556
 557    /* Check if sector_num is valid */
 558    if ((offset < 0) || (offset > bs->total_sectors * BDRV_SECTOR_SIZE)) {
 559        return -EINVAL;
 560    }
 561
 562    /* Write entry into in-memory BAT */
 563    index = offset / s->block_size;
 564    assert(s->pagetable[index] == 0xFFFFFFFF);
 565    s->pagetable[index] = s->free_data_block_offset / 512;
 566
 567    /* Initialize the block's bitmap */
 568    memset(bitmap, 0xff, s->bitmap_size);
 569    ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
 570        s->bitmap_size);
 571    if (ret < 0) {
 572        return ret;
 573    }
 574
 575    /* Write new footer (the old one will be overwritten) */
 576    s->free_data_block_offset += s->block_size + s->bitmap_size;
 577    ret = rewrite_footer(bs);
 578    if (ret < 0)
 579        goto fail;
 580
 581    /* Write BAT entry to disk */
 582    bat_offset = s->bat_offset + (4 * index);
 583    bat_value = cpu_to_be32(s->pagetable[index]);
 584    ret = bdrv_pwrite_sync(bs->file, bat_offset, &bat_value, 4);
 585    if (ret < 0)
 586        goto fail;
 587
 588    return get_image_offset(bs, offset, false, NULL);
 589
 590fail:
 591    s->free_data_block_offset -= (s->block_size + s->bitmap_size);
 592    return ret;
 593}
 594
 595static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 596{
 597    BDRVVPCState *s = (BDRVVPCState *)bs->opaque;
 598    VHDFooter *footer = (VHDFooter *) s->footer_buf;
 599
 600    if (be32_to_cpu(footer->type) != VHD_FIXED) {
 601        bdi->cluster_size = s->block_size;
 602    }
 603
 604    bdi->unallocated_blocks_are_zero = true;
 605    return 0;
 606}
 607
 608static int coroutine_fn
 609vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
 610              QEMUIOVector *qiov, int flags)
 611{
 612    BDRVVPCState *s = bs->opaque;
 613    int ret;
 614    int64_t image_offset;
 615    int64_t n_bytes;
 616    int64_t bytes_done = 0;
 617    VHDFooter *footer = (VHDFooter *) s->footer_buf;
 618    QEMUIOVector local_qiov;
 619
 620    if (be32_to_cpu(footer->type) == VHD_FIXED) {
 621        return bdrv_co_preadv(bs->file, offset, bytes, qiov, 0);
 622    }
 623
 624    qemu_co_mutex_lock(&s->lock);
 625    qemu_iovec_init(&local_qiov, qiov->niov);
 626
 627    while (bytes > 0) {
 628        image_offset = get_image_offset(bs, offset, false, NULL);
 629        n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
 630
 631        if (image_offset == -1) {
 632            qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
 633        } else {
 634            qemu_iovec_reset(&local_qiov);
 635            qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
 636
 637            ret = bdrv_co_preadv(bs->file, image_offset, n_bytes,
 638                                 &local_qiov, 0);
 639            if (ret < 0) {
 640                goto fail;
 641            }
 642        }
 643
 644        bytes -= n_bytes;
 645        offset += n_bytes;
 646        bytes_done += n_bytes;
 647    }
 648
 649    ret = 0;
 650fail:
 651    qemu_iovec_destroy(&local_qiov);
 652    qemu_co_mutex_unlock(&s->lock);
 653
 654    return ret;
 655}
 656
 657static int coroutine_fn
 658vpc_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
 659               QEMUIOVector *qiov, int flags)
 660{
 661    BDRVVPCState *s = bs->opaque;
 662    int64_t image_offset;
 663    int64_t n_bytes;
 664    int64_t bytes_done = 0;
 665    int ret = 0;
 666    VHDFooter *footer =  (VHDFooter *) s->footer_buf;
 667    QEMUIOVector local_qiov;
 668
 669    if (be32_to_cpu(footer->type) == VHD_FIXED) {
 670        return bdrv_co_pwritev(bs->file, offset, bytes, qiov, 0);
 671    }
 672
 673    qemu_co_mutex_lock(&s->lock);
 674    qemu_iovec_init(&local_qiov, qiov->niov);
 675
 676    while (bytes > 0) {
 677        image_offset = get_image_offset(bs, offset, true, &ret);
 678        if (image_offset == -2) {
 679            /* Failed to write block bitmap: can't proceed with write */
 680            goto fail;
 681        }
 682        n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
 683
 684        if (image_offset == -1) {
 685            image_offset = alloc_block(bs, offset);
 686            if (image_offset < 0) {
 687                ret = image_offset;
 688                goto fail;
 689            }
 690        }
 691
 692        qemu_iovec_reset(&local_qiov);
 693        qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
 694
 695        ret = bdrv_co_pwritev(bs->file, image_offset, n_bytes,
 696                              &local_qiov, 0);
 697        if (ret < 0) {
 698            goto fail;
 699        }
 700
 701        bytes -= n_bytes;
 702        offset += n_bytes;
 703        bytes_done += n_bytes;
 704    }
 705
 706    ret = 0;
 707fail:
 708    qemu_iovec_destroy(&local_qiov);
 709    qemu_co_mutex_unlock(&s->lock);
 710
 711    return ret;
 712}
 713
 714static int coroutine_fn vpc_co_block_status(BlockDriverState *bs,
 715                                            bool want_zero,
 716                                            int64_t offset, int64_t bytes,
 717                                            int64_t *pnum, int64_t *map,
 718                                            BlockDriverState **file)
 719{
 720    BDRVVPCState *s = bs->opaque;
 721    VHDFooter *footer = (VHDFooter*) s->footer_buf;
 722    int64_t image_offset;
 723    bool allocated;
 724    int ret;
 725    int64_t n;
 726
 727    if (be32_to_cpu(footer->type) == VHD_FIXED) {
 728        *pnum = bytes;
 729        *map = offset;
 730        *file = bs->file->bs;
 731        return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
 732    }
 733
 734    qemu_co_mutex_lock(&s->lock);
 735
 736    image_offset = get_image_offset(bs, offset, false, NULL);
 737    allocated = (image_offset != -1);
 738    *pnum = 0;
 739    ret = 0;
 740
 741    do {
 742        /* All sectors in a block are contiguous (without using the bitmap) */
 743        n = ROUND_UP(offset + 1, s->block_size) - offset;
 744        n = MIN(n, bytes);
 745
 746        *pnum += n;
 747        offset += n;
 748        bytes -= n;
 749        /* *pnum can't be greater than one block for allocated
 750         * sectors since there is always a bitmap in between. */
 751        if (allocated) {
 752            *file = bs->file->bs;
 753            *map = image_offset;
 754            ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
 755            break;
 756        }
 757        if (bytes == 0) {
 758            break;
 759        }
 760        image_offset = get_image_offset(bs, offset, false, NULL);
 761    } while (image_offset == -1);
 762
 763    qemu_co_mutex_unlock(&s->lock);
 764    return ret;
 765}
 766
 767/*
 768 * Calculates the number of cylinders, heads and sectors per cylinder
 769 * based on a given number of sectors. This is the algorithm described
 770 * in the VHD specification.
 771 *
 772 * Note that the geometry doesn't always exactly match total_sectors but
 773 * may round it down.
 774 *
 775 * Returns 0 on success, -EFBIG if the size is larger than 2040 GiB. Override
 776 * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB)
 777 * and instead allow up to 255 heads.
 778 */
 779static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
 780    uint8_t* heads, uint8_t* secs_per_cyl)
 781{
 782    uint32_t cyls_times_heads;
 783
 784    total_sectors = MIN(total_sectors, VHD_MAX_GEOMETRY);
 785
 786    if (total_sectors >= 65535LL * 16 * 63) {
 787        *secs_per_cyl = 255;
 788        *heads = 16;
 789        cyls_times_heads = total_sectors / *secs_per_cyl;
 790    } else {
 791        *secs_per_cyl = 17;
 792        cyls_times_heads = total_sectors / *secs_per_cyl;
 793        *heads = DIV_ROUND_UP(cyls_times_heads, 1024);
 794
 795        if (*heads < 4) {
 796            *heads = 4;
 797        }
 798
 799        if (cyls_times_heads >= (*heads * 1024) || *heads > 16) {
 800            *secs_per_cyl = 31;
 801            *heads = 16;
 802            cyls_times_heads = total_sectors / *secs_per_cyl;
 803        }
 804
 805        if (cyls_times_heads >= (*heads * 1024)) {
 806            *secs_per_cyl = 63;
 807            *heads = 16;
 808            cyls_times_heads = total_sectors / *secs_per_cyl;
 809        }
 810    }
 811
 812    *cyls = cyls_times_heads / *heads;
 813
 814    return 0;
 815}
 816
 817static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
 818                               int64_t total_sectors)
 819{
 820    VHDDynDiskHeader *dyndisk_header =
 821        (VHDDynDiskHeader *) buf;
 822    size_t block_size, num_bat_entries;
 823    int i;
 824    int ret;
 825    int64_t offset = 0;
 826
 827    /* Write the footer (twice: at the beginning and at the end) */
 828    block_size = 0x200000;
 829    num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
 830
 831    ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
 832    if (ret < 0) {
 833        goto fail;
 834    }
 835
 836    offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
 837    ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
 838    if (ret < 0) {
 839        goto fail;
 840    }
 841
 842    /* Write the initial BAT */
 843    offset = 3 * 512;
 844
 845    memset(buf, 0xFF, 512);
 846    for (i = 0; i < DIV_ROUND_UP(num_bat_entries * 4, 512); i++) {
 847        ret = blk_pwrite(blk, offset, buf, 512, 0);
 848        if (ret < 0) {
 849            goto fail;
 850        }
 851        offset += 512;
 852    }
 853
 854    /* Prepare the Dynamic Disk Header */
 855    memset(buf, 0, 1024);
 856
 857    memcpy(dyndisk_header->magic, "cxsparse", 8);
 858
 859    /*
 860     * Note: The spec is actually wrong here for data_offset, it says
 861     * 0xFFFFFFFF, but MS tools expect all 64 bits to be set.
 862     */
 863    dyndisk_header->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
 864    dyndisk_header->table_offset = cpu_to_be64(3 * 512);
 865    dyndisk_header->version = cpu_to_be32(0x00010000);
 866    dyndisk_header->block_size = cpu_to_be32(block_size);
 867    dyndisk_header->max_table_entries = cpu_to_be32(num_bat_entries);
 868
 869    dyndisk_header->checksum = cpu_to_be32(vpc_checksum(buf, 1024));
 870
 871    /* Write the header */
 872    offset = 512;
 873
 874    ret = blk_pwrite(blk, offset, buf, 1024, 0);
 875    if (ret < 0) {
 876        goto fail;
 877    }
 878
 879 fail:
 880    return ret;
 881}
 882
 883static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
 884                             int64_t total_size, Error **errp)
 885{
 886    int ret;
 887
 888    /* Add footer to total size */
 889    total_size += HEADER_SIZE;
 890
 891    ret = blk_truncate(blk, total_size, PREALLOC_MODE_OFF, errp);
 892    if (ret < 0) {
 893        return ret;
 894    }
 895
 896    ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE, 0);
 897    if (ret < 0) {
 898        error_setg_errno(errp, -ret, "Unable to write VHD header");
 899        return ret;
 900    }
 901
 902    return ret;
 903}
 904
 905static int calculate_rounded_image_size(BlockdevCreateOptionsVpc *vpc_opts,
 906                                        uint16_t *out_cyls,
 907                                        uint8_t *out_heads,
 908                                        uint8_t *out_secs_per_cyl,
 909                                        int64_t *out_total_sectors,
 910                                        Error **errp)
 911{
 912    int64_t total_size = vpc_opts->size;
 913    uint16_t cyls = 0;
 914    uint8_t heads = 0;
 915    uint8_t secs_per_cyl = 0;
 916    int64_t total_sectors;
 917    int i;
 918
 919    /*
 920     * Calculate matching total_size and geometry. Increase the number of
 921     * sectors requested until we get enough (or fail). This ensures that
 922     * qemu-img convert doesn't truncate images, but rather rounds up.
 923     *
 924     * If the image size can't be represented by a spec conformant CHS geometry,
 925     * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use
 926     * the image size from the VHD footer to calculate total_sectors.
 927     */
 928    if (vpc_opts->force_size) {
 929        /* This will force the use of total_size for sector count, below */
 930        cyls         = VHD_CHS_MAX_C;
 931        heads        = VHD_CHS_MAX_H;
 932        secs_per_cyl = VHD_CHS_MAX_S;
 933    } else {
 934        total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
 935        for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
 936            calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
 937        }
 938    }
 939
 940    if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) {
 941        total_sectors = total_size / BDRV_SECTOR_SIZE;
 942        /* Allow a maximum disk size of 2040 GiB */
 943        if (total_sectors > VHD_MAX_SECTORS) {
 944            error_setg(errp, "Disk size is too large, max size is 2040 GiB");
 945            return -EFBIG;
 946        }
 947    } else {
 948        total_sectors = (int64_t) cyls * heads * secs_per_cyl;
 949    }
 950
 951    *out_total_sectors = total_sectors;
 952    if (out_cyls) {
 953        *out_cyls = cyls;
 954        *out_heads = heads;
 955        *out_secs_per_cyl = secs_per_cyl;
 956    }
 957
 958    return 0;
 959}
 960
 961static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts,
 962                                      Error **errp)
 963{
 964    BlockdevCreateOptionsVpc *vpc_opts;
 965    BlockBackend *blk = NULL;
 966    BlockDriverState *bs = NULL;
 967
 968    uint8_t buf[1024];
 969    VHDFooter *footer = (VHDFooter *) buf;
 970    uint16_t cyls = 0;
 971    uint8_t heads = 0;
 972    uint8_t secs_per_cyl = 0;
 973    int64_t total_sectors;
 974    int64_t total_size;
 975    int disk_type;
 976    int ret = -EIO;
 977
 978    assert(opts->driver == BLOCKDEV_DRIVER_VPC);
 979    vpc_opts = &opts->u.vpc;
 980
 981    /* Validate options and set default values */
 982    total_size = vpc_opts->size;
 983
 984    if (!vpc_opts->has_subformat) {
 985        vpc_opts->subformat = BLOCKDEV_VPC_SUBFORMAT_DYNAMIC;
 986    }
 987    switch (vpc_opts->subformat) {
 988    case BLOCKDEV_VPC_SUBFORMAT_DYNAMIC:
 989        disk_type = VHD_DYNAMIC;
 990        break;
 991    case BLOCKDEV_VPC_SUBFORMAT_FIXED:
 992        disk_type = VHD_FIXED;
 993        break;
 994    default:
 995        g_assert_not_reached();
 996    }
 997
 998    /* Create BlockBackend to write to the image */
 999    bs = bdrv_open_blockdev_ref(vpc_opts->file, errp);
1000    if (bs == NULL) {
1001        return -EIO;
1002    }
1003
1004    blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
1005    ret = blk_insert_bs(blk, bs, errp);
1006    if (ret < 0) {
1007        goto out;
1008    }
1009    blk_set_allow_write_beyond_eof(blk, true);
1010
1011    /* Get geometry and check that it matches the image size*/
1012    ret = calculate_rounded_image_size(vpc_opts, &cyls, &heads, &secs_per_cyl,
1013                                       &total_sectors, errp);
1014    if (ret < 0) {
1015        goto out;
1016    }
1017
1018    if (total_size != total_sectors * BDRV_SECTOR_SIZE) {
1019        error_setg(errp, "The requested image size cannot be represented in "
1020                         "CHS geometry");
1021        error_append_hint(errp, "Try size=%llu or force-size=on (the "
1022                                "latter makes the image incompatible with "
1023                                "Virtual PC)",
1024                          total_sectors * BDRV_SECTOR_SIZE);
1025        ret = -EINVAL;
1026        goto out;
1027    }
1028
1029    /* Prepare the Hard Disk Footer */
1030    memset(buf, 0, 1024);
1031
1032    memcpy(footer->creator, "conectix", 8);
1033    if (vpc_opts->force_size) {
1034        memcpy(footer->creator_app, "qem2", 4);
1035    } else {
1036        memcpy(footer->creator_app, "qemu", 4);
1037    }
1038    memcpy(footer->creator_os, "Wi2k", 4);
1039
1040    footer->features = cpu_to_be32(0x02);
1041    footer->version = cpu_to_be32(0x00010000);
1042    if (disk_type == VHD_DYNAMIC) {
1043        footer->data_offset = cpu_to_be64(HEADER_SIZE);
1044    } else {
1045        footer->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
1046    }
1047    footer->timestamp = cpu_to_be32(time(NULL) - VHD_TIMESTAMP_BASE);
1048
1049    /* Version of Virtual PC 2007 */
1050    footer->major = cpu_to_be16(0x0005);
1051    footer->minor = cpu_to_be16(0x0003);
1052    footer->orig_size = cpu_to_be64(total_size);
1053    footer->current_size = cpu_to_be64(total_size);
1054    footer->cyls = cpu_to_be16(cyls);
1055    footer->heads = heads;
1056    footer->secs_per_cyl = secs_per_cyl;
1057
1058    footer->type = cpu_to_be32(disk_type);
1059
1060    qemu_uuid_generate(&footer->uuid);
1061
1062    footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE));
1063
1064    if (disk_type == VHD_DYNAMIC) {
1065        ret = create_dynamic_disk(blk, buf, total_sectors);
1066        if (ret < 0) {
1067            error_setg(errp, "Unable to create or write VHD header");
1068        }
1069    } else {
1070        ret = create_fixed_disk(blk, buf, total_size, errp);
1071    }
1072
1073out:
1074    blk_unref(blk);
1075    bdrv_unref(bs);
1076    return ret;
1077}
1078
1079static int coroutine_fn vpc_co_create_opts(const char *filename,
1080                                           QemuOpts *opts, Error **errp)
1081{
1082    BlockdevCreateOptions *create_options = NULL;
1083    QDict *qdict = NULL;
1084    QObject *qobj;
1085    Visitor *v;
1086    BlockDriverState *bs = NULL;
1087    Error *local_err = NULL;
1088    int ret;
1089
1090    static const QDictRenames opt_renames[] = {
1091        { VPC_OPT_FORCE_SIZE,           "force-size" },
1092        { NULL, NULL },
1093    };
1094
1095    /* Parse options and convert legacy syntax */
1096    qdict = qemu_opts_to_qdict_filtered(opts, NULL, &vpc_create_opts, true);
1097
1098    if (!qdict_rename_keys(qdict, opt_renames, errp)) {
1099        ret = -EINVAL;
1100        goto fail;
1101    }
1102
1103    /* Create and open the file (protocol layer) */
1104    ret = bdrv_create_file(filename, opts, &local_err);
1105    if (ret < 0) {
1106        error_propagate(errp, local_err);
1107        goto fail;
1108    }
1109
1110    bs = bdrv_open(filename, NULL, NULL,
1111                   BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
1112    if (bs == NULL) {
1113        ret = -EIO;
1114        goto fail;
1115    }
1116
1117    /* Now get the QAPI type BlockdevCreateOptions */
1118    qdict_put_str(qdict, "driver", "vpc");
1119    qdict_put_str(qdict, "file", bs->node_name);
1120
1121    qobj = qdict_crumple(qdict, errp);
1122    QDECREF(qdict);
1123    qdict = qobject_to(QDict, qobj);
1124    if (qdict == NULL) {
1125        ret = -EINVAL;
1126        goto fail;
1127    }
1128
1129    v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
1130    visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err);
1131    visit_free(v);
1132
1133    if (local_err) {
1134        error_propagate(errp, local_err);
1135        ret = -EINVAL;
1136        goto fail;
1137    }
1138
1139    /* Silently round up size */
1140    assert(create_options->driver == BLOCKDEV_DRIVER_VPC);
1141    create_options->u.vpc.size =
1142        ROUND_UP(create_options->u.vpc.size, BDRV_SECTOR_SIZE);
1143
1144    if (!create_options->u.vpc.force_size) {
1145        int64_t total_sectors;
1146        ret = calculate_rounded_image_size(&create_options->u.vpc, NULL, NULL,
1147                                           NULL, &total_sectors, errp);
1148        if (ret < 0) {
1149            goto fail;
1150        }
1151
1152        create_options->u.vpc.size = total_sectors * BDRV_SECTOR_SIZE;
1153    }
1154
1155
1156    /* Create the vpc image (format layer) */
1157    ret = vpc_co_create(create_options, errp);
1158
1159fail:
1160    QDECREF(qdict);
1161    bdrv_unref(bs);
1162    qapi_free_BlockdevCreateOptions(create_options);
1163    return ret;
1164}
1165
1166
1167static int vpc_has_zero_init(BlockDriverState *bs)
1168{
1169    BDRVVPCState *s = bs->opaque;
1170    VHDFooter *footer =  (VHDFooter *) s->footer_buf;
1171
1172    if (be32_to_cpu(footer->type) == VHD_FIXED) {
1173        return bdrv_has_zero_init(bs->file->bs);
1174    } else {
1175        return 1;
1176    }
1177}
1178
1179static void vpc_close(BlockDriverState *bs)
1180{
1181    BDRVVPCState *s = bs->opaque;
1182    qemu_vfree(s->pagetable);
1183#ifdef CACHE
1184    g_free(s->pageentry_u8);
1185#endif
1186
1187    migrate_del_blocker(s->migration_blocker);
1188    error_free(s->migration_blocker);
1189}
1190
1191static QemuOptsList vpc_create_opts = {
1192    .name = "vpc-create-opts",
1193    .head = QTAILQ_HEAD_INITIALIZER(vpc_create_opts.head),
1194    .desc = {
1195        {
1196            .name = BLOCK_OPT_SIZE,
1197            .type = QEMU_OPT_SIZE,
1198            .help = "Virtual disk size"
1199        },
1200        {
1201            .name = BLOCK_OPT_SUBFMT,
1202            .type = QEMU_OPT_STRING,
1203            .help =
1204                "Type of virtual hard disk format. Supported formats are "
1205                "{dynamic (default) | fixed} "
1206        },
1207        {
1208            .name = VPC_OPT_FORCE_SIZE,
1209            .type = QEMU_OPT_BOOL,
1210            .help = "Force disk size calculation to use the actual size "
1211                    "specified, rather than using the nearest CHS-based "
1212                    "calculation"
1213        },
1214        { /* end of list */ }
1215    }
1216};
1217
1218static BlockDriver bdrv_vpc = {
1219    .format_name    = "vpc",
1220    .instance_size  = sizeof(BDRVVPCState),
1221
1222    .bdrv_probe             = vpc_probe,
1223    .bdrv_open              = vpc_open,
1224    .bdrv_close             = vpc_close,
1225    .bdrv_reopen_prepare    = vpc_reopen_prepare,
1226    .bdrv_child_perm        = bdrv_format_default_perms,
1227    .bdrv_co_create         = vpc_co_create,
1228    .bdrv_co_create_opts    = vpc_co_create_opts,
1229
1230    .bdrv_co_preadv             = vpc_co_preadv,
1231    .bdrv_co_pwritev            = vpc_co_pwritev,
1232    .bdrv_co_block_status       = vpc_co_block_status,
1233
1234    .bdrv_get_info          = vpc_get_info,
1235
1236    .create_opts            = &vpc_create_opts,
1237    .bdrv_has_zero_init     = vpc_has_zero_init,
1238};
1239
1240static void bdrv_vpc_init(void)
1241{
1242    bdrv_register(&bdrv_vpc);
1243}
1244
1245block_init(bdrv_vpc_init);
1246