qemu/block/vpc.c
<<
>>
Prefs
   1/*
   2 * Block driver for Connectix / Microsoft Virtual PC images
   3 *
   4 * Copyright (c) 2005 Alex Beregszaszi
   5 * Copyright (c) 2009 Kevin Wolf <kwolf@suse.de>
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25
  26#include "qemu/osdep.h"
  27#include "qapi/error.h"
  28#include "block/block_int.h"
  29#include "block/qdict.h"
  30#include "sysemu/block-backend.h"
  31#include "qemu/module.h"
  32#include "qemu/option.h"
  33#include "migration/blocker.h"
  34#include "qemu/bswap.h"
  35#include "qemu/uuid.h"
  36#include "qapi/qmp/qdict.h"
  37#include "qapi/qobject-input-visitor.h"
  38#include "qapi/qapi-visit-block-core.h"
  39
  40/**************************************************************/
  41
  42//#define CACHE
  43
  44enum vhd_type {
  45    VHD_FIXED           = 2,
  46    VHD_DYNAMIC         = 3,
  47    VHD_DIFFERENCING    = 4,
  48};
  49
  50/* Seconds since Jan 1, 2000 0:00:00 (UTC) */
  51#define VHD_TIMESTAMP_BASE 946684800
  52
  53#define VHD_CHS_MAX_C   65535LL
  54#define VHD_CHS_MAX_H   16
  55#define VHD_CHS_MAX_S   255
  56
  57#define VHD_MAX_SECTORS       0xff000000    /* 2040 GiB max image size */
  58#define VHD_MAX_GEOMETRY      (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S)
  59
  60#define VPC_OPT_FORCE_SIZE "force_size"
  61
  62/* always big-endian */
  63typedef struct vhd_footer {
  64    char        creator[8]; /* "conectix" */
  65    uint32_t    features;
  66    uint32_t    version;
  67
  68    /* Offset of next header structure, 0xFFFFFFFF if none */
  69    uint64_t    data_offset;
  70
  71    /* Seconds since Jan 1, 2000 0:00:00 (UTC) */
  72    uint32_t    timestamp;
  73
  74    char        creator_app[4]; /*  e.g., "vpc " */
  75    uint16_t    major;
  76    uint16_t    minor;
  77    char        creator_os[4]; /* "Wi2k" */
  78
  79    uint64_t    orig_size;
  80    uint64_t    current_size;
  81
  82    uint16_t    cyls;
  83    uint8_t     heads;
  84    uint8_t     secs_per_cyl;
  85
  86    uint32_t    type;
  87
  88    /* Checksum of the Hard Disk Footer ("one's complement of the sum of all
  89       the bytes in the footer without the checksum field") */
  90    uint32_t    checksum;
  91
  92    /* UUID used to identify a parent hard disk (backing file) */
  93    QemuUUID    uuid;
  94
  95    uint8_t     in_saved_state;
  96    uint8_t     reserved[427];
  97} QEMU_PACKED VHDFooter;
  98
  99QEMU_BUILD_BUG_ON(sizeof(VHDFooter) != 512);
 100
 101typedef struct vhd_dyndisk_header {
 102    char        magic[8]; /* "cxsparse" */
 103
 104    /* Offset of next header structure, 0xFFFFFFFF if none */
 105    uint64_t    data_offset;
 106
 107    /* Offset of the Block Allocation Table (BAT) */
 108    uint64_t    table_offset;
 109
 110    uint32_t    version;
 111    uint32_t    max_table_entries; /* 32bit/entry */
 112
 113    /* 2 MB by default, must be a power of two */
 114    uint32_t    block_size;
 115
 116    uint32_t    checksum;
 117    uint8_t     parent_uuid[16];
 118    uint32_t    parent_timestamp;
 119    uint32_t    reserved;
 120
 121    /* Backing file name (in UTF-16) */
 122    uint8_t     parent_name[512];
 123
 124    struct {
 125        uint32_t    platform;
 126        uint32_t    data_space;
 127        uint32_t    data_length;
 128        uint32_t    reserved;
 129        uint64_t    data_offset;
 130    } parent_locator[8];
 131    uint8_t     reserved2[256];
 132} QEMU_PACKED VHDDynDiskHeader;
 133
 134QEMU_BUILD_BUG_ON(sizeof(VHDDynDiskHeader) != 1024);
 135
 136typedef struct BDRVVPCState {
 137    CoMutex lock;
 138    VHDFooter footer;
 139    uint64_t free_data_block_offset;
 140    int max_table_entries;
 141    uint32_t *pagetable;
 142    uint64_t bat_offset;
 143    uint64_t last_bitmap_offset;
 144
 145    uint32_t block_size;
 146    uint32_t bitmap_size;
 147    bool force_use_chs;
 148    bool force_use_sz;
 149
 150#ifdef CACHE
 151    uint8_t *pageentry_u8;
 152    uint32_t *pageentry_u32;
 153    uint16_t *pageentry_u16;
 154
 155    uint64_t last_bitmap;
 156#endif
 157
 158    Error *migration_blocker;
 159} BDRVVPCState;
 160
 161#define VPC_OPT_SIZE_CALC "force_size_calc"
 162static QemuOptsList vpc_runtime_opts = {
 163    .name = "vpc-runtime-opts",
 164    .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head),
 165    .desc = {
 166        {
 167            .name = VPC_OPT_SIZE_CALC,
 168            .type = QEMU_OPT_STRING,
 169            .help = "Force disk size calculation to use either CHS geometry, "
 170                    "or use the disk current_size specified in the VHD footer. "
 171                    "{chs, current_size}"
 172        },
 173        { /* end of list */ }
 174    }
 175};
 176
 177static QemuOptsList vpc_create_opts;
 178
 179static uint32_t vpc_checksum(void *p, size_t size)
 180{
 181    uint8_t *buf = p;
 182    uint32_t res = 0;
 183    int i;
 184
 185    for (i = 0; i < size; i++)
 186        res += buf[i];
 187
 188    return ~res;
 189}
 190
 191
 192static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
 193{
 194    if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8))
 195        return 100;
 196    return 0;
 197}
 198
 199static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts,
 200                              Error **errp)
 201{
 202    BDRVVPCState *s = bs->opaque;
 203    const char *size_calc;
 204
 205    size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC);
 206
 207    if (!size_calc) {
 208       /* no override, use autodetect only */
 209    } else if (!strcmp(size_calc, "current_size")) {
 210        s->force_use_sz = true;
 211    } else if (!strcmp(size_calc, "chs")) {
 212        s->force_use_chs = true;
 213    } else {
 214        error_setg(errp, "Invalid size calculation mode: '%s'", size_calc);
 215    }
 216}
 217
 218static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
 219                    Error **errp)
 220{
 221    BDRVVPCState *s = bs->opaque;
 222    int i;
 223    VHDFooter *footer;
 224    QemuOpts *opts = NULL;
 225    Error *local_err = NULL;
 226    bool use_chs;
 227    VHDDynDiskHeader dyndisk_header;
 228    uint32_t checksum;
 229    uint64_t computed_size;
 230    uint64_t pagetable_size;
 231    int disk_type = VHD_DYNAMIC;
 232    int ret;
 233    int64_t bs_size;
 234
 235    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
 236                               BDRV_CHILD_IMAGE, false, errp);
 237    if (!bs->file) {
 238        return -EINVAL;
 239    }
 240
 241    opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
 242    if (!qemu_opts_absorb_qdict(opts, options, errp)) {
 243        ret = -EINVAL;
 244        goto fail;
 245    }
 246
 247    vpc_parse_options(bs, opts, &local_err);
 248    if (local_err) {
 249        error_propagate(errp, local_err);
 250        ret = -EINVAL;
 251        goto fail;
 252    }
 253
 254    ret = bdrv_pread(bs->file, 0, &s->footer, sizeof(s->footer));
 255    if (ret < 0) {
 256        error_setg(errp, "Unable to read VHD header");
 257        goto fail;
 258    }
 259
 260    footer = &s->footer;
 261    if (strncmp(footer->creator, "conectix", 8)) {
 262        int64_t offset = bdrv_getlength(bs->file->bs);
 263        if (offset < 0) {
 264            ret = offset;
 265            error_setg(errp, "Invalid file size");
 266            goto fail;
 267        } else if (offset < sizeof(*footer)) {
 268            ret = -EINVAL;
 269            error_setg(errp, "File too small for a VHD header");
 270            goto fail;
 271        }
 272
 273        /* If a fixed disk, the footer is found only at the end of the file */
 274        ret = bdrv_pread(bs->file, offset - sizeof(*footer),
 275                         footer, sizeof(*footer));
 276        if (ret < 0) {
 277            goto fail;
 278        }
 279        if (strncmp(footer->creator, "conectix", 8) ||
 280            be32_to_cpu(footer->type) != VHD_FIXED) {
 281            error_setg(errp, "invalid VPC image");
 282            ret = -EINVAL;
 283            goto fail;
 284        }
 285        disk_type = VHD_FIXED;
 286    }
 287
 288    checksum = be32_to_cpu(footer->checksum);
 289    footer->checksum = 0;
 290    if (vpc_checksum(footer, sizeof(*footer)) != checksum) {
 291        error_setg(errp, "Incorrect header checksum");
 292        ret = -EINVAL;
 293        goto fail;
 294    }
 295
 296    /* Write 'checksum' back to footer, or else will leave it with zero. */
 297    footer->checksum = cpu_to_be32(checksum);
 298
 299    /* The visible size of a image in Virtual PC depends on the geometry
 300       rather than on the size stored in the footer (the size in the footer
 301       is too large usually) */
 302    bs->total_sectors = (int64_t)
 303        be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
 304
 305    /* Microsoft Virtual PC and Microsoft Hyper-V produce and read
 306     * VHD image sizes differently.  VPC will rely on CHS geometry,
 307     * while Hyper-V and disk2vhd use the size specified in the footer.
 308     *
 309     * We use a couple of approaches to try and determine the correct method:
 310     * look at the Creator App field, and look for images that have CHS
 311     * geometry that is the maximum value.
 312     *
 313     * If the CHS geometry is the maximum CHS geometry, then we assume that
 314     * the size is the footer->current_size to avoid truncation.  Otherwise,
 315     * we follow the table based on footer->creator_app:
 316     *
 317     *  Known creator apps:
 318     *      'vpc '  :  CHS              Virtual PC (uses disk geometry)
 319     *      'qemu'  :  CHS              QEMU (uses disk geometry)
 320     *      'qem2'  :  current_size     QEMU (uses current_size)
 321     *      'win '  :  current_size     Hyper-V
 322     *      'd2v '  :  current_size     Disk2vhd
 323     *      'tap\0' :  current_size     XenServer
 324     *      'CTXS'  :  current_size     XenConverter
 325     *
 326     *  The user can override the table values via drive options, however
 327     *  even with an override we will still use current_size for images
 328     *  that have CHS geometry of the maximum size.
 329     */
 330    use_chs = (!!strncmp(footer->creator_app, "win ", 4) &&
 331               !!strncmp(footer->creator_app, "qem2", 4) &&
 332               !!strncmp(footer->creator_app, "d2v ", 4) &&
 333               !!strncmp(footer->creator_app, "CTXS", 4) &&
 334               !!memcmp(footer->creator_app, "tap", 4)) || s->force_use_chs;
 335
 336    if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) {
 337        bs->total_sectors = be64_to_cpu(footer->current_size) /
 338                                        BDRV_SECTOR_SIZE;
 339    }
 340
 341    /* Allow a maximum disk size of 2040 GiB */
 342    if (bs->total_sectors > VHD_MAX_SECTORS) {
 343        ret = -EFBIG;
 344        goto fail;
 345    }
 346
 347    if (disk_type == VHD_DYNAMIC) {
 348        ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset),
 349                         &dyndisk_header, sizeof(dyndisk_header));
 350        if (ret < 0) {
 351            error_setg(errp, "Error reading dynamic VHD header");
 352            goto fail;
 353        }
 354
 355        if (strncmp(dyndisk_header.magic, "cxsparse", 8)) {
 356            error_setg(errp, "Invalid header magic");
 357            ret = -EINVAL;
 358            goto fail;
 359        }
 360
 361        s->block_size = be32_to_cpu(dyndisk_header.block_size);
 362        if (!is_power_of_2(s->block_size) || s->block_size < BDRV_SECTOR_SIZE) {
 363            error_setg(errp, "Invalid block size %" PRIu32, s->block_size);
 364            ret = -EINVAL;
 365            goto fail;
 366        }
 367        s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;
 368
 369        s->max_table_entries = be32_to_cpu(dyndisk_header.max_table_entries);
 370
 371        if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) {
 372            error_setg(errp, "Too many blocks");
 373            ret = -EINVAL;
 374            goto fail;
 375        }
 376
 377        computed_size = (uint64_t) s->max_table_entries * s->block_size;
 378        if (computed_size < bs->total_sectors * 512) {
 379            error_setg(errp, "Page table too small");
 380            ret = -EINVAL;
 381            goto fail;
 382        }
 383
 384        if (s->max_table_entries > SIZE_MAX / 4 ||
 385            s->max_table_entries > (int) INT_MAX / 4) {
 386            error_setg(errp, "Max Table Entries too large (%" PRId32 ")",
 387                        s->max_table_entries);
 388            ret = -EINVAL;
 389            goto fail;
 390        }
 391
 392        pagetable_size = (uint64_t) s->max_table_entries * 4;
 393
 394        s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size);
 395        if (s->pagetable == NULL) {
 396            error_setg(errp, "Unable to allocate memory for page table");
 397            ret = -ENOMEM;
 398            goto fail;
 399        }
 400
 401        s->bat_offset = be64_to_cpu(dyndisk_header.table_offset);
 402
 403        ret = bdrv_pread(bs->file, s->bat_offset, s->pagetable,
 404                         pagetable_size);
 405        if (ret < 0) {
 406            error_setg(errp, "Error reading pagetable");
 407            goto fail;
 408        }
 409
 410        s->free_data_block_offset =
 411            ROUND_UP(s->bat_offset + pagetable_size, 512);
 412
 413        for (i = 0; i < s->max_table_entries; i++) {
 414            be32_to_cpus(&s->pagetable[i]);
 415            if (s->pagetable[i] != 0xFFFFFFFF) {
 416                int64_t next = (512 * (int64_t) s->pagetable[i]) +
 417                    s->bitmap_size + s->block_size;
 418
 419                if (next > s->free_data_block_offset) {
 420                    s->free_data_block_offset = next;
 421                }
 422            }
 423        }
 424
 425        bs_size = bdrv_getlength(bs->file->bs);
 426        if (bs_size < 0) {
 427            error_setg_errno(errp, -bs_size, "Unable to learn image size");
 428            ret = bs_size;
 429            goto fail;
 430        }
 431        if (s->free_data_block_offset > bs_size) {
 432            error_setg(errp, "block-vpc: free_data_block_offset points after "
 433                             "the end of file. The image has been truncated.");
 434            ret = -EINVAL;
 435            goto fail;
 436        }
 437
 438        s->last_bitmap_offset = (int64_t) -1;
 439
 440#ifdef CACHE
 441        s->pageentry_u8 = g_malloc(512);
 442        s->pageentry_u32 = s->pageentry_u8;
 443        s->pageentry_u16 = s->pageentry_u8;
 444        s->last_pagetable = -1;
 445#endif
 446    }
 447
 448    /* Disable migration when VHD images are used */
 449    error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
 450               "does not support live migration",
 451               bdrv_get_device_or_node_name(bs));
 452    ret = migrate_add_blocker(s->migration_blocker, errp);
 453    if (ret < 0) {
 454        error_free(s->migration_blocker);
 455        goto fail;
 456    }
 457
 458    qemu_co_mutex_init(&s->lock);
 459    qemu_opts_del(opts);
 460
 461    return 0;
 462
 463fail:
 464    qemu_opts_del(opts);
 465    qemu_vfree(s->pagetable);
 466#ifdef CACHE
 467    g_free(s->pageentry_u8);
 468#endif
 469    return ret;
 470}
 471
 472static int vpc_reopen_prepare(BDRVReopenState *state,
 473                              BlockReopenQueue *queue, Error **errp)
 474{
 475    return 0;
 476}
 477
 478/*
 479 * Returns the absolute byte offset of the given sector in the image file.
 480 * If the sector is not allocated, -1 is returned instead.
 481 * If an error occurred trying to write an updated block bitmap back to
 482 * the file, -2 is returned, and the error value is written to *err.
 483 * This can only happen for a write operation.
 484 *
 485 * The parameter write must be 1 if the offset will be used for a write
 486 * operation (the block bitmaps is updated then), 0 otherwise.
 487 * If write is true then err must not be NULL.
 488 */
 489static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
 490                                       bool write, int *err)
 491{
 492    BDRVVPCState *s = bs->opaque;
 493    uint64_t bitmap_offset, block_offset;
 494    uint32_t pagetable_index, offset_in_block;
 495
 496    assert(!(write && err == NULL));
 497
 498    pagetable_index = offset / s->block_size;
 499    offset_in_block = offset % s->block_size;
 500
 501    if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
 502        return -1; /* not allocated */
 503
 504    bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
 505    block_offset = bitmap_offset + s->bitmap_size + offset_in_block;
 506
 507    /* We must ensure that we don't write to any sectors which are marked as
 508       unused in the bitmap. We get away with setting all bits in the block
 509       bitmap each time we write to a new block. This might cause Virtual PC to
 510       miss sparse read optimization, but it's not a problem in terms of
 511       correctness. */
 512    if (write && (s->last_bitmap_offset != bitmap_offset)) {
 513        uint8_t bitmap[s->bitmap_size];
 514        int r;
 515
 516        s->last_bitmap_offset = bitmap_offset;
 517        memset(bitmap, 0xff, s->bitmap_size);
 518        r = bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
 519        if (r < 0) {
 520            *err = r;
 521            return -2;
 522        }
 523    }
 524
 525    return block_offset;
 526}
 527
 528/*
 529 * Writes the footer to the end of the image file. This is needed when the
 530 * file grows as it overwrites the old footer
 531 *
 532 * Returns 0 on success and < 0 on error
 533 */
 534static int rewrite_footer(BlockDriverState *bs)
 535{
 536    int ret;
 537    BDRVVPCState *s = bs->opaque;
 538    int64_t offset = s->free_data_block_offset;
 539
 540    ret = bdrv_pwrite_sync(bs->file, offset, &s->footer, sizeof(s->footer));
 541    if (ret < 0)
 542        return ret;
 543
 544    return 0;
 545}
 546
 547/*
 548 * Allocates a new block. This involves writing a new footer and updating
 549 * the Block Allocation Table to use the space at the old end of the image
 550 * file (overwriting the old footer)
 551 *
 552 * Returns the sectors' offset in the image file on success and < 0 on error
 553 */
 554static int64_t alloc_block(BlockDriverState *bs, int64_t offset)
 555{
 556    BDRVVPCState *s = bs->opaque;
 557    int64_t bat_offset;
 558    uint32_t index, bat_value;
 559    int ret;
 560    uint8_t bitmap[s->bitmap_size];
 561
 562    /* Check if sector_num is valid */
 563    if ((offset < 0) || (offset > bs->total_sectors * BDRV_SECTOR_SIZE)) {
 564        return -EINVAL;
 565    }
 566
 567    /* Write entry into in-memory BAT */
 568    index = offset / s->block_size;
 569    assert(s->pagetable[index] == 0xFFFFFFFF);
 570    s->pagetable[index] = s->free_data_block_offset / 512;
 571
 572    /* Initialize the block's bitmap */
 573    memset(bitmap, 0xff, s->bitmap_size);
 574    ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
 575        s->bitmap_size);
 576    if (ret < 0) {
 577        return ret;
 578    }
 579
 580    /* Write new footer (the old one will be overwritten) */
 581    s->free_data_block_offset += s->block_size + s->bitmap_size;
 582    ret = rewrite_footer(bs);
 583    if (ret < 0)
 584        goto fail;
 585
 586    /* Write BAT entry to disk */
 587    bat_offset = s->bat_offset + (4 * index);
 588    bat_value = cpu_to_be32(s->pagetable[index]);
 589    ret = bdrv_pwrite_sync(bs->file, bat_offset, &bat_value, 4);
 590    if (ret < 0)
 591        goto fail;
 592
 593    return get_image_offset(bs, offset, false, NULL);
 594
 595fail:
 596    s->free_data_block_offset -= (s->block_size + s->bitmap_size);
 597    return ret;
 598}
 599
 600static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 601{
 602    BDRVVPCState *s = (BDRVVPCState *)bs->opaque;
 603
 604    if (be32_to_cpu(s->footer.type) != VHD_FIXED) {
 605        bdi->cluster_size = s->block_size;
 606    }
 607
 608    return 0;
 609}
 610
 611static int coroutine_fn
 612vpc_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
 613              QEMUIOVector *qiov, BdrvRequestFlags flags)
 614{
 615    BDRVVPCState *s = bs->opaque;
 616    int ret;
 617    int64_t image_offset;
 618    int64_t n_bytes;
 619    int64_t bytes_done = 0;
 620    QEMUIOVector local_qiov;
 621
 622    if (be32_to_cpu(s->footer.type) == VHD_FIXED) {
 623        return bdrv_co_preadv(bs->file, offset, bytes, qiov, 0);
 624    }
 625
 626    qemu_co_mutex_lock(&s->lock);
 627    qemu_iovec_init(&local_qiov, qiov->niov);
 628
 629    while (bytes > 0) {
 630        image_offset = get_image_offset(bs, offset, false, NULL);
 631        n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
 632
 633        if (image_offset == -1) {
 634            qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
 635        } else {
 636            qemu_iovec_reset(&local_qiov);
 637            qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
 638
 639            qemu_co_mutex_unlock(&s->lock);
 640            ret = bdrv_co_preadv(bs->file, image_offset, n_bytes,
 641                                 &local_qiov, 0);
 642            qemu_co_mutex_lock(&s->lock);
 643            if (ret < 0) {
 644                goto fail;
 645            }
 646        }
 647
 648        bytes -= n_bytes;
 649        offset += n_bytes;
 650        bytes_done += n_bytes;
 651    }
 652
 653    ret = 0;
 654fail:
 655    qemu_iovec_destroy(&local_qiov);
 656    qemu_co_mutex_unlock(&s->lock);
 657
 658    return ret;
 659}
 660
 661static int coroutine_fn
 662vpc_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
 663               QEMUIOVector *qiov, BdrvRequestFlags flags)
 664{
 665    BDRVVPCState *s = bs->opaque;
 666    int64_t image_offset;
 667    int64_t n_bytes;
 668    int64_t bytes_done = 0;
 669    int ret = 0;
 670    QEMUIOVector local_qiov;
 671
 672    if (be32_to_cpu(s->footer.type) == VHD_FIXED) {
 673        return bdrv_co_pwritev(bs->file, offset, bytes, qiov, 0);
 674    }
 675
 676    qemu_co_mutex_lock(&s->lock);
 677    qemu_iovec_init(&local_qiov, qiov->niov);
 678
 679    while (bytes > 0) {
 680        image_offset = get_image_offset(bs, offset, true, &ret);
 681        if (image_offset == -2) {
 682            /* Failed to write block bitmap: can't proceed with write */
 683            goto fail;
 684        }
 685        n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
 686
 687        if (image_offset == -1) {
 688            image_offset = alloc_block(bs, offset);
 689            if (image_offset < 0) {
 690                ret = image_offset;
 691                goto fail;
 692            }
 693        }
 694
 695        qemu_iovec_reset(&local_qiov);
 696        qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
 697
 698        qemu_co_mutex_unlock(&s->lock);
 699        ret = bdrv_co_pwritev(bs->file, image_offset, n_bytes,
 700                              &local_qiov, 0);
 701        qemu_co_mutex_lock(&s->lock);
 702        if (ret < 0) {
 703            goto fail;
 704        }
 705
 706        bytes -= n_bytes;
 707        offset += n_bytes;
 708        bytes_done += n_bytes;
 709    }
 710
 711    ret = 0;
 712fail:
 713    qemu_iovec_destroy(&local_qiov);
 714    qemu_co_mutex_unlock(&s->lock);
 715
 716    return ret;
 717}
 718
 719static int coroutine_fn vpc_co_block_status(BlockDriverState *bs,
 720                                            bool want_zero,
 721                                            int64_t offset, int64_t bytes,
 722                                            int64_t *pnum, int64_t *map,
 723                                            BlockDriverState **file)
 724{
 725    BDRVVPCState *s = bs->opaque;
 726    int64_t image_offset;
 727    bool allocated;
 728    int ret;
 729    int64_t n;
 730
 731    if (be32_to_cpu(s->footer.type) == VHD_FIXED) {
 732        *pnum = bytes;
 733        *map = offset;
 734        *file = bs->file->bs;
 735        return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_RECURSE;
 736    }
 737
 738    qemu_co_mutex_lock(&s->lock);
 739
 740    image_offset = get_image_offset(bs, offset, false, NULL);
 741    allocated = (image_offset != -1);
 742    *pnum = 0;
 743    ret = BDRV_BLOCK_ZERO;
 744
 745    do {
 746        /* All sectors in a block are contiguous (without using the bitmap) */
 747        n = ROUND_UP(offset + 1, s->block_size) - offset;
 748        n = MIN(n, bytes);
 749
 750        *pnum += n;
 751        offset += n;
 752        bytes -= n;
 753        /* *pnum can't be greater than one block for allocated
 754         * sectors since there is always a bitmap in between. */
 755        if (allocated) {
 756            *file = bs->file->bs;
 757            *map = image_offset;
 758            ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
 759            break;
 760        }
 761        if (bytes == 0) {
 762            break;
 763        }
 764        image_offset = get_image_offset(bs, offset, false, NULL);
 765    } while (image_offset == -1);
 766
 767    qemu_co_mutex_unlock(&s->lock);
 768    return ret;
 769}
 770
 771/*
 772 * Calculates the number of cylinders, heads and sectors per cylinder
 773 * based on a given number of sectors. This is the algorithm described
 774 * in the VHD specification.
 775 *
 776 * Note that the geometry doesn't always exactly match total_sectors but
 777 * may round it down.
 778 *
 779 * Returns 0 on success, -EFBIG if the size is larger than 2040 GiB. Override
 780 * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB)
 781 * and instead allow up to 255 heads.
 782 */
 783static int calculate_geometry(int64_t total_sectors, uint16_t *cyls,
 784    uint8_t *heads, uint8_t *secs_per_cyl)
 785{
 786    uint32_t cyls_times_heads;
 787
 788    total_sectors = MIN(total_sectors, VHD_MAX_GEOMETRY);
 789
 790    if (total_sectors >= 65535LL * 16 * 63) {
 791        *secs_per_cyl = 255;
 792        *heads = 16;
 793        cyls_times_heads = total_sectors / *secs_per_cyl;
 794    } else {
 795        *secs_per_cyl = 17;
 796        cyls_times_heads = total_sectors / *secs_per_cyl;
 797        *heads = DIV_ROUND_UP(cyls_times_heads, 1024);
 798
 799        if (*heads < 4) {
 800            *heads = 4;
 801        }
 802
 803        if (cyls_times_heads >= (*heads * 1024) || *heads > 16) {
 804            *secs_per_cyl = 31;
 805            *heads = 16;
 806            cyls_times_heads = total_sectors / *secs_per_cyl;
 807        }
 808
 809        if (cyls_times_heads >= (*heads * 1024)) {
 810            *secs_per_cyl = 63;
 811            *heads = 16;
 812            cyls_times_heads = total_sectors / *secs_per_cyl;
 813        }
 814    }
 815
 816    *cyls = cyls_times_heads / *heads;
 817
 818    return 0;
 819}
 820
 821static int create_dynamic_disk(BlockBackend *blk, VHDFooter *footer,
 822                               int64_t total_sectors)
 823{
 824    VHDDynDiskHeader dyndisk_header;
 825    uint8_t bat_sector[512];
 826    size_t block_size, num_bat_entries;
 827    int i;
 828    int ret;
 829    int64_t offset = 0;
 830
 831    /* Write the footer (twice: at the beginning and at the end) */
 832    block_size = 0x200000;
 833    num_bat_entries = DIV_ROUND_UP(total_sectors, block_size / 512);
 834
 835    ret = blk_pwrite(blk, offset, footer, sizeof(*footer), 0);
 836    if (ret < 0) {
 837        goto fail;
 838    }
 839
 840    offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
 841    ret = blk_pwrite(blk, offset, footer, sizeof(*footer), 0);
 842    if (ret < 0) {
 843        goto fail;
 844    }
 845
 846    /* Write the initial BAT */
 847    offset = 3 * 512;
 848
 849    memset(bat_sector, 0xFF, 512);
 850    for (i = 0; i < DIV_ROUND_UP(num_bat_entries * 4, 512); i++) {
 851        ret = blk_pwrite(blk, offset, bat_sector, 512, 0);
 852        if (ret < 0) {
 853            goto fail;
 854        }
 855        offset += 512;
 856    }
 857
 858    /* Prepare the Dynamic Disk Header */
 859    memset(&dyndisk_header, 0, sizeof(dyndisk_header));
 860
 861    memcpy(dyndisk_header.magic, "cxsparse", 8);
 862
 863    /*
 864     * Note: The spec is actually wrong here for data_offset, it says
 865     * 0xFFFFFFFF, but MS tools expect all 64 bits to be set.
 866     */
 867    dyndisk_header.data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
 868    dyndisk_header.table_offset = cpu_to_be64(3 * 512);
 869    dyndisk_header.version = cpu_to_be32(0x00010000);
 870    dyndisk_header.block_size = cpu_to_be32(block_size);
 871    dyndisk_header.max_table_entries = cpu_to_be32(num_bat_entries);
 872
 873    dyndisk_header.checksum = cpu_to_be32(
 874        vpc_checksum(&dyndisk_header, sizeof(dyndisk_header)));
 875
 876    /* Write the header */
 877    offset = 512;
 878
 879    ret = blk_pwrite(blk, offset, &dyndisk_header, sizeof(dyndisk_header), 0);
 880    if (ret < 0) {
 881        goto fail;
 882    }
 883
 884    ret = 0;
 885 fail:
 886    return ret;
 887}
 888
 889static int create_fixed_disk(BlockBackend *blk, VHDFooter *footer,
 890                             int64_t total_size, Error **errp)
 891{
 892    int ret;
 893
 894    /* Add footer to total size */
 895    total_size += sizeof(*footer);
 896
 897    ret = blk_truncate(blk, total_size, false, PREALLOC_MODE_OFF, 0, errp);
 898    if (ret < 0) {
 899        return ret;
 900    }
 901
 902    ret = blk_pwrite(blk, total_size - sizeof(*footer),
 903                     footer, sizeof(*footer), 0);
 904    if (ret < 0) {
 905        error_setg_errno(errp, -ret, "Unable to write VHD header");
 906        return ret;
 907    }
 908
 909    return 0;
 910}
 911
 912static int calculate_rounded_image_size(BlockdevCreateOptionsVpc *vpc_opts,
 913                                        uint16_t *out_cyls,
 914                                        uint8_t *out_heads,
 915                                        uint8_t *out_secs_per_cyl,
 916                                        int64_t *out_total_sectors,
 917                                        Error **errp)
 918{
 919    int64_t total_size = vpc_opts->size;
 920    uint16_t cyls = 0;
 921    uint8_t heads = 0;
 922    uint8_t secs_per_cyl = 0;
 923    int64_t total_sectors;
 924    int i;
 925
 926    /*
 927     * Calculate matching total_size and geometry. Increase the number of
 928     * sectors requested until we get enough (or fail). This ensures that
 929     * qemu-img convert doesn't truncate images, but rather rounds up.
 930     *
 931     * If the image size can't be represented by a spec conformant CHS geometry,
 932     * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use
 933     * the image size from the VHD footer to calculate total_sectors.
 934     */
 935    if (vpc_opts->force_size) {
 936        /* This will force the use of total_size for sector count, below */
 937        cyls         = VHD_CHS_MAX_C;
 938        heads        = VHD_CHS_MAX_H;
 939        secs_per_cyl = VHD_CHS_MAX_S;
 940    } else {
 941        total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
 942        for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
 943            calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
 944        }
 945    }
 946
 947    if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) {
 948        total_sectors = total_size / BDRV_SECTOR_SIZE;
 949        /* Allow a maximum disk size of 2040 GiB */
 950        if (total_sectors > VHD_MAX_SECTORS) {
 951            error_setg(errp, "Disk size is too large, max size is 2040 GiB");
 952            return -EFBIG;
 953        }
 954    } else {
 955        total_sectors = (int64_t) cyls * heads * secs_per_cyl;
 956    }
 957
 958    *out_total_sectors = total_sectors;
 959    if (out_cyls) {
 960        *out_cyls = cyls;
 961        *out_heads = heads;
 962        *out_secs_per_cyl = secs_per_cyl;
 963    }
 964
 965    return 0;
 966}
 967
 968static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts,
 969                                      Error **errp)
 970{
 971    BlockdevCreateOptionsVpc *vpc_opts;
 972    BlockBackend *blk = NULL;
 973    BlockDriverState *bs = NULL;
 974
 975    VHDFooter footer;
 976    uint16_t cyls = 0;
 977    uint8_t heads = 0;
 978    uint8_t secs_per_cyl = 0;
 979    int64_t total_sectors;
 980    int64_t total_size;
 981    int disk_type;
 982    int ret = -EIO;
 983    QemuUUID uuid;
 984
 985    assert(opts->driver == BLOCKDEV_DRIVER_VPC);
 986    vpc_opts = &opts->u.vpc;
 987
 988    /* Validate options and set default values */
 989    total_size = vpc_opts->size;
 990
 991    if (!vpc_opts->has_subformat) {
 992        vpc_opts->subformat = BLOCKDEV_VPC_SUBFORMAT_DYNAMIC;
 993    }
 994    switch (vpc_opts->subformat) {
 995    case BLOCKDEV_VPC_SUBFORMAT_DYNAMIC:
 996        disk_type = VHD_DYNAMIC;
 997        break;
 998    case BLOCKDEV_VPC_SUBFORMAT_FIXED:
 999        disk_type = VHD_FIXED;
1000        break;
1001    default:
1002        g_assert_not_reached();
1003    }
1004
1005    /* Create BlockBackend to write to the image */
1006    bs = bdrv_open_blockdev_ref(vpc_opts->file, errp);
1007    if (bs == NULL) {
1008        return -EIO;
1009    }
1010
1011    blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
1012                          errp);
1013    if (!blk) {
1014        ret = -EPERM;
1015        goto out;
1016    }
1017    blk_set_allow_write_beyond_eof(blk, true);
1018
1019    /* Get geometry and check that it matches the image size*/
1020    ret = calculate_rounded_image_size(vpc_opts, &cyls, &heads, &secs_per_cyl,
1021                                       &total_sectors, errp);
1022    if (ret < 0) {
1023        goto out;
1024    }
1025
1026    if (total_size != total_sectors * BDRV_SECTOR_SIZE) {
1027        error_setg(errp, "The requested image size cannot be represented in "
1028                         "CHS geometry");
1029        error_append_hint(errp, "Try size=%llu or force-size=on (the "
1030                                "latter makes the image incompatible with "
1031                                "Virtual PC)",
1032                          total_sectors * BDRV_SECTOR_SIZE);
1033        ret = -EINVAL;
1034        goto out;
1035    }
1036
1037    /* Prepare the Hard Disk Footer */
1038    memset(&footer, 0, sizeof(footer));
1039
1040    memcpy(footer.creator, "conectix", 8);
1041    if (vpc_opts->force_size) {
1042        memcpy(footer.creator_app, "qem2", 4);
1043    } else {
1044        memcpy(footer.creator_app, "qemu", 4);
1045    }
1046    memcpy(footer.creator_os, "Wi2k", 4);
1047
1048    footer.features = cpu_to_be32(0x02);
1049    footer.version = cpu_to_be32(0x00010000);
1050    if (disk_type == VHD_DYNAMIC) {
1051        footer.data_offset = cpu_to_be64(sizeof(footer));
1052    } else {
1053        footer.data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
1054    }
1055    footer.timestamp = cpu_to_be32(time(NULL) - VHD_TIMESTAMP_BASE);
1056
1057    /* Version of Virtual PC 2007 */
1058    footer.major = cpu_to_be16(0x0005);
1059    footer.minor = cpu_to_be16(0x0003);
1060    footer.orig_size = cpu_to_be64(total_size);
1061    footer.current_size = cpu_to_be64(total_size);
1062    footer.cyls = cpu_to_be16(cyls);
1063    footer.heads = heads;
1064    footer.secs_per_cyl = secs_per_cyl;
1065
1066    footer.type = cpu_to_be32(disk_type);
1067
1068    qemu_uuid_generate(&uuid);
1069    footer.uuid = uuid;
1070
1071    footer.checksum = cpu_to_be32(vpc_checksum(&footer, sizeof(footer)));
1072
1073    if (disk_type == VHD_DYNAMIC) {
1074        ret = create_dynamic_disk(blk, &footer, total_sectors);
1075        if (ret < 0) {
1076            error_setg(errp, "Unable to create or write VHD header");
1077        }
1078    } else {
1079        ret = create_fixed_disk(blk, &footer, total_size, errp);
1080    }
1081
1082out:
1083    blk_unref(blk);
1084    bdrv_unref(bs);
1085    return ret;
1086}
1087
1088static int coroutine_fn vpc_co_create_opts(BlockDriver *drv,
1089                                           const char *filename,
1090                                           QemuOpts *opts,
1091                                           Error **errp)
1092{
1093    BlockdevCreateOptions *create_options = NULL;
1094    QDict *qdict;
1095    Visitor *v;
1096    BlockDriverState *bs = NULL;
1097    int ret;
1098
1099    static const QDictRenames opt_renames[] = {
1100        { VPC_OPT_FORCE_SIZE,           "force-size" },
1101        { NULL, NULL },
1102    };
1103
1104    /* Parse options and convert legacy syntax */
1105    qdict = qemu_opts_to_qdict_filtered(opts, NULL, &vpc_create_opts, true);
1106
1107    if (!qdict_rename_keys(qdict, opt_renames, errp)) {
1108        ret = -EINVAL;
1109        goto fail;
1110    }
1111
1112    /* Create and open the file (protocol layer) */
1113    ret = bdrv_create_file(filename, opts, errp);
1114    if (ret < 0) {
1115        goto fail;
1116    }
1117
1118    bs = bdrv_open(filename, NULL, NULL,
1119                   BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
1120    if (bs == NULL) {
1121        ret = -EIO;
1122        goto fail;
1123    }
1124
1125    /* Now get the QAPI type BlockdevCreateOptions */
1126    qdict_put_str(qdict, "driver", "vpc");
1127    qdict_put_str(qdict, "file", bs->node_name);
1128
1129    v = qobject_input_visitor_new_flat_confused(qdict, errp);
1130    if (!v) {
1131        ret = -EINVAL;
1132        goto fail;
1133    }
1134
1135    visit_type_BlockdevCreateOptions(v, NULL, &create_options, errp);
1136    visit_free(v);
1137    if (!create_options) {
1138        ret = -EINVAL;
1139        goto fail;
1140    }
1141
1142    /* Silently round up size */
1143    assert(create_options->driver == BLOCKDEV_DRIVER_VPC);
1144    create_options->u.vpc.size =
1145        ROUND_UP(create_options->u.vpc.size, BDRV_SECTOR_SIZE);
1146
1147    if (!create_options->u.vpc.force_size) {
1148        int64_t total_sectors;
1149        ret = calculate_rounded_image_size(&create_options->u.vpc, NULL, NULL,
1150                                           NULL, &total_sectors, errp);
1151        if (ret < 0) {
1152            goto fail;
1153        }
1154
1155        create_options->u.vpc.size = total_sectors * BDRV_SECTOR_SIZE;
1156    }
1157
1158
1159    /* Create the vpc image (format layer) */
1160    ret = vpc_co_create(create_options, errp);
1161
1162fail:
1163    qobject_unref(qdict);
1164    bdrv_unref(bs);
1165    qapi_free_BlockdevCreateOptions(create_options);
1166    return ret;
1167}
1168
1169
1170static int vpc_has_zero_init(BlockDriverState *bs)
1171{
1172    BDRVVPCState *s = bs->opaque;
1173
1174    if (be32_to_cpu(s->footer.type) == VHD_FIXED) {
1175        return bdrv_has_zero_init(bs->file->bs);
1176    } else {
1177        return 1;
1178    }
1179}
1180
1181static void vpc_close(BlockDriverState *bs)
1182{
1183    BDRVVPCState *s = bs->opaque;
1184    qemu_vfree(s->pagetable);
1185#ifdef CACHE
1186    g_free(s->pageentry_u8);
1187#endif
1188
1189    migrate_del_blocker(s->migration_blocker);
1190    error_free(s->migration_blocker);
1191}
1192
1193static QemuOptsList vpc_create_opts = {
1194    .name = "vpc-create-opts",
1195    .head = QTAILQ_HEAD_INITIALIZER(vpc_create_opts.head),
1196    .desc = {
1197        {
1198            .name = BLOCK_OPT_SIZE,
1199            .type = QEMU_OPT_SIZE,
1200            .help = "Virtual disk size"
1201        },
1202        {
1203            .name = BLOCK_OPT_SUBFMT,
1204            .type = QEMU_OPT_STRING,
1205            .help =
1206                "Type of virtual hard disk format. Supported formats are "
1207                "{dynamic (default) | fixed} "
1208        },
1209        {
1210            .name = VPC_OPT_FORCE_SIZE,
1211            .type = QEMU_OPT_BOOL,
1212            .help = "Force disk size calculation to use the actual size "
1213                    "specified, rather than using the nearest CHS-based "
1214                    "calculation"
1215        },
1216        { /* end of list */ }
1217    }
1218};
1219
1220static const char *const vpc_strong_runtime_opts[] = {
1221    VPC_OPT_SIZE_CALC,
1222
1223    NULL
1224};
1225
1226static BlockDriver bdrv_vpc = {
1227    .format_name    = "vpc",
1228    .instance_size  = sizeof(BDRVVPCState),
1229
1230    .bdrv_probe             = vpc_probe,
1231    .bdrv_open              = vpc_open,
1232    .bdrv_close             = vpc_close,
1233    .bdrv_reopen_prepare    = vpc_reopen_prepare,
1234    .bdrv_child_perm        = bdrv_default_perms,
1235    .bdrv_co_create         = vpc_co_create,
1236    .bdrv_co_create_opts    = vpc_co_create_opts,
1237
1238    .bdrv_co_preadv             = vpc_co_preadv,
1239    .bdrv_co_pwritev            = vpc_co_pwritev,
1240    .bdrv_co_block_status       = vpc_co_block_status,
1241
1242    .bdrv_get_info          = vpc_get_info,
1243
1244    .is_format              = true,
1245    .create_opts            = &vpc_create_opts,
1246    .bdrv_has_zero_init     = vpc_has_zero_init,
1247    .strong_runtime_opts    = vpc_strong_runtime_opts,
1248};
1249
1250static void bdrv_vpc_init(void)
1251{
1252    bdrv_register(&bdrv_vpc);
1253}
1254
1255block_init(bdrv_vpc_init);
1256