qemu/block/vdi.c
<<
>>
Prefs
   1/*
   2 * Block driver for the Virtual Disk Image (VDI) format
   3 *
   4 * Copyright (c) 2009, 2012 Stefan Weil
   5 *
   6 * This program is free software: you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License as published by
   8 * the Free Software Foundation, either version 2 of the License, or
   9 * (at your option) version 3 or any later version.
  10 *
  11 * This program is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  18 *
  19 * Reference:
  20 * http://forums.virtualbox.org/viewtopic.php?t=8046
  21 *
  22 * This driver supports create / read / write operations on VDI images.
  23 *
  24 * Todo (see also TODO in code):
  25 *
  26 * Some features like snapshots are still missing.
  27 *
  28 * Deallocation of zero-filled blocks and shrinking images are missing, too
  29 * (might be added to common block layer).
  30 *
  31 * Allocation of blocks could be optimized (less writes to block map and
  32 * header).
  33 *
   34 * Read and write of adjacent blocks could be done in one operation
   35 * (current code uses one operation per block (1 MiB)).
  36 *
  37 * The code is not thread safe (missing locks for changes in header and
  38 * block table, no problem with current QEMU).
  39 *
  40 * Hints:
  41 *
  42 * Blocks (VDI documentation) correspond to clusters (QEMU).
  43 * QEMU's backing files could be implemented using VDI snapshot files (TODO).
  44 * VDI snapshot files may also contain the complete machine state.
  45 * Maybe this machine state can be converted to QEMU PC machine snapshot data.
  46 *
  47 * The driver keeps a block cache (little endian entries) in memory.
  48 * For the standard block size (1 MiB), a 1 TiB disk will use 4 MiB RAM,
  49 * so this seems to be reasonable.
  50 */
  51
  52#include "qemu-common.h"
  53#include "block/block_int.h"
  54#include "qemu/module.h"
  55#include "migration/migration.h"
  56
  57#if defined(CONFIG_UUID)
  58#include <uuid/uuid.h>
  59#else
  60/* TODO: move uuid emulation to some central place in QEMU. */
  61#include "sysemu/sysemu.h"     /* UUID_FMT */
  62typedef unsigned char uuid_t[16];
  63#endif
  64
  65/* Code configuration options. */
  66
  67/* Enable debug messages. */
  68//~ #define CONFIG_VDI_DEBUG
  69
  70/* Support write operations on VDI images. */
  71#define CONFIG_VDI_WRITE
  72
  73/* Support non-standard block (cluster) size. This is untested.
  74 * Maybe it will be needed for very large images.
  75 */
  76//~ #define CONFIG_VDI_BLOCK_SIZE
  77
  78/* Support static (fixed, pre-allocated) images. */
  79#define CONFIG_VDI_STATIC_IMAGE
  80
  81/* Command line option for static images. */
  82#define BLOCK_OPT_STATIC "static"
  83
  84#define KiB     1024
  85#define MiB     (KiB * KiB)
  86
  87#define SECTOR_SIZE 512
  88#define DEFAULT_CLUSTER_SIZE (1 * MiB)
  89
  90#if defined(CONFIG_VDI_DEBUG)
  91#define logout(fmt, ...) \
  92                fprintf(stderr, "vdi\t%-24s" fmt, __func__, ##__VA_ARGS__)
  93#else
  94#define logout(fmt, ...) ((void)0)
  95#endif
  96
  97/* Image signature. */
  98#define VDI_SIGNATURE 0xbeda107f
  99
 100/* Image version. */
 101#define VDI_VERSION_1_1 0x00010001
 102
 103/* Image type. */
 104#define VDI_TYPE_DYNAMIC 1
 105#define VDI_TYPE_STATIC  2
 106
 107/* Innotek / SUN images use these strings in header.text:
 108 * "<<< innotek VirtualBox Disk Image >>>\n"
 109 * "<<< Sun xVM VirtualBox Disk Image >>>\n"
 110 * "<<< Sun VirtualBox Disk Image >>>\n"
 111 * The value does not matter, so QEMU created images use a different text.
 112 */
 113#define VDI_TEXT "<<< QEMU VM Virtual Disk Image >>>\n"
 114
 115/* A never-allocated block; semantically arbitrary content. */
 116#define VDI_UNALLOCATED 0xffffffffU
 117
 118/* A discarded (no longer allocated) block; semantically zero-filled. */
 119#define VDI_DISCARDED   0xfffffffeU
 120
 121#define VDI_IS_ALLOCATED(X) ((X) < VDI_DISCARDED)
 122
 123#if !defined(CONFIG_UUID)
 124static inline void uuid_generate(uuid_t out)
 125{
 126    memset(out, 0, sizeof(uuid_t));
 127}
 128
 129static inline int uuid_is_null(const uuid_t uu)
 130{
 131    uuid_t null_uuid = { 0 };
 132    return memcmp(uu, null_uuid, sizeof(uuid_t)) == 0;
 133}
 134
 135static inline void uuid_unparse(const uuid_t uu, char *out)
 136{
 137    snprintf(out, 37, UUID_FMT,
 138            uu[0], uu[1], uu[2], uu[3], uu[4], uu[5], uu[6], uu[7],
 139            uu[8], uu[9], uu[10], uu[11], uu[12], uu[13], uu[14], uu[15]);
 140}
 141#endif
 142
 143typedef struct {
 144    char text[0x40];
 145    uint32_t signature;
 146    uint32_t version;
 147    uint32_t header_size;
 148    uint32_t image_type;
 149    uint32_t image_flags;
 150    char description[256];
 151    uint32_t offset_bmap;
 152    uint32_t offset_data;
 153    uint32_t cylinders;         /* disk geometry, unused here */
 154    uint32_t heads;             /* disk geometry, unused here */
 155    uint32_t sectors;           /* disk geometry, unused here */
 156    uint32_t sector_size;
 157    uint32_t unused1;
 158    uint64_t disk_size;
 159    uint32_t block_size;
 160    uint32_t block_extra;       /* unused here */
 161    uint32_t blocks_in_image;
 162    uint32_t blocks_allocated;
 163    uuid_t uuid_image;
 164    uuid_t uuid_last_snap;
 165    uuid_t uuid_link;
 166    uuid_t uuid_parent;
 167    uint64_t unused2[7];
 168} VdiHeader;
 169
 170typedef struct {
 171    /* The block map entries are little endian (even in memory). */
 172    uint32_t *bmap;
 173    /* Size of block (bytes). */
 174    uint32_t block_size;
 175    /* Size of block (sectors). */
 176    uint32_t block_sectors;
 177    /* First sector of block map. */
 178    uint32_t bmap_sector;
 179    /* VDI header (converted to host endianness). */
 180    VdiHeader header;
 181
 182    Error *migration_blocker;
 183} BDRVVdiState;
 184
 185/* Change UUID from little endian (IPRT = VirtualBox format) to big endian
 186 * format (network byte order, standard, see RFC 4122) and vice versa.
 187 */
 188static void uuid_convert(uuid_t uuid)
 189{
 190    bswap32s((uint32_t *)&uuid[0]);
 191    bswap16s((uint16_t *)&uuid[4]);
 192    bswap16s((uint16_t *)&uuid[6]);
 193}
 194
 195static void vdi_header_to_cpu(VdiHeader *header)
 196{
 197    le32_to_cpus(&header->signature);
 198    le32_to_cpus(&header->version);
 199    le32_to_cpus(&header->header_size);
 200    le32_to_cpus(&header->image_type);
 201    le32_to_cpus(&header->image_flags);
 202    le32_to_cpus(&header->offset_bmap);
 203    le32_to_cpus(&header->offset_data);
 204    le32_to_cpus(&header->cylinders);
 205    le32_to_cpus(&header->heads);
 206    le32_to_cpus(&header->sectors);
 207    le32_to_cpus(&header->sector_size);
 208    le64_to_cpus(&header->disk_size);
 209    le32_to_cpus(&header->block_size);
 210    le32_to_cpus(&header->block_extra);
 211    le32_to_cpus(&header->blocks_in_image);
 212    le32_to_cpus(&header->blocks_allocated);
 213    uuid_convert(header->uuid_image);
 214    uuid_convert(header->uuid_last_snap);
 215    uuid_convert(header->uuid_link);
 216    uuid_convert(header->uuid_parent);
 217}
 218
 219static void vdi_header_to_le(VdiHeader *header)
 220{
 221    cpu_to_le32s(&header->signature);
 222    cpu_to_le32s(&header->version);
 223    cpu_to_le32s(&header->header_size);
 224    cpu_to_le32s(&header->image_type);
 225    cpu_to_le32s(&header->image_flags);
 226    cpu_to_le32s(&header->offset_bmap);
 227    cpu_to_le32s(&header->offset_data);
 228    cpu_to_le32s(&header->cylinders);
 229    cpu_to_le32s(&header->heads);
 230    cpu_to_le32s(&header->sectors);
 231    cpu_to_le32s(&header->sector_size);
 232    cpu_to_le64s(&header->disk_size);
 233    cpu_to_le32s(&header->block_size);
 234    cpu_to_le32s(&header->block_extra);
 235    cpu_to_le32s(&header->blocks_in_image);
 236    cpu_to_le32s(&header->blocks_allocated);
 237    cpu_to_le32s(&header->blocks_allocated);
 238    uuid_convert(header->uuid_image);
 239    uuid_convert(header->uuid_last_snap);
 240    uuid_convert(header->uuid_link);
 241    uuid_convert(header->uuid_parent);
 242}
 243
 244#if defined(CONFIG_VDI_DEBUG)
 245static void vdi_header_print(VdiHeader *header)
 246{
 247    char uuid[37];
 248    logout("text        %s", header->text);
 249    logout("signature   0x%08x\n", header->signature);
 250    logout("header size 0x%04x\n", header->header_size);
 251    logout("image type  0x%04x\n", header->image_type);
 252    logout("image flags 0x%04x\n", header->image_flags);
 253    logout("description %s\n", header->description);
 254    logout("offset bmap 0x%04x\n", header->offset_bmap);
 255    logout("offset data 0x%04x\n", header->offset_data);
 256    logout("cylinders   0x%04x\n", header->cylinders);
 257    logout("heads       0x%04x\n", header->heads);
 258    logout("sectors     0x%04x\n", header->sectors);
 259    logout("sector size 0x%04x\n", header->sector_size);
 260    logout("image size  0x%" PRIx64 " B (%" PRIu64 " MiB)\n",
 261           header->disk_size, header->disk_size / MiB);
 262    logout("block size  0x%04x\n", header->block_size);
 263    logout("block extra 0x%04x\n", header->block_extra);
 264    logout("blocks tot. 0x%04x\n", header->blocks_in_image);
 265    logout("blocks all. 0x%04x\n", header->blocks_allocated);
 266    uuid_unparse(header->uuid_image, uuid);
 267    logout("uuid image  %s\n", uuid);
 268    uuid_unparse(header->uuid_last_snap, uuid);
 269    logout("uuid snap   %s\n", uuid);
 270    uuid_unparse(header->uuid_link, uuid);
 271    logout("uuid link   %s\n", uuid);
 272    uuid_unparse(header->uuid_parent, uuid);
 273    logout("uuid parent %s\n", uuid);
 274}
 275#endif
 276
 277static int vdi_check(BlockDriverState *bs, BdrvCheckResult *res,
 278                     BdrvCheckMode fix)
 279{
 280    /* TODO: additional checks possible. */
 281    BDRVVdiState *s = (BDRVVdiState *)bs->opaque;
 282    uint32_t blocks_allocated = 0;
 283    uint32_t block;
 284    uint32_t *bmap;
 285    logout("\n");
 286
 287    if (fix) {
 288        return -ENOTSUP;
 289    }
 290
 291    bmap = g_malloc(s->header.blocks_in_image * sizeof(uint32_t));
 292    memset(bmap, 0xff, s->header.blocks_in_image * sizeof(uint32_t));
 293
 294    /* Check block map and value of blocks_allocated. */
 295    for (block = 0; block < s->header.blocks_in_image; block++) {
 296        uint32_t bmap_entry = le32_to_cpu(s->bmap[block]);
 297        if (VDI_IS_ALLOCATED(bmap_entry)) {
 298            if (bmap_entry < s->header.blocks_in_image) {
 299                blocks_allocated++;
 300                if (!VDI_IS_ALLOCATED(bmap[bmap_entry])) {
 301                    bmap[bmap_entry] = bmap_entry;
 302                } else {
 303                    fprintf(stderr, "ERROR: block index %" PRIu32
 304                            " also used by %" PRIu32 "\n", bmap[bmap_entry], bmap_entry);
 305                    res->corruptions++;
 306                }
 307            } else {
 308                fprintf(stderr, "ERROR: block index %" PRIu32
 309                        " too large, is %" PRIu32 "\n", block, bmap_entry);
 310                res->corruptions++;
 311            }
 312        }
 313    }
 314    if (blocks_allocated != s->header.blocks_allocated) {
 315        fprintf(stderr, "ERROR: allocated blocks mismatch, is %" PRIu32
 316               ", should be %" PRIu32 "\n",
 317               blocks_allocated, s->header.blocks_allocated);
 318        res->corruptions++;
 319    }
 320
 321    g_free(bmap);
 322
 323    return 0;
 324}
 325
 326static int vdi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 327{
 328    /* TODO: vdi_get_info would be needed for machine snapshots.
 329       vm_state_offset is still missing. */
 330    BDRVVdiState *s = (BDRVVdiState *)bs->opaque;
 331    logout("\n");
 332    bdi->cluster_size = s->block_size;
 333    bdi->vm_state_offset = 0;
 334    return 0;
 335}
 336
 337static int vdi_make_empty(BlockDriverState *bs)
 338{
 339    /* TODO: missing code. */
 340    logout("\n");
 341    /* The return value for missing code must be 0, see block.c. */
 342    return 0;
 343}
 344
 345static int vdi_probe(const uint8_t *buf, int buf_size, const char *filename)
 346{
 347    const VdiHeader *header = (const VdiHeader *)buf;
 348    int result = 0;
 349
 350    logout("\n");
 351
 352    if (buf_size < sizeof(*header)) {
 353        /* Header too small, no VDI. */
 354    } else if (le32_to_cpu(header->signature) == VDI_SIGNATURE) {
 355        result = 100;
 356    }
 357
 358    if (result == 0) {
 359        logout("no vdi image\n");
 360    } else {
 361        logout("%s", header->text);
 362    }
 363
 364    return result;
 365}
 366
 367static int vdi_open(BlockDriverState *bs, QDict *options, int flags)
 368{
 369    BDRVVdiState *s = bs->opaque;
 370    VdiHeader header;
 371    size_t bmap_size;
 372    int ret;
 373
 374    logout("\n");
 375
 376    ret = bdrv_read(bs->file, 0, (uint8_t *)&header, 1);
 377    if (ret < 0) {
 378        goto fail;
 379    }
 380
 381    vdi_header_to_cpu(&header);
 382#if defined(CONFIG_VDI_DEBUG)
 383    vdi_header_print(&header);
 384#endif
 385
 386    if (header.disk_size % SECTOR_SIZE != 0) {
 387        /* 'VBoxManage convertfromraw' can create images with odd disk sizes.
 388           We accept them but round the disk size to the next multiple of
 389           SECTOR_SIZE. */
 390        logout("odd disk size %" PRIu64 " B, round up\n", header.disk_size);
 391        header.disk_size += SECTOR_SIZE - 1;
 392        header.disk_size &= ~(SECTOR_SIZE - 1);
 393    }
 394
 395    if (header.signature != VDI_SIGNATURE) {
 396        logout("bad vdi signature %08x\n", header.signature);
 397        ret = -EMEDIUMTYPE;
 398        goto fail;
 399    } else if (header.version != VDI_VERSION_1_1) {
 400        logout("unsupported version %u.%u\n",
 401               header.version >> 16, header.version & 0xffff);
 402        ret = -ENOTSUP;
 403        goto fail;
 404    } else if (header.offset_bmap % SECTOR_SIZE != 0) {
 405        /* We only support block maps which start on a sector boundary. */
 406        logout("unsupported block map offset 0x%x B\n", header.offset_bmap);
 407        ret = -ENOTSUP;
 408        goto fail;
 409    } else if (header.offset_data % SECTOR_SIZE != 0) {
 410        /* We only support data blocks which start on a sector boundary. */
 411        logout("unsupported data offset 0x%x B\n", header.offset_data);
 412        ret = -ENOTSUP;
 413        goto fail;
 414    } else if (header.sector_size != SECTOR_SIZE) {
 415        logout("unsupported sector size %u B\n", header.sector_size);
 416        ret = -ENOTSUP;
 417        goto fail;
 418    } else if (header.block_size != 1 * MiB) {
 419        logout("unsupported block size %u B\n", header.block_size);
 420        ret = -ENOTSUP;
 421        goto fail;
 422    } else if (header.disk_size >
 423               (uint64_t)header.blocks_in_image * header.block_size) {
 424        logout("unsupported disk size %" PRIu64 " B\n", header.disk_size);
 425        ret = -ENOTSUP;
 426        goto fail;
 427    } else if (!uuid_is_null(header.uuid_link)) {
 428        logout("link uuid != 0, unsupported\n");
 429        ret = -ENOTSUP;
 430        goto fail;
 431    } else if (!uuid_is_null(header.uuid_parent)) {
 432        logout("parent uuid != 0, unsupported\n");
 433        ret = -ENOTSUP;
 434        goto fail;
 435    }
 436
 437    bs->total_sectors = header.disk_size / SECTOR_SIZE;
 438
 439    s->block_size = header.block_size;
 440    s->block_sectors = header.block_size / SECTOR_SIZE;
 441    s->bmap_sector = header.offset_bmap / SECTOR_SIZE;
 442    s->header = header;
 443
 444    bmap_size = header.blocks_in_image * sizeof(uint32_t);
 445    bmap_size = (bmap_size + SECTOR_SIZE - 1) / SECTOR_SIZE;
 446    s->bmap = g_malloc(bmap_size * SECTOR_SIZE);
 447    ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size);
 448    if (ret < 0) {
 449        goto fail_free_bmap;
 450    }
 451
 452    /* Disable migration when vdi images are used */
 453    error_set(&s->migration_blocker,
 454              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
 455              "vdi", bs->device_name, "live migration");
 456    migrate_add_blocker(s->migration_blocker);
 457
 458    return 0;
 459
 460 fail_free_bmap:
 461    g_free(s->bmap);
 462
 463 fail:
 464    return ret;
 465}
 466
 467static int vdi_reopen_prepare(BDRVReopenState *state,
 468                              BlockReopenQueue *queue, Error **errp)
 469{
 470    return 0;
 471}
 472
 473static int coroutine_fn vdi_co_is_allocated(BlockDriverState *bs,
 474        int64_t sector_num, int nb_sectors, int *pnum)
 475{
 476    /* TODO: Check for too large sector_num (in bdrv_is_allocated or here). */
 477    BDRVVdiState *s = (BDRVVdiState *)bs->opaque;
 478    size_t bmap_index = sector_num / s->block_sectors;
 479    size_t sector_in_block = sector_num % s->block_sectors;
 480    int n_sectors = s->block_sectors - sector_in_block;
 481    uint32_t bmap_entry = le32_to_cpu(s->bmap[bmap_index]);
 482    logout("%p, %" PRId64 ", %d, %p\n", bs, sector_num, nb_sectors, pnum);
 483    if (n_sectors > nb_sectors) {
 484        n_sectors = nb_sectors;
 485    }
 486    *pnum = n_sectors;
 487    return VDI_IS_ALLOCATED(bmap_entry);
 488}
 489
 490static int vdi_co_read(BlockDriverState *bs,
 491        int64_t sector_num, uint8_t *buf, int nb_sectors)
 492{
 493    BDRVVdiState *s = bs->opaque;
 494    uint32_t bmap_entry;
 495    uint32_t block_index;
 496    uint32_t sector_in_block;
 497    uint32_t n_sectors;
 498    int ret = 0;
 499
 500    logout("\n");
 501
 502    while (ret >= 0 && nb_sectors > 0) {
 503        block_index = sector_num / s->block_sectors;
 504        sector_in_block = sector_num % s->block_sectors;
 505        n_sectors = s->block_sectors - sector_in_block;
 506        if (n_sectors > nb_sectors) {
 507            n_sectors = nb_sectors;
 508        }
 509
 510        logout("will read %u sectors starting at sector %" PRIu64 "\n",
 511               n_sectors, sector_num);
 512
 513        /* prepare next AIO request */
 514        bmap_entry = le32_to_cpu(s->bmap[block_index]);
 515        if (!VDI_IS_ALLOCATED(bmap_entry)) {
 516            /* Block not allocated, return zeros, no need to wait. */
 517            memset(buf, 0, n_sectors * SECTOR_SIZE);
 518            ret = 0;
 519        } else {
 520            uint64_t offset = s->header.offset_data / SECTOR_SIZE +
 521                              (uint64_t)bmap_entry * s->block_sectors +
 522                              sector_in_block;
 523            ret = bdrv_read(bs->file, offset, buf, n_sectors);
 524        }
 525        logout("%u sectors read\n", n_sectors);
 526
 527        nb_sectors -= n_sectors;
 528        sector_num += n_sectors;
 529        buf += n_sectors * SECTOR_SIZE;
 530    }
 531
 532    return ret;
 533}
 534
 535static int vdi_co_write(BlockDriverState *bs,
 536        int64_t sector_num, const uint8_t *buf, int nb_sectors)
 537{
 538    BDRVVdiState *s = bs->opaque;
 539    uint32_t bmap_entry;
 540    uint32_t block_index;
 541    uint32_t sector_in_block;
 542    uint32_t n_sectors;
 543    uint32_t bmap_first = VDI_UNALLOCATED;
 544    uint32_t bmap_last = VDI_UNALLOCATED;
 545    uint8_t *block = NULL;
 546    int ret = 0;
 547
 548    logout("\n");
 549
 550    while (ret >= 0 && nb_sectors > 0) {
 551        block_index = sector_num / s->block_sectors;
 552        sector_in_block = sector_num % s->block_sectors;
 553        n_sectors = s->block_sectors - sector_in_block;
 554        if (n_sectors > nb_sectors) {
 555            n_sectors = nb_sectors;
 556        }
 557
 558        logout("will write %u sectors starting at sector %" PRIu64 "\n",
 559               n_sectors, sector_num);
 560
 561        /* prepare next AIO request */
 562        bmap_entry = le32_to_cpu(s->bmap[block_index]);
 563        if (!VDI_IS_ALLOCATED(bmap_entry)) {
 564            /* Allocate new block and write to it. */
 565            uint64_t offset;
 566            bmap_entry = s->header.blocks_allocated;
 567            s->bmap[block_index] = cpu_to_le32(bmap_entry);
 568            s->header.blocks_allocated++;
 569            offset = s->header.offset_data / SECTOR_SIZE +
 570                     (uint64_t)bmap_entry * s->block_sectors;
 571            if (block == NULL) {
 572                block = g_malloc(s->block_size);
 573                bmap_first = block_index;
 574            }
 575            bmap_last = block_index;
 576            /* Copy data to be written to new block and zero unused parts. */
 577            memset(block, 0, sector_in_block * SECTOR_SIZE);
 578            memcpy(block + sector_in_block * SECTOR_SIZE,
 579                   buf, n_sectors * SECTOR_SIZE);
 580            memset(block + (sector_in_block + n_sectors) * SECTOR_SIZE, 0,
 581                   (s->block_sectors - n_sectors - sector_in_block) * SECTOR_SIZE);
 582            ret = bdrv_write(bs->file, offset, block, s->block_sectors);
 583        } else {
 584            uint64_t offset = s->header.offset_data / SECTOR_SIZE +
 585                              (uint64_t)bmap_entry * s->block_sectors +
 586                              sector_in_block;
 587            ret = bdrv_write(bs->file, offset, buf, n_sectors);
 588        }
 589
 590        nb_sectors -= n_sectors;
 591        sector_num += n_sectors;
 592        buf += n_sectors * SECTOR_SIZE;
 593
 594        logout("%u sectors written\n", n_sectors);
 595    }
 596
 597    logout("finished data write\n");
 598    if (ret < 0) {
 599        return ret;
 600    }
 601
 602    if (block) {
 603        /* One or more new blocks were allocated. */
 604        VdiHeader *header = (VdiHeader *) block;
 605        uint8_t *base;
 606        uint64_t offset;
 607
 608        logout("now writing modified header\n");
 609        assert(VDI_IS_ALLOCATED(bmap_first));
 610        *header = s->header;
 611        vdi_header_to_le(header);
 612        ret = bdrv_write(bs->file, 0, block, 1);
 613        g_free(block);
 614        block = NULL;
 615
 616        if (ret < 0) {
 617            return ret;
 618        }
 619
 620        logout("now writing modified block map entry %u...%u\n",
 621               bmap_first, bmap_last);
 622        /* Write modified sectors from block map. */
 623        bmap_first /= (SECTOR_SIZE / sizeof(uint32_t));
 624        bmap_last /= (SECTOR_SIZE / sizeof(uint32_t));
 625        n_sectors = bmap_last - bmap_first + 1;
 626        offset = s->bmap_sector + bmap_first;
 627        base = ((uint8_t *)&s->bmap[0]) + bmap_first * SECTOR_SIZE;
 628        logout("will write %u block map sectors starting from entry %u\n",
 629               n_sectors, bmap_first);
 630        ret = bdrv_write(bs->file, offset, base, n_sectors);
 631    }
 632
 633    return ret;
 634}
 635
 636static int vdi_create(const char *filename, QEMUOptionParameter *options)
 637{
 638    int fd;
 639    int result = 0;
 640    uint64_t bytes = 0;
 641    uint32_t blocks;
 642    size_t block_size = DEFAULT_CLUSTER_SIZE;
 643    uint32_t image_type = VDI_TYPE_DYNAMIC;
 644    VdiHeader header;
 645    size_t i;
 646    size_t bmap_size;
 647
 648    logout("\n");
 649
 650    /* Read out options. */
 651    while (options && options->name) {
 652        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
 653            bytes = options->value.n;
 654#if defined(CONFIG_VDI_BLOCK_SIZE)
 655        } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
 656            if (options->value.n) {
 657                /* TODO: Additional checks (SECTOR_SIZE * 2^n, ...). */
 658                block_size = options->value.n;
 659            }
 660#endif
 661#if defined(CONFIG_VDI_STATIC_IMAGE)
 662        } else if (!strcmp(options->name, BLOCK_OPT_STATIC)) {
 663            if (options->value.n) {
 664                image_type = VDI_TYPE_STATIC;
 665            }
 666#endif
 667        }
 668        options++;
 669    }
 670
 671    fd = qemu_open(filename,
 672                   O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
 673                   0644);
 674    if (fd < 0) {
 675        return -errno;
 676    }
 677
 678    /* We need enough blocks to store the given disk size,
 679       so always round up. */
 680    blocks = (bytes + block_size - 1) / block_size;
 681
 682    bmap_size = blocks * sizeof(uint32_t);
 683    bmap_size = ((bmap_size + SECTOR_SIZE - 1) & ~(SECTOR_SIZE -1));
 684
 685    memset(&header, 0, sizeof(header));
 686    pstrcpy(header.text, sizeof(header.text), VDI_TEXT);
 687    header.signature = VDI_SIGNATURE;
 688    header.version = VDI_VERSION_1_1;
 689    header.header_size = 0x180;
 690    header.image_type = image_type;
 691    header.offset_bmap = 0x200;
 692    header.offset_data = 0x200 + bmap_size;
 693    header.sector_size = SECTOR_SIZE;
 694    header.disk_size = bytes;
 695    header.block_size = block_size;
 696    header.blocks_in_image = blocks;
 697    if (image_type == VDI_TYPE_STATIC) {
 698        header.blocks_allocated = blocks;
 699    }
 700    uuid_generate(header.uuid_image);
 701    uuid_generate(header.uuid_last_snap);
 702    /* There is no need to set header.uuid_link or header.uuid_parent here. */
 703#if defined(CONFIG_VDI_DEBUG)
 704    vdi_header_print(&header);
 705#endif
 706    vdi_header_to_le(&header);
 707    if (write(fd, &header, sizeof(header)) < 0) {
 708        result = -errno;
 709    }
 710
 711    if (bmap_size > 0) {
 712        uint32_t *bmap = g_malloc0(bmap_size);
 713        for (i = 0; i < blocks; i++) {
 714            if (image_type == VDI_TYPE_STATIC) {
 715                bmap[i] = i;
 716            } else {
 717                bmap[i] = VDI_UNALLOCATED;
 718            }
 719        }
 720        if (write(fd, bmap, bmap_size) < 0) {
 721            result = -errno;
 722        }
 723        g_free(bmap);
 724    }
 725
 726    if (image_type == VDI_TYPE_STATIC) {
 727        if (ftruncate(fd, sizeof(header) + bmap_size + blocks * block_size)) {
 728            result = -errno;
 729        }
 730    }
 731
 732    if (close(fd) < 0) {
 733        result = -errno;
 734    }
 735
 736    return result;
 737}
 738
 739static void vdi_close(BlockDriverState *bs)
 740{
 741    BDRVVdiState *s = bs->opaque;
 742
 743    g_free(s->bmap);
 744
 745    migrate_del_blocker(s->migration_blocker);
 746    error_free(s->migration_blocker);
 747}
 748
 749static QEMUOptionParameter vdi_create_options[] = {
 750    {
 751        .name = BLOCK_OPT_SIZE,
 752        .type = OPT_SIZE,
 753        .help = "Virtual disk size"
 754    },
 755#if defined(CONFIG_VDI_BLOCK_SIZE)
 756    {
 757        .name = BLOCK_OPT_CLUSTER_SIZE,
 758        .type = OPT_SIZE,
 759        .help = "VDI cluster (block) size",
 760        .value = { .n = DEFAULT_CLUSTER_SIZE },
 761    },
 762#endif
 763#if defined(CONFIG_VDI_STATIC_IMAGE)
 764    {
 765        .name = BLOCK_OPT_STATIC,
 766        .type = OPT_FLAG,
 767        .help = "VDI static (pre-allocated) image"
 768    },
 769#endif
 770    /* TODO: An additional option to set UUID values might be useful. */
 771    { NULL }
 772};
 773
 774static BlockDriver bdrv_vdi = {
 775    .format_name = "vdi",
 776    .instance_size = sizeof(BDRVVdiState),
 777    .bdrv_probe = vdi_probe,
 778    .bdrv_open = vdi_open,
 779    .bdrv_close = vdi_close,
 780    .bdrv_reopen_prepare = vdi_reopen_prepare,
 781    .bdrv_create = vdi_create,
 782    .bdrv_co_is_allocated = vdi_co_is_allocated,
 783    .bdrv_make_empty = vdi_make_empty,
 784
 785    .bdrv_read = vdi_co_read,
 786#if defined(CONFIG_VDI_WRITE)
 787    .bdrv_write = vdi_co_write,
 788#endif
 789
 790    .bdrv_get_info = vdi_get_info,
 791
 792    .create_options = vdi_create_options,
 793    .bdrv_check = vdi_check,
 794};
 795
 796static void bdrv_vdi_init(void)
 797{
 798    logout("\n");
 799    bdrv_register(&bdrv_vdi);
 800}
 801
 802block_init(bdrv_vdi_init);
 803