qemu/block.c
<<
>>
Prefs
   1/*
   2 * QEMU System Emulator block driver
   3 *
   4 * Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24#include "config-host.h"
  25#include "qemu-common.h"
  26#include "trace.h"
  27#include "monitor.h"
  28#include "block_int.h"
  29#include "module.h"
  30#include "qemu-objects.h"
  31
  32#ifdef CONFIG_BSD
  33#include <sys/types.h>
  34#include <sys/stat.h>
  35#include <sys/ioctl.h>
  36#include <sys/queue.h>
  37#ifndef __DragonFly__
  38#include <sys/disk.h>
  39#endif
  40#endif
  41
  42#ifdef _WIN32
  43#include <windows.h>
  44#endif
  45
  46static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
  47        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
  48        BlockDriverCompletionFunc *cb, void *opaque);
  49static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
  50        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
  51        BlockDriverCompletionFunc *cb, void *opaque);
  52static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
  53        BlockDriverCompletionFunc *cb, void *opaque);
  54static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
  55        BlockDriverCompletionFunc *cb, void *opaque);
  56static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
  57                        uint8_t *buf, int nb_sectors);
  58static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
  59                         const uint8_t *buf, int nb_sectors);
  60
  61static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
  62    QTAILQ_HEAD_INITIALIZER(bdrv_states);
  63
  64static QLIST_HEAD(, BlockDriver) bdrv_drivers =
  65    QLIST_HEAD_INITIALIZER(bdrv_drivers);
  66
  67/* The device to use for VM snapshots */
  68static BlockDriverState *bs_snapshots;
  69
  70/* If non-zero, use only whitelisted block drivers */
  71static int use_bdrv_whitelist;
  72
  73#ifdef _WIN32
  74static int is_windows_drive_prefix(const char *filename)
  75{
  76    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
  77             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
  78            filename[1] == ':');
  79}
  80
  81int is_windows_drive(const char *filename)
  82{
  83    if (is_windows_drive_prefix(filename) &&
  84        filename[2] == '\0')
  85        return 1;
  86    if (strstart(filename, "\\\\.\\", NULL) ||
  87        strstart(filename, "//./", NULL))
  88        return 1;
  89    return 0;
  90}
  91#endif
  92
  93/* check if the path starts with "<protocol>:" */
  94static int path_has_protocol(const char *path)
  95{
  96#ifdef _WIN32
  97    if (is_windows_drive(path) ||
  98        is_windows_drive_prefix(path)) {
  99        return 0;
 100    }
 101#endif
 102
 103    return strchr(path, ':') != NULL;
 104}
 105
 106int path_is_absolute(const char *path)
 107{
 108    const char *p;
 109#ifdef _WIN32
 110    /* specific case for names like: "\\.\d:" */
 111    if (*path == '/' || *path == '\\')
 112        return 1;
 113#endif
 114    p = strchr(path, ':');
 115    if (p)
 116        p++;
 117    else
 118        p = path;
 119#ifdef _WIN32
 120    return (*p == '/' || *p == '\\');
 121#else
 122    return (*p == '/');
 123#endif
 124}
 125
 126/* if filename is absolute, just copy it to dest. Otherwise, build a
 127   path to it by considering it is relative to base_path. URL are
 128   supported. */
 129void path_combine(char *dest, int dest_size,
 130                  const char *base_path,
 131                  const char *filename)
 132{
 133    const char *p, *p1;
 134    int len;
 135
 136    if (dest_size <= 0)
 137        return;
 138    if (path_is_absolute(filename)) {
 139        pstrcpy(dest, dest_size, filename);
 140    } else {
 141        p = strchr(base_path, ':');
 142        if (p)
 143            p++;
 144        else
 145            p = base_path;
 146        p1 = strrchr(base_path, '/');
 147#ifdef _WIN32
 148        {
 149            const char *p2;
 150            p2 = strrchr(base_path, '\\');
 151            if (!p1 || p2 > p1)
 152                p1 = p2;
 153        }
 154#endif
 155        if (p1)
 156            p1++;
 157        else
 158            p1 = base_path;
 159        if (p1 > p)
 160            p = p1;
 161        len = p - base_path;
 162        if (len > dest_size - 1)
 163            len = dest_size - 1;
 164        memcpy(dest, base_path, len);
 165        dest[len] = '\0';
 166        pstrcat(dest, dest_size, filename);
 167    }
 168}
 169
 170void bdrv_register(BlockDriver *bdrv)
 171{
 172    if (!bdrv->bdrv_aio_readv) {
 173        /* add AIO emulation layer */
 174        bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
 175        bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
 176    } else if (!bdrv->bdrv_read) {
 177        /* add synchronous IO emulation layer */
 178        bdrv->bdrv_read = bdrv_read_em;
 179        bdrv->bdrv_write = bdrv_write_em;
 180    }
 181
 182    if (!bdrv->bdrv_aio_flush)
 183        bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
 184
 185    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
 186}
 187
 188/* create a new block device (by default it is empty) */
 189BlockDriverState *bdrv_new(const char *device_name)
 190{
 191    BlockDriverState *bs;
 192
 193    bs = qemu_mallocz(sizeof(BlockDriverState));
 194    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
 195    if (device_name[0] != '\0') {
 196        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
 197    }
 198    return bs;
 199}
 200
 201BlockDriver *bdrv_find_format(const char *format_name)
 202{
 203    BlockDriver *drv1;
 204    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
 205        if (!strcmp(drv1->format_name, format_name)) {
 206            return drv1;
 207        }
 208    }
 209    return NULL;
 210}
 211
 212static int bdrv_is_whitelisted(BlockDriver *drv)
 213{
 214    static const char *whitelist[] = {
 215        CONFIG_BDRV_WHITELIST
 216    };
 217    const char **p;
 218
 219    if (!whitelist[0])
 220        return 1;               /* no whitelist, anything goes */
 221
 222    for (p = whitelist; *p; p++) {
 223        if (!strcmp(drv->format_name, *p)) {
 224            return 1;
 225        }
 226    }
 227    return 0;
 228}
 229
 230BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
 231{
 232    BlockDriver *drv = bdrv_find_format(format_name);
 233    return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
 234}
 235
 236int bdrv_create(BlockDriver *drv, const char* filename,
 237    QEMUOptionParameter *options)
 238{
 239    if (!drv->bdrv_create)
 240        return -ENOTSUP;
 241
 242    return drv->bdrv_create(filename, options);
 243}
 244
 245int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
 246{
 247    BlockDriver *drv;
 248
 249    drv = bdrv_find_protocol(filename);
 250    if (drv == NULL) {
 251        return -ENOENT;
 252    }
 253
 254    return bdrv_create(drv, filename, options);
 255}
 256
 257#ifdef _WIN32
 258void get_tmp_filename(char *filename, int size)
 259{
 260    char temp_dir[MAX_PATH];
 261
 262    GetTempPath(MAX_PATH, temp_dir);
 263    GetTempFileName(temp_dir, "qem", 0, filename);
 264}
 265#else
 266void get_tmp_filename(char *filename, int size)
 267{
 268    int fd;
 269    const char *tmpdir;
 270    /* XXX: race condition possible */
 271    tmpdir = getenv("TMPDIR");
 272    if (!tmpdir)
 273        tmpdir = "/tmp";
 274    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
 275    fd = mkstemp(filename);
 276    close(fd);
 277}
 278#endif
 279
 280/*
 281 * Detect host devices. By convention, /dev/cdrom[N] is always
 282 * recognized as a host CDROM.
 283 */
 284static BlockDriver *find_hdev_driver(const char *filename)
 285{
 286    int score_max = 0, score;
 287    BlockDriver *drv = NULL, *d;
 288
 289    QLIST_FOREACH(d, &bdrv_drivers, list) {
 290        if (d->bdrv_probe_device) {
 291            score = d->bdrv_probe_device(filename);
 292            if (score > score_max) {
 293                score_max = score;
 294                drv = d;
 295            }
 296        }
 297    }
 298
 299    return drv;
 300}
 301
 302BlockDriver *bdrv_find_protocol(const char *filename)
 303{
 304    BlockDriver *drv1;
 305    char protocol[128];
 306    int len;
 307    const char *p;
 308
 309    /* TODO Drivers without bdrv_file_open must be specified explicitly */
 310
 311    /*
 312     * XXX(hch): we really should not let host device detection
 313     * override an explicit protocol specification, but moving this
 314     * later breaks access to device names with colons in them.
 315     * Thanks to the brain-dead persistent naming schemes on udev-
 316     * based Linux systems those actually are quite common.
 317     */
 318    drv1 = find_hdev_driver(filename);
 319    if (drv1) {
 320        return drv1;
 321    }
 322
 323    if (!path_has_protocol(filename)) {
 324        return bdrv_find_format("file");
 325    }
 326    p = strchr(filename, ':');
 327    assert(p != NULL);
 328    len = p - filename;
 329    if (len > sizeof(protocol) - 1)
 330        len = sizeof(protocol) - 1;
 331    memcpy(protocol, filename, len);
 332    protocol[len] = '\0';
 333    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
 334        if (drv1->protocol_name &&
 335            !strcmp(drv1->protocol_name, protocol)) {
 336            return drv1;
 337        }
 338    }
 339    return NULL;
 340}
 341
 342static int find_image_format(const char *filename, BlockDriver **pdrv)
 343{
 344    int ret, score, score_max;
 345    BlockDriver *drv1, *drv;
 346    uint8_t buf[2048];
 347    BlockDriverState *bs;
 348
 349    ret = bdrv_file_open(&bs, filename, 0);
 350    if (ret < 0) {
 351        *pdrv = NULL;
 352        return ret;
 353    }
 354
 355    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
 356    if (bs->sg || !bdrv_is_inserted(bs)) {
 357        bdrv_delete(bs);
 358        drv = bdrv_find_format("raw");
 359        if (!drv) {
 360            ret = -ENOENT;
 361        }
 362        *pdrv = drv;
 363        return ret;
 364    }
 365
 366    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
 367    bdrv_delete(bs);
 368    if (ret < 0) {
 369        *pdrv = NULL;
 370        return ret;
 371    }
 372
 373    score_max = 0;
 374    drv = NULL;
 375    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
 376        if (drv1->bdrv_probe) {
 377            score = drv1->bdrv_probe(buf, ret, filename);
 378            if (score > score_max) {
 379                score_max = score;
 380                drv = drv1;
 381            }
 382        }
 383    }
 384    if (!drv) {
 385        ret = -ENOENT;
 386    }
 387    *pdrv = drv;
 388    return ret;
 389}
 390
 391/**
 392 * Set the current 'total_sectors' value
 393 */
 394static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
 395{
 396    BlockDriver *drv = bs->drv;
 397
 398    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
 399    if (bs->sg)
 400        return 0;
 401
 402    /* query actual device if possible, otherwise just trust the hint */
 403    if (drv->bdrv_getlength) {
 404        int64_t length = drv->bdrv_getlength(bs);
 405        if (length < 0) {
 406            return length;
 407        }
 408        hint = length >> BDRV_SECTOR_BITS;
 409    }
 410
 411    bs->total_sectors = hint;
 412    return 0;
 413}
 414
 415/*
 416 * Common part for opening disk images and files
 417 */
 418static int bdrv_open_common(BlockDriverState *bs, const char *filename,
 419    int flags, BlockDriver *drv)
 420{
 421    int ret, open_flags;
 422
 423    assert(drv != NULL);
 424
 425    bs->file = NULL;
 426    bs->total_sectors = 0;
 427    bs->encrypted = 0;
 428    bs->valid_key = 0;
 429    bs->open_flags = flags;
 430    /* buffer_alignment defaulted to 512, drivers can change this value */
 431    bs->buffer_alignment = 512;
 432
 433    pstrcpy(bs->filename, sizeof(bs->filename), filename);
 434
 435    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
 436        return -ENOTSUP;
 437    }
 438
 439    bs->drv = drv;
 440    bs->opaque = qemu_mallocz(drv->instance_size);
 441
 442    /*
 443     * Yes, BDRV_O_NOCACHE aka O_DIRECT means we have to present a
 444     * write cache to the guest.  We do need the fdatasync to flush
 445     * out transactions for block allocations, and we maybe have a
 446     * volatile write cache in our backing device to deal with.
 447     */
 448    if (flags & (BDRV_O_CACHE_WB|BDRV_O_NOCACHE))
 449        bs->enable_write_cache = 1;
 450
 451    /*
 452     * Clear flags that are internal to the block layer before opening the
 453     * image.
 454     */
 455    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
 456
 457    /*
 458     * Snapshots should be writeable.
 459     */
 460    if (bs->is_temporary) {
 461        open_flags |= BDRV_O_RDWR;
 462    }
 463
 464    /* Open the image, either directly or using a protocol */
 465    if (drv->bdrv_file_open) {
 466        ret = drv->bdrv_file_open(bs, filename, open_flags);
 467    } else {
 468        ret = bdrv_file_open(&bs->file, filename, open_flags);
 469        if (ret >= 0) {
 470            ret = drv->bdrv_open(bs, open_flags);
 471        }
 472    }
 473
 474    if (ret < 0) {
 475        goto free_and_fail;
 476    }
 477
 478    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
 479
 480    ret = refresh_total_sectors(bs, bs->total_sectors);
 481    if (ret < 0) {
 482        goto free_and_fail;
 483    }
 484
 485#ifndef _WIN32
 486    if (bs->is_temporary) {
 487        unlink(filename);
 488    }
 489#endif
 490    return 0;
 491
 492free_and_fail:
 493    if (bs->file) {
 494        bdrv_delete(bs->file);
 495        bs->file = NULL;
 496    }
 497    qemu_free(bs->opaque);
 498    bs->opaque = NULL;
 499    bs->drv = NULL;
 500    return ret;
 501}
 502
 503/*
 504 * Opens a file using a protocol (file, host_device, nbd, ...)
 505 */
 506int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
 507{
 508    BlockDriverState *bs;
 509    BlockDriver *drv;
 510    int ret;
 511
 512    drv = bdrv_find_protocol(filename);
 513    if (!drv) {
 514        return -ENOENT;
 515    }
 516
 517    bs = bdrv_new("");
 518    ret = bdrv_open_common(bs, filename, flags, drv);
 519    if (ret < 0) {
 520        bdrv_delete(bs);
 521        return ret;
 522    }
 523    bs->growable = 1;
 524    *pbs = bs;
 525    return 0;
 526}
 527
 528/*
 529 * Opens a disk image (raw, qcow2, vmdk, ...)
 530 */
 531int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
 532              BlockDriver *drv)
 533{
 534    int ret;
 535
 536    if (flags & BDRV_O_SNAPSHOT) {
 537        BlockDriverState *bs1;
 538        int64_t total_size;
 539        int is_protocol = 0;
 540        BlockDriver *bdrv_qcow2;
 541        QEMUOptionParameter *options;
 542        char tmp_filename[PATH_MAX];
 543        char backing_filename[PATH_MAX];
 544
 545        /* if snapshot, we create a temporary backing file and open it
 546           instead of opening 'filename' directly */
 547
 548        /* if there is a backing file, use it */
 549        bs1 = bdrv_new("");
 550        ret = bdrv_open(bs1, filename, 0, drv);
 551        if (ret < 0) {
 552            bdrv_delete(bs1);
 553            return ret;
 554        }
 555        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
 556
 557        if (bs1->drv && bs1->drv->protocol_name)
 558            is_protocol = 1;
 559
 560        bdrv_delete(bs1);
 561
 562        get_tmp_filename(tmp_filename, sizeof(tmp_filename));
 563
 564        /* Real path is meaningless for protocols */
 565        if (is_protocol)
 566            snprintf(backing_filename, sizeof(backing_filename),
 567                     "%s", filename);
 568        else if (!realpath(filename, backing_filename))
 569            return -errno;
 570
 571        bdrv_qcow2 = bdrv_find_format("qcow2");
 572        options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
 573
 574        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
 575        set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
 576        if (drv) {
 577            set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
 578                drv->format_name);
 579        }
 580
 581        ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
 582        free_option_parameters(options);
 583        if (ret < 0) {
 584            return ret;
 585        }
 586
 587        filename = tmp_filename;
 588        drv = bdrv_qcow2;
 589        bs->is_temporary = 1;
 590    }
 591
 592    /* Find the right image format driver */
 593    if (!drv) {
 594        ret = find_image_format(filename, &drv);
 595    }
 596
 597    if (!drv) {
 598        goto unlink_and_fail;
 599    }
 600
 601    /* Open the image */
 602    ret = bdrv_open_common(bs, filename, flags, drv);
 603    if (ret < 0) {
 604        goto unlink_and_fail;
 605    }
 606
 607    /* If there is a backing file, use it */
 608    if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
 609        char backing_filename[PATH_MAX];
 610        int back_flags;
 611        BlockDriver *back_drv = NULL;
 612
 613        bs->backing_hd = bdrv_new("");
 614
 615        if (path_has_protocol(bs->backing_file)) {
 616            pstrcpy(backing_filename, sizeof(backing_filename),
 617                    bs->backing_file);
 618        } else {
 619            path_combine(backing_filename, sizeof(backing_filename),
 620                         filename, bs->backing_file);
 621        }
 622
 623        if (bs->backing_format[0] != '\0') {
 624            back_drv = bdrv_find_format(bs->backing_format);
 625        }
 626
 627        /* backing files always opened read-only */
 628        back_flags =
 629            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
 630
 631        ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
 632        if (ret < 0) {
 633            bdrv_close(bs);
 634            return ret;
 635        }
 636        if (bs->is_temporary) {
 637            bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
 638        } else {
 639            /* base image inherits from "parent" */
 640            bs->backing_hd->keep_read_only = bs->keep_read_only;
 641        }
 642    }
 643
 644    if (!bdrv_key_required(bs)) {
 645        /* call the change callback */
 646        bs->media_changed = 1;
 647        if (bs->change_cb)
 648            bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
 649    }
 650
 651    return 0;
 652
 653unlink_and_fail:
 654    if (bs->is_temporary) {
 655        unlink(filename);
 656    }
 657    return ret;
 658}
 659
 660void bdrv_close(BlockDriverState *bs)
 661{
 662    if (bs->drv) {
 663        if (bs == bs_snapshots) {
 664            bs_snapshots = NULL;
 665        }
 666        if (bs->backing_hd) {
 667            bdrv_delete(bs->backing_hd);
 668            bs->backing_hd = NULL;
 669        }
 670        bs->drv->bdrv_close(bs);
 671        qemu_free(bs->opaque);
 672#ifdef _WIN32
 673        if (bs->is_temporary) {
 674            unlink(bs->filename);
 675        }
 676#endif
 677        bs->opaque = NULL;
 678        bs->drv = NULL;
 679
 680        if (bs->file != NULL) {
 681            bdrv_close(bs->file);
 682        }
 683
 684        /* call the change callback */
 685        bs->media_changed = 1;
 686        if (bs->change_cb)
 687            bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
 688    }
 689}
 690
 691void bdrv_close_all(void)
 692{
 693    BlockDriverState *bs;
 694
 695    QTAILQ_FOREACH(bs, &bdrv_states, list) {
 696        bdrv_close(bs);
 697    }
 698}
 699
 700void bdrv_delete(BlockDriverState *bs)
 701{
 702    assert(!bs->peer);
 703
 704    /* remove from list, if necessary */
 705    if (bs->device_name[0] != '\0') {
 706        QTAILQ_REMOVE(&bdrv_states, bs, list);
 707    }
 708
 709    bdrv_close(bs);
 710    if (bs->file != NULL) {
 711        bdrv_delete(bs->file);
 712    }
 713
 714    assert(bs != bs_snapshots);
 715    qemu_free(bs);
 716}
 717
 718int bdrv_attach(BlockDriverState *bs, DeviceState *qdev)
 719{
 720    if (bs->peer) {
 721        return -EBUSY;
 722    }
 723    bs->peer = qdev;
 724    return 0;
 725}
 726
 727void bdrv_detach(BlockDriverState *bs, DeviceState *qdev)
 728{
 729    assert(bs->peer == qdev);
 730    bs->peer = NULL;
 731}
 732
 733DeviceState *bdrv_get_attached(BlockDriverState *bs)
 734{
 735    return bs->peer;
 736}
 737
 738/*
 739 * Run consistency checks on an image
 740 *
 741 * Returns 0 if the check could be completed (it doesn't mean that the image is
 742 * free of errors) or -errno when an internal error occured. The results of the
 743 * check are stored in res.
 744 */
 745int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
 746{
 747    if (bs->drv->bdrv_check == NULL) {
 748        return -ENOTSUP;
 749    }
 750
 751    memset(res, 0, sizeof(*res));
 752    return bs->drv->bdrv_check(bs, res);
 753}
 754
 755#define COMMIT_BUF_SECTORS 2048
 756
 757/* commit COW file into the raw image */
 758int bdrv_commit(BlockDriverState *bs)
 759{
 760    BlockDriver *drv = bs->drv;
 761    BlockDriver *backing_drv;
 762    int64_t sector, total_sectors;
 763    int n, ro, open_flags;
 764    int ret = 0, rw_ret = 0;
 765    uint8_t *buf;
 766    char filename[1024];
 767    BlockDriverState *bs_rw, *bs_ro;
 768
 769    if (!drv)
 770        return -ENOMEDIUM;
 771    
 772    if (!bs->backing_hd) {
 773        return -ENOTSUP;
 774    }
 775
 776    if (bs->backing_hd->keep_read_only) {
 777        return -EACCES;
 778    }
 779
 780    backing_drv = bs->backing_hd->drv;
 781    ro = bs->backing_hd->read_only;
 782    strncpy(filename, bs->backing_hd->filename, sizeof(filename));
 783    open_flags =  bs->backing_hd->open_flags;
 784
 785    if (ro) {
 786        /* re-open as RW */
 787        bdrv_delete(bs->backing_hd);
 788        bs->backing_hd = NULL;
 789        bs_rw = bdrv_new("");
 790        rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
 791            backing_drv);
 792        if (rw_ret < 0) {
 793            bdrv_delete(bs_rw);
 794            /* try to re-open read-only */
 795            bs_ro = bdrv_new("");
 796            ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
 797                backing_drv);
 798            if (ret < 0) {
 799                bdrv_delete(bs_ro);
 800                /* drive not functional anymore */
 801                bs->drv = NULL;
 802                return ret;
 803            }
 804            bs->backing_hd = bs_ro;
 805            return rw_ret;
 806        }
 807        bs->backing_hd = bs_rw;
 808    }
 809
 810    total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
 811    buf = qemu_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
 812
 813    for (sector = 0; sector < total_sectors; sector += n) {
 814        if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
 815
 816            if (bdrv_read(bs, sector, buf, n) != 0) {
 817                ret = -EIO;
 818                goto ro_cleanup;
 819            }
 820
 821            if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
 822                ret = -EIO;
 823                goto ro_cleanup;
 824            }
 825        }
 826    }
 827
 828    if (drv->bdrv_make_empty) {
 829        ret = drv->bdrv_make_empty(bs);
 830        bdrv_flush(bs);
 831    }
 832
 833    /*
 834     * Make sure all data we wrote to the backing device is actually
 835     * stable on disk.
 836     */
 837    if (bs->backing_hd)
 838        bdrv_flush(bs->backing_hd);
 839
 840ro_cleanup:
 841    qemu_free(buf);
 842
 843    if (ro) {
 844        /* re-open as RO */
 845        bdrv_delete(bs->backing_hd);
 846        bs->backing_hd = NULL;
 847        bs_ro = bdrv_new("");
 848        ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
 849            backing_drv);
 850        if (ret < 0) {
 851            bdrv_delete(bs_ro);
 852            /* drive not functional anymore */
 853            bs->drv = NULL;
 854            return ret;
 855        }
 856        bs->backing_hd = bs_ro;
 857        bs->backing_hd->keep_read_only = 0;
 858    }
 859
 860    return ret;
 861}
 862
 863void bdrv_commit_all(void)
 864{
 865    BlockDriverState *bs;
 866
 867    QTAILQ_FOREACH(bs, &bdrv_states, list) {
 868        bdrv_commit(bs);
 869    }
 870}
 871
 872/*
 873 * Return values:
 874 * 0        - success
 875 * -EINVAL  - backing format specified, but no file
 876 * -ENOSPC  - can't update the backing file because no space is left in the
 877 *            image file header
 878 * -ENOTSUP - format driver doesn't support changing the backing file
 879 */
 880int bdrv_change_backing_file(BlockDriverState *bs,
 881    const char *backing_file, const char *backing_fmt)
 882{
 883    BlockDriver *drv = bs->drv;
 884
 885    if (drv->bdrv_change_backing_file != NULL) {
 886        return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
 887    } else {
 888        return -ENOTSUP;
 889    }
 890}
 891
 892static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
 893                                   size_t size)
 894{
 895    int64_t len;
 896
 897    if (!bdrv_is_inserted(bs))
 898        return -ENOMEDIUM;
 899
 900    if (bs->growable)
 901        return 0;
 902
 903    len = bdrv_getlength(bs);
 904
 905    if (offset < 0)
 906        return -EIO;
 907
 908    if ((offset > len) || (len - offset < size))
 909        return -EIO;
 910
 911    return 0;
 912}
 913
 914static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
 915                              int nb_sectors)
 916{
 917    return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
 918                                   nb_sectors * BDRV_SECTOR_SIZE);
 919}
 920
 921/* return < 0 if error. See bdrv_write() for the return codes */
 922int bdrv_read(BlockDriverState *bs, int64_t sector_num,
 923              uint8_t *buf, int nb_sectors)
 924{
 925    BlockDriver *drv = bs->drv;
 926
 927    if (!drv)
 928        return -ENOMEDIUM;
 929    if (bdrv_check_request(bs, sector_num, nb_sectors))
 930        return -EIO;
 931
 932    return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
 933}
 934
 935static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
 936                             int nb_sectors, int dirty)
 937{
 938    int64_t start, end;
 939    unsigned long val, idx, bit;
 940
 941    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
 942    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
 943
 944    for (; start <= end; start++) {
 945        idx = start / (sizeof(unsigned long) * 8);
 946        bit = start % (sizeof(unsigned long) * 8);
 947        val = bs->dirty_bitmap[idx];
 948        if (dirty) {
 949            if (!(val & (1UL << bit))) {
 950                bs->dirty_count++;
 951                val |= 1UL << bit;
 952            }
 953        } else {
 954            if (val & (1UL << bit)) {
 955                bs->dirty_count--;
 956                val &= ~(1UL << bit);
 957            }
 958        }
 959        bs->dirty_bitmap[idx] = val;
 960    }
 961}
 962
 963/* Return < 0 if error. Important errors are:
 964  -EIO         generic I/O error (may happen for all errors)
 965  -ENOMEDIUM   No media inserted.
 966  -EINVAL      Invalid sector number or nb_sectors
 967  -EACCES      Trying to write a read-only device
 968*/
 969int bdrv_write(BlockDriverState *bs, int64_t sector_num,
 970               const uint8_t *buf, int nb_sectors)
 971{
 972    BlockDriver *drv = bs->drv;
 973    if (!bs->drv)
 974        return -ENOMEDIUM;
 975    if (bs->read_only)
 976        return -EACCES;
 977    if (bdrv_check_request(bs, sector_num, nb_sectors))
 978        return -EIO;
 979
 980    if (bs->dirty_bitmap) {
 981        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
 982    }
 983
 984    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
 985        bs->wr_highest_sector = sector_num + nb_sectors - 1;
 986    }
 987
 988    return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
 989}
 990
 991int bdrv_pread(BlockDriverState *bs, int64_t offset,
 992               void *buf, int count1)
 993{
 994    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
 995    int len, nb_sectors, count;
 996    int64_t sector_num;
 997    int ret;
 998
 999    count = count1;
1000    /* first read to align to sector start */
1001    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1002    if (len > count)
1003        len = count;
1004    sector_num = offset >> BDRV_SECTOR_BITS;
1005    if (len > 0) {
1006        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1007            return ret;
1008        memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
1009        count -= len;
1010        if (count == 0)
1011            return count1;
1012        sector_num++;
1013        buf += len;
1014    }
1015
1016    /* read the sectors "in place" */
1017    nb_sectors = count >> BDRV_SECTOR_BITS;
1018    if (nb_sectors > 0) {
1019        if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1020            return ret;
1021        sector_num += nb_sectors;
1022        len = nb_sectors << BDRV_SECTOR_BITS;
1023        buf += len;
1024        count -= len;
1025    }
1026
1027    /* add data from the last sector */
1028    if (count > 0) {
1029        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1030            return ret;
1031        memcpy(buf, tmp_buf, count);
1032    }
1033    return count1;
1034}
1035
1036int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1037                const void *buf, int count1)
1038{
1039    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1040    int len, nb_sectors, count;
1041    int64_t sector_num;
1042    int ret;
1043
1044    count = count1;
1045    /* first write to align to sector start */
1046    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1047    if (len > count)
1048        len = count;
1049    sector_num = offset >> BDRV_SECTOR_BITS;
1050    if (len > 0) {
1051        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1052            return ret;
1053        memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
1054        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1055            return ret;
1056        count -= len;
1057        if (count == 0)
1058            return count1;
1059        sector_num++;
1060        buf += len;
1061    }
1062
1063    /* write the sectors "in place" */
1064    nb_sectors = count >> BDRV_SECTOR_BITS;
1065    if (nb_sectors > 0) {
1066        if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1067            return ret;
1068        sector_num += nb_sectors;
1069        len = nb_sectors << BDRV_SECTOR_BITS;
1070        buf += len;
1071        count -= len;
1072    }
1073
1074    /* add data from the last sector */
1075    if (count > 0) {
1076        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1077            return ret;
1078        memcpy(tmp_buf, buf, count);
1079        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1080            return ret;
1081    }
1082    return count1;
1083}
1084
1085/*
1086 * Writes to the file and ensures that no writes are reordered across this
1087 * request (acts as a barrier)
1088 *
1089 * Returns 0 on success, -errno in error cases.
1090 */
1091int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1092    const void *buf, int count)
1093{
1094    int ret;
1095
1096    ret = bdrv_pwrite(bs, offset, buf, count);
1097    if (ret < 0) {
1098        return ret;
1099    }
1100
1101    /* No flush needed for cache=writethrough, it uses O_DSYNC */
1102    if ((bs->open_flags & BDRV_O_CACHE_MASK) != 0) {
1103        bdrv_flush(bs);
1104    }
1105
1106    return 0;
1107}
1108
1109/*
1110 * Writes to the file and ensures that no writes are reordered across this
1111 * request (acts as a barrier)
1112 *
1113 * Returns 0 on success, -errno in error cases.
1114 */
1115int bdrv_write_sync(BlockDriverState *bs, int64_t sector_num,
1116    const uint8_t *buf, int nb_sectors)
1117{
1118    return bdrv_pwrite_sync(bs, BDRV_SECTOR_SIZE * sector_num,
1119        buf, BDRV_SECTOR_SIZE * nb_sectors);
1120}
1121
1122/**
1123 * Truncate file to 'offset' bytes (needed only for file protocols)
1124 */
1125int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1126{
1127    BlockDriver *drv = bs->drv;
1128    int ret;
1129    if (!drv)
1130        return -ENOMEDIUM;
1131    if (!drv->bdrv_truncate)
1132        return -ENOTSUP;
1133    if (bs->read_only)
1134        return -EACCES;
1135    if (bdrv_in_use(bs))
1136        return -EBUSY;
1137    ret = drv->bdrv_truncate(bs, offset);
1138    if (ret == 0) {
1139        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1140        if (bs->change_cb) {
1141            bs->change_cb(bs->change_opaque, CHANGE_SIZE);
1142        }
1143    }
1144    return ret;
1145}
1146
1147/**
1148 * Length of a file in bytes. Return < 0 if error or unknown.
1149 */
1150int64_t bdrv_getlength(BlockDriverState *bs)
1151{
1152    BlockDriver *drv = bs->drv;
1153    if (!drv)
1154        return -ENOMEDIUM;
1155
1156    /* Fixed size devices use the total_sectors value for speed instead of
1157       issuing a length query (like lseek) on each call.  Also, legacy block
1158       drivers don't provide a bdrv_getlength function and must use
1159       total_sectors. */
1160    if (!bs->growable || !drv->bdrv_getlength) {
1161        return bs->total_sectors * BDRV_SECTOR_SIZE;
1162    }
1163    return drv->bdrv_getlength(bs);
1164}
1165
1166/* return 0 as number of sectors if no device present or error */
1167void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1168{
1169    int64_t length;
1170    length = bdrv_getlength(bs);
1171    if (length < 0)
1172        length = 0;
1173    else
1174        length = length >> BDRV_SECTOR_BITS;
1175    *nb_sectors_ptr = length;
1176}
1177
/* One 16-byte entry of the MSDOS partition table, as read from sector 0.
   The 32-bit fields are converted with le32_to_cpu() before use. */
struct partition {
        uint8_t boot_ind;           /* boot indicator, 0x80 = active */
        uint8_t head;               /* CHS start: head */
        uint8_t sector;             /* CHS start: sector */
        uint8_t cyl;                /* CHS start: cylinder */
        uint8_t sys_ind;            /* partition type */
        uint8_t end_head;           /* CHS end: head */
        uint8_t end_sector;         /* CHS end: sector */
        uint8_t end_cyl;            /* CHS end: cylinder */
        uint32_t start_sect;        /* first sector, counted from 0 */
        uint32_t nr_sects;          /* number of sectors in the partition */
} __attribute__((packed));
1190
1191/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1192static int guess_disk_lchs(BlockDriverState *bs,
1193                           int *pcylinders, int *pheads, int *psectors)
1194{
1195    uint8_t buf[BDRV_SECTOR_SIZE];
1196    int ret, i, heads, sectors, cylinders;
1197    struct partition *p;
1198    uint32_t nr_sects;
1199    uint64_t nb_sectors;
1200
1201    bdrv_get_geometry(bs, &nb_sectors);
1202
1203    ret = bdrv_read(bs, 0, buf, 1);
1204    if (ret < 0)
1205        return -1;
1206    /* test msdos magic */
1207    if (buf[510] != 0x55 || buf[511] != 0xaa)
1208        return -1;
1209    for(i = 0; i < 4; i++) {
1210        p = ((struct partition *)(buf + 0x1be)) + i;
1211        nr_sects = le32_to_cpu(p->nr_sects);
1212        if (nr_sects && p->end_head) {
1213            /* We make the assumption that the partition terminates on
1214               a cylinder boundary */
1215            heads = p->end_head + 1;
1216            sectors = p->end_sector & 63;
1217            if (sectors == 0)
1218                continue;
1219            cylinders = nb_sectors / (heads * sectors);
1220            if (cylinders < 1 || cylinders > 16383)
1221                continue;
1222            *pheads = heads;
1223            *psectors = sectors;
1224            *pcylinders = cylinders;
1225#if 0
1226            printf("guessed geometry: LCHS=%d %d %d\n",
1227                   cylinders, heads, sectors);
1228#endif
1229            return 0;
1230        }
1231    }
1232    return -1;
1233}
1234
1235void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1236{
1237    int translation, lba_detected = 0;
1238    int cylinders, heads, secs;
1239    uint64_t nb_sectors;
1240
1241    /* if a geometry hint is available, use it */
1242    bdrv_get_geometry(bs, &nb_sectors);
1243    bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1244    translation = bdrv_get_translation_hint(bs);
1245    if (cylinders != 0) {
1246        *pcyls = cylinders;
1247        *pheads = heads;
1248        *psecs = secs;
1249    } else {
1250        if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1251            if (heads > 16) {
1252                /* if heads > 16, it means that a BIOS LBA
1253                   translation was active, so the default
1254                   hardware geometry is OK */
1255                lba_detected = 1;
1256                goto default_geometry;
1257            } else {
1258                *pcyls = cylinders;
1259                *pheads = heads;
1260                *psecs = secs;
1261                /* disable any translation to be in sync with
1262                   the logical geometry */
1263                if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1264                    bdrv_set_translation_hint(bs,
1265                                              BIOS_ATA_TRANSLATION_NONE);
1266                }
1267            }
1268        } else {
1269        default_geometry:
1270            /* if no geometry, use a standard physical disk geometry */
1271            cylinders = nb_sectors / (16 * 63);
1272
1273            if (cylinders > 16383)
1274                cylinders = 16383;
1275            else if (cylinders < 2)
1276                cylinders = 2;
1277            *pcyls = cylinders;
1278            *pheads = 16;
1279            *psecs = 63;
1280            if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1281                if ((*pcyls * *pheads) <= 131072) {
1282                    bdrv_set_translation_hint(bs,
1283                                              BIOS_ATA_TRANSLATION_LARGE);
1284                } else {
1285                    bdrv_set_translation_hint(bs,
1286                                              BIOS_ATA_TRANSLATION_LBA);
1287                }
1288            }
1289        }
1290        bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1291    }
1292}
1293
1294void bdrv_set_geometry_hint(BlockDriverState *bs,
1295                            int cyls, int heads, int secs)
1296{
1297    bs->cyls = cyls;
1298    bs->heads = heads;
1299    bs->secs = secs;
1300}
1301
1302void bdrv_set_type_hint(BlockDriverState *bs, int type)
1303{
1304    bs->type = type;
1305    bs->removable = ((type == BDRV_TYPE_CDROM ||
1306                      type == BDRV_TYPE_FLOPPY));
1307}
1308
1309void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1310{
1311    bs->translation = translation;
1312}
1313
1314void bdrv_get_geometry_hint(BlockDriverState *bs,
1315                            int *pcyls, int *pheads, int *psecs)
1316{
1317    *pcyls = bs->cyls;
1318    *pheads = bs->heads;
1319    *psecs = bs->secs;
1320}
1321
1322int bdrv_get_type_hint(BlockDriverState *bs)
1323{
1324    return bs->type;
1325}
1326
1327int bdrv_get_translation_hint(BlockDriverState *bs)
1328{
1329    return bs->translation;
1330}
1331
1332void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1333                       BlockErrorAction on_write_error)
1334{
1335    bs->on_read_error = on_read_error;
1336    bs->on_write_error = on_write_error;
1337}
1338
1339BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1340{
1341    return is_read ? bs->on_read_error : bs->on_write_error;
1342}
1343
1344void bdrv_set_removable(BlockDriverState *bs, int removable)
1345{
1346    bs->removable = removable;
1347    if (removable && bs == bs_snapshots) {
1348        bs_snapshots = NULL;
1349    }
1350}
1351
1352int bdrv_is_removable(BlockDriverState *bs)
1353{
1354    return bs->removable;
1355}
1356
1357int bdrv_is_read_only(BlockDriverState *bs)
1358{
1359    return bs->read_only;
1360}
1361
1362int bdrv_is_sg(BlockDriverState *bs)
1363{
1364    return bs->sg;
1365}
1366
1367int bdrv_enable_write_cache(BlockDriverState *bs)
1368{
1369    return bs->enable_write_cache;
1370}
1371
1372/* XXX: no longer used */
1373void bdrv_set_change_cb(BlockDriverState *bs,
1374                        void (*change_cb)(void *opaque, int reason),
1375                        void *opaque)
1376{
1377    bs->change_cb = change_cb;
1378    bs->change_opaque = opaque;
1379}
1380
1381int bdrv_is_encrypted(BlockDriverState *bs)
1382{
1383    if (bs->backing_hd && bs->backing_hd->encrypted)
1384        return 1;
1385    return bs->encrypted;
1386}
1387
1388int bdrv_key_required(BlockDriverState *bs)
1389{
1390    BlockDriverState *backing_hd = bs->backing_hd;
1391
1392    if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1393        return 1;
1394    return (bs->encrypted && !bs->valid_key);
1395}
1396
1397int bdrv_set_key(BlockDriverState *bs, const char *key)
1398{
1399    int ret;
1400    if (bs->backing_hd && bs->backing_hd->encrypted) {
1401        ret = bdrv_set_key(bs->backing_hd, key);
1402        if (ret < 0)
1403            return ret;
1404        if (!bs->encrypted)
1405            return 0;
1406    }
1407    if (!bs->encrypted) {
1408        return -EINVAL;
1409    } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1410        return -ENOMEDIUM;
1411    }
1412    ret = bs->drv->bdrv_set_key(bs, key);
1413    if (ret < 0) {
1414        bs->valid_key = 0;
1415    } else if (!bs->valid_key) {
1416        bs->valid_key = 1;
1417        /* call the change callback now, we skipped it on open */
1418        bs->media_changed = 1;
1419        if (bs->change_cb)
1420            bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
1421    }
1422    return ret;
1423}
1424
1425void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1426{
1427    if (!bs->drv) {
1428        buf[0] = '\0';
1429    } else {
1430        pstrcpy(buf, buf_size, bs->drv->format_name);
1431    }
1432}
1433
1434void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1435                         void *opaque)
1436{
1437    BlockDriver *drv;
1438
1439    QLIST_FOREACH(drv, &bdrv_drivers, list) {
1440        it(opaque, drv->format_name);
1441    }
1442}
1443
1444BlockDriverState *bdrv_find(const char *name)
1445{
1446    BlockDriverState *bs;
1447
1448    QTAILQ_FOREACH(bs, &bdrv_states, list) {
1449        if (!strcmp(name, bs->device_name)) {
1450            return bs;
1451        }
1452    }
1453    return NULL;
1454}
1455
1456BlockDriverState *bdrv_next(BlockDriverState *bs)
1457{
1458    if (!bs) {
1459        return QTAILQ_FIRST(&bdrv_states);
1460    }
1461    return QTAILQ_NEXT(bs, list);
1462}
1463
1464void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1465{
1466    BlockDriverState *bs;
1467
1468    QTAILQ_FOREACH(bs, &bdrv_states, list) {
1469        it(opaque, bs);
1470    }
1471}
1472
1473const char *bdrv_get_device_name(BlockDriverState *bs)
1474{
1475    return bs->device_name;
1476}
1477
1478int bdrv_flush(BlockDriverState *bs)
1479{
1480    if (bs->open_flags & BDRV_O_NO_FLUSH) {
1481        return 0;
1482    }
1483
1484    if (bs->drv && bs->drv->bdrv_flush) {
1485        return bs->drv->bdrv_flush(bs);
1486    }
1487
1488    /*
1489     * Some block drivers always operate in either writethrough or unsafe mode
1490     * and don't support bdrv_flush therefore. Usually qemu doesn't know how
1491     * the server works (because the behaviour is hardcoded or depends on
1492     * server-side configuration), so we can't ensure that everything is safe
1493     * on disk. Returning an error doesn't work because that would break guests
1494     * even if the server operates in writethrough mode.
1495     *
1496     * Let's hope the user knows what he's doing.
1497     */
1498    return 0;
1499}
1500
1501void bdrv_flush_all(void)
1502{
1503    BlockDriverState *bs;
1504
1505    QTAILQ_FOREACH(bs, &bdrv_states, list) {
1506        if (bs->drv && !bdrv_is_read_only(bs) &&
1507            (!bdrv_is_removable(bs) || bdrv_is_inserted(bs))) {
1508            bdrv_flush(bs);
1509        }
1510    }
1511}
1512
1513int bdrv_has_zero_init(BlockDriverState *bs)
1514{
1515    assert(bs->drv);
1516
1517    if (bs->drv->bdrv_has_zero_init) {
1518        return bs->drv->bdrv_has_zero_init(bs);
1519    }
1520
1521    return 1;
1522}
1523
1524int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
1525{
1526    if (!bs->drv) {
1527        return -ENOMEDIUM;
1528    }
1529    if (!bs->drv->bdrv_discard) {
1530        return 0;
1531    }
1532    return bs->drv->bdrv_discard(bs, sector_num, nb_sectors);
1533}
1534
1535/*
1536 * Returns true iff the specified sector is present in the disk image. Drivers
1537 * not implementing the functionality are assumed to not support backing files,
1538 * hence all their sectors are reported as allocated.
1539 *
1540 * 'pnum' is set to the number of sectors (including and immediately following
1541 * the specified sector) that are known to be in the same
1542 * allocated/unallocated state.
1543 *
1544 * 'nb_sectors' is the max value 'pnum' should be set to.
1545 */
1546int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1547        int *pnum)
1548{
1549    int64_t n;
1550    if (!bs->drv->bdrv_is_allocated) {
1551        if (sector_num >= bs->total_sectors) {
1552            *pnum = 0;
1553            return 0;
1554        }
1555        n = bs->total_sectors - sector_num;
1556        *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1557        return 1;
1558    }
1559    return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1560}
1561
1562void bdrv_mon_event(const BlockDriverState *bdrv,
1563                    BlockMonEventAction action, int is_read)
1564{
1565    QObject *data;
1566    const char *action_str;
1567
1568    switch (action) {
1569    case BDRV_ACTION_REPORT:
1570        action_str = "report";
1571        break;
1572    case BDRV_ACTION_IGNORE:
1573        action_str = "ignore";
1574        break;
1575    case BDRV_ACTION_STOP:
1576        action_str = "stop";
1577        break;
1578    default:
1579        abort();
1580    }
1581
1582    data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1583                              bdrv->device_name,
1584                              action_str,
1585                              is_read ? "read" : "write");
1586    monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1587
1588    qobject_decref(data);
1589}
1590
1591static void bdrv_print_dict(QObject *obj, void *opaque)
1592{
1593    QDict *bs_dict;
1594    Monitor *mon = opaque;
1595
1596    bs_dict = qobject_to_qdict(obj);
1597
1598    monitor_printf(mon, "%s: type=%s removable=%d",
1599                        qdict_get_str(bs_dict, "device"),
1600                        qdict_get_str(bs_dict, "type"),
1601                        qdict_get_bool(bs_dict, "removable"));
1602
1603    if (qdict_get_bool(bs_dict, "removable")) {
1604        monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
1605    }
1606
1607    if (qdict_haskey(bs_dict, "inserted")) {
1608        QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));
1609
1610        monitor_printf(mon, " file=");
1611        monitor_print_filename(mon, qdict_get_str(qdict, "file"));
1612        if (qdict_haskey(qdict, "backing_file")) {
1613            monitor_printf(mon, " backing_file=");
1614            monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
1615        }
1616        monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
1617                            qdict_get_bool(qdict, "ro"),
1618                            qdict_get_str(qdict, "drv"),
1619                            qdict_get_bool(qdict, "encrypted"));
1620    } else {
1621        monitor_printf(mon, " [not inserted]");
1622    }
1623
1624    monitor_printf(mon, "\n");
1625}
1626
1627void bdrv_info_print(Monitor *mon, const QObject *data)
1628{
1629    qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
1630}
1631
1632void bdrv_info(Monitor *mon, QObject **ret_data)
1633{
1634    QList *bs_list;
1635    BlockDriverState *bs;
1636
1637    bs_list = qlist_new();
1638
1639    QTAILQ_FOREACH(bs, &bdrv_states, list) {
1640        QObject *bs_obj;
1641        const char *type = "unknown";
1642
1643        switch(bs->type) {
1644        case BDRV_TYPE_HD:
1645            type = "hd";
1646            break;
1647        case BDRV_TYPE_CDROM:
1648            type = "cdrom";
1649            break;
1650        case BDRV_TYPE_FLOPPY:
1651            type = "floppy";
1652            break;
1653        }
1654
1655        bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': %s, "
1656                                    "'removable': %i, 'locked': %i }",
1657                                    bs->device_name, type, bs->removable,
1658                                    bs->locked);
1659
1660        if (bs->drv) {
1661            QObject *obj;
1662            QDict *bs_dict = qobject_to_qdict(bs_obj);
1663
1664            obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1665                                     "'encrypted': %i }",
1666                                     bs->filename, bs->read_only,
1667                                     bs->drv->format_name,
1668                                     bdrv_is_encrypted(bs));
1669            if (bs->backing_file[0] != '\0') {
1670                QDict *qdict = qobject_to_qdict(obj);
1671                qdict_put(qdict, "backing_file",
1672                          qstring_from_str(bs->backing_file));
1673            }
1674
1675            qdict_put_obj(bs_dict, "inserted", obj);
1676        }
1677        qlist_append_obj(bs_list, bs_obj);
1678    }
1679
1680    *ret_data = QOBJECT(bs_list);
1681}
1682
1683static void bdrv_stats_iter(QObject *data, void *opaque)
1684{
1685    QDict *qdict;
1686    Monitor *mon = opaque;
1687
1688    qdict = qobject_to_qdict(data);
1689    monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
1690
1691    qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
1692    monitor_printf(mon, " rd_bytes=%" PRId64
1693                        " wr_bytes=%" PRId64
1694                        " rd_operations=%" PRId64
1695                        " wr_operations=%" PRId64
1696                        "\n",
1697                        qdict_get_int(qdict, "rd_bytes"),
1698                        qdict_get_int(qdict, "wr_bytes"),
1699                        qdict_get_int(qdict, "rd_operations"),
1700                        qdict_get_int(qdict, "wr_operations"));
1701}
1702
1703void bdrv_stats_print(Monitor *mon, const QObject *data)
1704{
1705    qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
1706}
1707
1708static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
1709{
1710    QObject *res;
1711    QDict *dict;
1712
1713    res = qobject_from_jsonf("{ 'stats': {"
1714                             "'rd_bytes': %" PRId64 ","
1715                             "'wr_bytes': %" PRId64 ","
1716                             "'rd_operations': %" PRId64 ","
1717                             "'wr_operations': %" PRId64 ","
1718                             "'wr_highest_offset': %" PRId64
1719                             "} }",
1720                             bs->rd_bytes, bs->wr_bytes,
1721                             bs->rd_ops, bs->wr_ops,
1722                             bs->wr_highest_sector *
1723                             (uint64_t)BDRV_SECTOR_SIZE);
1724    dict  = qobject_to_qdict(res);
1725
1726    if (*bs->device_name) {
1727        qdict_put(dict, "device", qstring_from_str(bs->device_name));
1728    }
1729
1730    if (bs->file) {
1731        QObject *parent = bdrv_info_stats_bs(bs->file);
1732        qdict_put_obj(dict, "parent", parent);
1733    }
1734
1735    return res;
1736}
1737
1738void bdrv_info_stats(Monitor *mon, QObject **ret_data)
1739{
1740    QObject *obj;
1741    QList *devices;
1742    BlockDriverState *bs;
1743
1744    devices = qlist_new();
1745
1746    QTAILQ_FOREACH(bs, &bdrv_states, list) {
1747        obj = bdrv_info_stats_bs(bs);
1748        qlist_append_obj(devices, obj);
1749    }
1750
1751    *ret_data = QOBJECT(devices);
1752}
1753
1754const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1755{
1756    if (bs->backing_hd && bs->backing_hd->encrypted)
1757        return bs->backing_file;
1758    else if (bs->encrypted)
1759        return bs->filename;
1760    else
1761        return NULL;
1762}
1763
1764void bdrv_get_backing_filename(BlockDriverState *bs,
1765                               char *filename, int filename_size)
1766{
1767    if (!bs->backing_file) {
1768        pstrcpy(filename, filename_size, "");
1769    } else {
1770        pstrcpy(filename, filename_size, bs->backing_file);
1771    }
1772}
1773
1774int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1775                          const uint8_t *buf, int nb_sectors)
1776{
1777    BlockDriver *drv = bs->drv;
1778    if (!drv)
1779        return -ENOMEDIUM;
1780    if (!drv->bdrv_write_compressed)
1781        return -ENOTSUP;
1782    if (bdrv_check_request(bs, sector_num, nb_sectors))
1783        return -EIO;
1784
1785    if (bs->dirty_bitmap) {
1786        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1787    }
1788
1789    return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1790}
1791
1792int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1793{
1794    BlockDriver *drv = bs->drv;
1795    if (!drv)
1796        return -ENOMEDIUM;
1797    if (!drv->bdrv_get_info)
1798        return -ENOTSUP;
1799    memset(bdi, 0, sizeof(*bdi));
1800    return drv->bdrv_get_info(bs, bdi);
1801}
1802
1803int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
1804                      int64_t pos, int size)
1805{
1806    BlockDriver *drv = bs->drv;
1807    if (!drv)
1808        return -ENOMEDIUM;
1809    if (drv->bdrv_save_vmstate)
1810        return drv->bdrv_save_vmstate(bs, buf, pos, size);
1811    if (bs->file)
1812        return bdrv_save_vmstate(bs->file, buf, pos, size);
1813    return -ENOTSUP;
1814}
1815
1816int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
1817                      int64_t pos, int size)
1818{
1819    BlockDriver *drv = bs->drv;
1820    if (!drv)
1821        return -ENOMEDIUM;
1822    if (drv->bdrv_load_vmstate)
1823        return drv->bdrv_load_vmstate(bs, buf, pos, size);
1824    if (bs->file)
1825        return bdrv_load_vmstate(bs->file, buf, pos, size);
1826    return -ENOTSUP;
1827}
1828
1829void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
1830{
1831    BlockDriver *drv = bs->drv;
1832
1833    if (!drv || !drv->bdrv_debug_event) {
1834        return;
1835    }
1836
1837    return drv->bdrv_debug_event(bs, event);
1838
1839}
1840
1841/**************************************************************/
1842/* handling of snapshots */
1843
1844int bdrv_can_snapshot(BlockDriverState *bs)
1845{
1846    BlockDriver *drv = bs->drv;
1847    if (!drv || bdrv_is_removable(bs) || bdrv_is_read_only(bs)) {
1848        return 0;
1849    }
1850
1851    if (!drv->bdrv_snapshot_create) {
1852        if (bs->file != NULL) {
1853            return bdrv_can_snapshot(bs->file);
1854        }
1855        return 0;
1856    }
1857
1858    return 1;
1859}
1860
1861int bdrv_is_snapshot(BlockDriverState *bs)
1862{
1863    return !!(bs->open_flags & BDRV_O_SNAPSHOT);
1864}
1865
1866BlockDriverState *bdrv_snapshots(void)
1867{
1868    BlockDriverState *bs;
1869
1870    if (bs_snapshots) {
1871        return bs_snapshots;
1872    }
1873
1874    bs = NULL;
1875    while ((bs = bdrv_next(bs))) {
1876        if (bdrv_can_snapshot(bs)) {
1877            bs_snapshots = bs;
1878            return bs;
1879        }
1880    }
1881    return NULL;
1882}
1883
1884int bdrv_snapshot_create(BlockDriverState *bs,
1885                         QEMUSnapshotInfo *sn_info)
1886{
1887    BlockDriver *drv = bs->drv;
1888    if (!drv)
1889        return -ENOMEDIUM;
1890    if (drv->bdrv_snapshot_create)
1891        return drv->bdrv_snapshot_create(bs, sn_info);
1892    if (bs->file)
1893        return bdrv_snapshot_create(bs->file, sn_info);
1894    return -ENOTSUP;
1895}
1896
1897int bdrv_snapshot_goto(BlockDriverState *bs,
1898                       const char *snapshot_id)
1899{
1900    BlockDriver *drv = bs->drv;
1901    int ret, open_ret;
1902
1903    if (!drv)
1904        return -ENOMEDIUM;
1905    if (drv->bdrv_snapshot_goto)
1906        return drv->bdrv_snapshot_goto(bs, snapshot_id);
1907
1908    if (bs->file) {
1909        drv->bdrv_close(bs);
1910        ret = bdrv_snapshot_goto(bs->file, snapshot_id);
1911        open_ret = drv->bdrv_open(bs, bs->open_flags);
1912        if (open_ret < 0) {
1913            bdrv_delete(bs->file);
1914            bs->drv = NULL;
1915            return open_ret;
1916        }
1917        return ret;
1918    }
1919
1920    return -ENOTSUP;
1921}
1922
1923int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
1924{
1925    BlockDriver *drv = bs->drv;
1926    if (!drv)
1927        return -ENOMEDIUM;
1928    if (drv->bdrv_snapshot_delete)
1929        return drv->bdrv_snapshot_delete(bs, snapshot_id);
1930    if (bs->file)
1931        return bdrv_snapshot_delete(bs->file, snapshot_id);
1932    return -ENOTSUP;
1933}
1934
1935int bdrv_snapshot_list(BlockDriverState *bs,
1936                       QEMUSnapshotInfo **psn_info)
1937{
1938    BlockDriver *drv = bs->drv;
1939    if (!drv)
1940        return -ENOMEDIUM;
1941    if (drv->bdrv_snapshot_list)
1942        return drv->bdrv_snapshot_list(bs, psn_info);
1943    if (bs->file)
1944        return bdrv_snapshot_list(bs->file, psn_info);
1945    return -ENOTSUP;
1946}
1947
1948int bdrv_snapshot_load_tmp(BlockDriverState *bs,
1949        const char *snapshot_name)
1950{
1951    BlockDriver *drv = bs->drv;
1952    if (!drv) {
1953        return -ENOMEDIUM;
1954    }
1955    if (!bs->read_only) {
1956        return -EINVAL;
1957    }
1958    if (drv->bdrv_snapshot_load_tmp) {
1959        return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
1960    }
1961    return -ENOTSUP;
1962}
1963
#define NB_SUFFIXES 4

/*
 * Format 'size' into 'buf' (at most 'buf_size' bytes) in a short
 * human-readable form and return 'buf'.
 *
 * Values up to 999 are printed as plain integers. Larger values are scaled
 * by powers of 1024 and suffixed with K, M, G or T: one decimal place is
 * shown while the scaled value is below 10, a rounded integer otherwise.
 * Anything too large for the other units is expressed in T.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < (10 * unit)) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit,
                     suffixes[idx]);
            return buf;
        }
        if (size < (1000 * unit) || idx == (NB_SUFFIXES - 1)) {
            /* adding half a unit rounds to the nearest integer */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     ((size + (unit >> 1)) / unit),
                     suffixes[idx]);
            return buf;
        }
    }
    return buf;
}
1993
1994char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
1995{
1996    char buf1[128], date_buf[128], clock_buf[128];
1997#ifdef _WIN32
1998    struct tm *ptm;
1999#else
2000    struct tm tm;
2001#endif
2002    time_t ti;
2003    int64_t secs;
2004
2005    if (!sn) {
2006        snprintf(buf, buf_size,
2007                 "%-10s%-20s%7s%20s%15s",
2008                 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2009    } else {
2010        ti = sn->date_sec;
2011#ifdef _WIN32
2012        ptm = localtime(&ti);
2013        strftime(date_buf, sizeof(date_buf),
2014                 "%Y-%m-%d %H:%M:%S", ptm);
2015#else
2016        localtime_r(&ti, &tm);
2017        strftime(date_buf, sizeof(date_buf),
2018                 "%Y-%m-%d %H:%M:%S", &tm);
2019#endif
2020        secs = sn->vm_clock_nsec / 1000000000;
2021        snprintf(clock_buf, sizeof(clock_buf),
2022                 "%02d:%02d:%02d.%03d",
2023                 (int)(secs / 3600),
2024                 (int)((secs / 60) % 60),
2025                 (int)(secs % 60),
2026                 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2027        snprintf(buf, buf_size,
2028                 "%-10s%-20s%7s%20s%15s",
2029                 sn->id_str, sn->name,
2030                 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2031                 date_buf,
2032                 clock_buf);
2033    }
2034    return buf;
2035}
2036
2037
2038/**************************************************************/
2039/* async I/Os */
2040
2041BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
2042                                 QEMUIOVector *qiov, int nb_sectors,
2043                                 BlockDriverCompletionFunc *cb, void *opaque)
2044{
2045    BlockDriver *drv = bs->drv;
2046    BlockDriverAIOCB *ret;
2047
2048    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2049
2050    if (!drv)
2051        return NULL;
2052    if (bdrv_check_request(bs, sector_num, nb_sectors))
2053        return NULL;
2054
2055    ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
2056                              cb, opaque);
2057
2058    if (ret) {
2059        /* Update stats even though technically transfer has not happened. */
2060        bs->rd_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
2061        bs->rd_ops ++;
2062    }
2063
2064    return ret;
2065}
2066
2067typedef struct BlockCompleteData {
2068    BlockDriverCompletionFunc *cb;
2069    void *opaque;
2070    BlockDriverState *bs;
2071    int64_t sector_num;
2072    int nb_sectors;
2073} BlockCompleteData;
2074
2075static void block_complete_cb(void *opaque, int ret)
2076{
2077    BlockCompleteData *b = opaque;
2078
2079    if (b->bs->dirty_bitmap) {
2080        set_dirty_bitmap(b->bs, b->sector_num, b->nb_sectors, 1);
2081    }
2082    b->cb(b->opaque, ret);
2083    qemu_free(b);
2084}
2085
2086static BlockCompleteData *blk_dirty_cb_alloc(BlockDriverState *bs,
2087                                             int64_t sector_num,
2088                                             int nb_sectors,
2089                                             BlockDriverCompletionFunc *cb,
2090                                             void *opaque)
2091{
2092    BlockCompleteData *blkdata = qemu_mallocz(sizeof(BlockCompleteData));
2093
2094    blkdata->bs = bs;
2095    blkdata->cb = cb;
2096    blkdata->opaque = opaque;
2097    blkdata->sector_num = sector_num;
2098    blkdata->nb_sectors = nb_sectors;
2099
2100    return blkdata;
2101}
2102
2103BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2104                                  QEMUIOVector *qiov, int nb_sectors,
2105                                  BlockDriverCompletionFunc *cb, void *opaque)
2106{
2107    BlockDriver *drv = bs->drv;
2108    BlockDriverAIOCB *ret;
2109    BlockCompleteData *blk_cb_data;
2110
2111    trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2112
2113    if (!drv)
2114        return NULL;
2115    if (bs->read_only)
2116        return NULL;
2117    if (bdrv_check_request(bs, sector_num, nb_sectors))
2118        return NULL;
2119
2120    if (bs->dirty_bitmap) {
2121        blk_cb_data = blk_dirty_cb_alloc(bs, sector_num, nb_sectors, cb,
2122                                         opaque);
2123        cb = &block_complete_cb;
2124        opaque = blk_cb_data;
2125    }
2126
2127    ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
2128                               cb, opaque);
2129
2130    if (ret) {
2131        /* Update stats even though technically transfer has not happened. */
2132        bs->wr_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
2133        bs->wr_ops ++;
2134        if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
2135            bs->wr_highest_sector = sector_num + nb_sectors - 1;
2136        }
2137    }
2138
2139    return ret;
2140}
2141
2142
2143typedef struct MultiwriteCB {
2144    int error;
2145    int num_requests;
2146    int num_callbacks;
2147    struct {
2148        BlockDriverCompletionFunc *cb;
2149        void *opaque;
2150        QEMUIOVector *free_qiov;
2151        void *free_buf;
2152    } callbacks[];
2153} MultiwriteCB;
2154
2155static void multiwrite_user_cb(MultiwriteCB *mcb)
2156{
2157    int i;
2158
2159    for (i = 0; i < mcb->num_callbacks; i++) {
2160        mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
2161        if (mcb->callbacks[i].free_qiov) {
2162            qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2163        }
2164        qemu_free(mcb->callbacks[i].free_qiov);
2165        qemu_vfree(mcb->callbacks[i].free_buf);
2166    }
2167}
2168
2169static void multiwrite_cb(void *opaque, int ret)
2170{
2171    MultiwriteCB *mcb = opaque;
2172
2173    trace_multiwrite_cb(mcb, ret);
2174
2175    if (ret < 0 && !mcb->error) {
2176        mcb->error = ret;
2177    }
2178
2179    mcb->num_requests--;
2180    if (mcb->num_requests == 0) {
2181        multiwrite_user_cb(mcb);
2182        qemu_free(mcb);
2183    }
2184}
2185
2186static int multiwrite_req_compare(const void *a, const void *b)
2187{
2188    const BlockRequest *req1 = a, *req2 = b;
2189
2190    /*
2191     * Note that we can't simply subtract req2->sector from req1->sector
2192     * here as that could overflow the return value.
2193     */
2194    if (req1->sector > req2->sector) {
2195        return 1;
2196    } else if (req1->sector < req2->sector) {
2197        return -1;
2198    } else {
2199        return 0;
2200    }
2201}
2202
2203/*
2204 * Takes a bunch of requests and tries to merge them. Returns the number of
2205 * requests that remain after merging.
2206 */
2207static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2208    int num_reqs, MultiwriteCB *mcb)
2209{
2210    int i, outidx;
2211
2212    // Sort requests by start sector
2213    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2214
2215    // Check if adjacent requests touch the same clusters. If so, combine them,
2216    // filling up gaps with zero sectors.
2217    outidx = 0;
2218    for (i = 1; i < num_reqs; i++) {
2219        int merge = 0;
2220        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2221
2222        // This handles the cases that are valid for all block drivers, namely
2223        // exactly sequential writes and overlapping writes.
2224        if (reqs[i].sector <= oldreq_last) {
2225            merge = 1;
2226        }
2227
2228        // The block driver may decide that it makes sense to combine requests
2229        // even if there is a gap of some sectors between them. In this case,
2230        // the gap is filled with zeros (therefore only applicable for yet
2231        // unused space in format like qcow2).
2232        if (!merge && bs->drv->bdrv_merge_requests) {
2233            merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2234        }
2235
2236        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2237            merge = 0;
2238        }
2239
2240        if (merge) {
2241            size_t size;
2242            QEMUIOVector *qiov = qemu_mallocz(sizeof(*qiov));
2243            qemu_iovec_init(qiov,
2244                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2245
2246            // Add the first request to the merged one. If the requests are
2247            // overlapping, drop the last sectors of the first request.
2248            size = (reqs[i].sector - reqs[outidx].sector) << 9;
2249            qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2250
2251            // We might need to add some zeros between the two requests
2252            if (reqs[i].sector > oldreq_last) {
2253                size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2254                uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2255                memset(buf, 0, zero_bytes);
2256                qemu_iovec_add(qiov, buf, zero_bytes);
2257                mcb->callbacks[i].free_buf = buf;
2258            }
2259
2260            // Add the second request
2261            qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2262
2263            reqs[outidx].nb_sectors = qiov->size >> 9;
2264            reqs[outidx].qiov = qiov;
2265
2266            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2267        } else {
2268            outidx++;
2269            reqs[outidx].sector     = reqs[i].sector;
2270            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2271            reqs[outidx].qiov       = reqs[i].qiov;
2272        }
2273    }
2274
2275    return outidx + 1;
2276}
2277
2278/*
2279 * Submit multiple AIO write requests at once.
2280 *
2281 * On success, the function returns 0 and all requests in the reqs array have
2282 * been submitted. In error case this function returns -1, and any of the
2283 * requests may or may not be submitted yet. In particular, this means that the
2284 * callback will be called for some of the requests, for others it won't. The
2285 * caller must check the error field of the BlockRequest to wait for the right
2286 * callbacks (if error != 0, no callback will be called).
2287 *
2288 * The implementation may modify the contents of the reqs array, e.g. to merge
2289 * requests. However, the fields opaque and error are left unmodified as they
2290 * are used to signal failure for a single request to the caller.
2291 */
2292int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2293{
2294    BlockDriverAIOCB *acb;
2295    MultiwriteCB *mcb;
2296    int i;
2297
2298    if (num_reqs == 0) {
2299        return 0;
2300    }
2301
2302    // Create MultiwriteCB structure
2303    mcb = qemu_mallocz(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
2304    mcb->num_requests = 0;
2305    mcb->num_callbacks = num_reqs;
2306
2307    for (i = 0; i < num_reqs; i++) {
2308        mcb->callbacks[i].cb = reqs[i].cb;
2309        mcb->callbacks[i].opaque = reqs[i].opaque;
2310    }
2311
2312    // Check for mergable requests
2313    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2314
2315    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2316
2317    /*
2318     * Run the aio requests. As soon as one request can't be submitted
2319     * successfully, fail all requests that are not yet submitted (we must
2320     * return failure for all requests anyway)
2321     *
2322     * num_requests cannot be set to the right value immediately: If
2323     * bdrv_aio_writev fails for some request, num_requests would be too high
2324     * and therefore multiwrite_cb() would never recognize the multiwrite
2325     * request as completed. We also cannot use the loop variable i to set it
2326     * when the first request fails because the callback may already have been
2327     * called for previously submitted requests. Thus, num_requests must be
2328     * incremented for each request that is submitted.
2329     *
2330     * The problem that callbacks may be called early also means that we need
2331     * to take care that num_requests doesn't become 0 before all requests are
2332     * submitted - multiwrite_cb() would consider the multiwrite request
2333     * completed. A dummy request that is "completed" by a manual call to
2334     * multiwrite_cb() takes care of this.
2335     */
2336    mcb->num_requests = 1;
2337
2338    // Run the aio requests
2339    for (i = 0; i < num_reqs; i++) {
2340        mcb->num_requests++;
2341        acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2342            reqs[i].nb_sectors, multiwrite_cb, mcb);
2343
2344        if (acb == NULL) {
2345            // We can only fail the whole thing if no request has been
2346            // submitted yet. Otherwise we'll wait for the submitted AIOs to
2347            // complete and report the error in the callback.
2348            if (i == 0) {
2349                trace_bdrv_aio_multiwrite_earlyfail(mcb);
2350                goto fail;
2351            } else {
2352                trace_bdrv_aio_multiwrite_latefail(mcb, i);
2353                multiwrite_cb(mcb, -EIO);
2354                break;
2355            }
2356        }
2357    }
2358
2359    /* Complete the dummy request */
2360    multiwrite_cb(mcb, 0);
2361
2362    return 0;
2363
2364fail:
2365    for (i = 0; i < mcb->num_callbacks; i++) {
2366        reqs[i].error = -EIO;
2367    }
2368    qemu_free(mcb);
2369    return -1;
2370}
2371
2372BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2373        BlockDriverCompletionFunc *cb, void *opaque)
2374{
2375    BlockDriver *drv = bs->drv;
2376
2377    if (bs->open_flags & BDRV_O_NO_FLUSH) {
2378        return bdrv_aio_noop_em(bs, cb, opaque);
2379    }
2380
2381    if (!drv)
2382        return NULL;
2383    return drv->bdrv_aio_flush(bs, cb, opaque);
2384}
2385
2386void bdrv_aio_cancel(BlockDriverAIOCB *acb)
2387{
2388    acb->pool->cancel(acb);
2389}
2390
2391
2392/**************************************************************/
2393/* async block device emulation */
2394
2395typedef struct BlockDriverAIOCBSync {
2396    BlockDriverAIOCB common;
2397    QEMUBH *bh;
2398    int ret;
2399    /* vector translation state */
2400    QEMUIOVector *qiov;
2401    uint8_t *bounce;
2402    int is_write;
2403} BlockDriverAIOCBSync;
2404
2405static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2406{
2407    BlockDriverAIOCBSync *acb =
2408        container_of(blockacb, BlockDriverAIOCBSync, common);
2409    qemu_bh_delete(acb->bh);
2410    acb->bh = NULL;
2411    qemu_aio_release(acb);
2412}
2413
2414static AIOPool bdrv_em_aio_pool = {
2415    .aiocb_size         = sizeof(BlockDriverAIOCBSync),
2416    .cancel             = bdrv_aio_cancel_em,
2417};
2418
2419static void bdrv_aio_bh_cb(void *opaque)
2420{
2421    BlockDriverAIOCBSync *acb = opaque;
2422
2423    if (!acb->is_write)
2424        qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2425    qemu_vfree(acb->bounce);
2426    acb->common.cb(acb->common.opaque, acb->ret);
2427    qemu_bh_delete(acb->bh);
2428    acb->bh = NULL;
2429    qemu_aio_release(acb);
2430}
2431
2432static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2433                                            int64_t sector_num,
2434                                            QEMUIOVector *qiov,
2435                                            int nb_sectors,
2436                                            BlockDriverCompletionFunc *cb,
2437                                            void *opaque,
2438                                            int is_write)
2439
2440{
2441    BlockDriverAIOCBSync *acb;
2442
2443    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2444    acb->is_write = is_write;
2445    acb->qiov = qiov;
2446    acb->bounce = qemu_blockalign(bs, qiov->size);
2447
2448    if (!acb->bh)
2449        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2450
2451    if (is_write) {
2452        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
2453        acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
2454    } else {
2455        acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
2456    }
2457
2458    qemu_bh_schedule(acb->bh);
2459
2460    return &acb->common;
2461}
2462
2463static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2464        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2465        BlockDriverCompletionFunc *cb, void *opaque)
2466{
2467    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2468}
2469
2470static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2471        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2472        BlockDriverCompletionFunc *cb, void *opaque)
2473{
2474    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
2475}
2476
2477static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
2478        BlockDriverCompletionFunc *cb, void *opaque)
2479{
2480    BlockDriverAIOCBSync *acb;
2481
2482    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2483    acb->is_write = 1; /* don't bounce in the completion hadler */
2484    acb->qiov = NULL;
2485    acb->bounce = NULL;
2486    acb->ret = 0;
2487
2488    if (!acb->bh)
2489        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2490
2491    bdrv_flush(bs);
2492    qemu_bh_schedule(acb->bh);
2493    return &acb->common;
2494}
2495
2496static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
2497        BlockDriverCompletionFunc *cb, void *opaque)
2498{
2499    BlockDriverAIOCBSync *acb;
2500
2501    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2502    acb->is_write = 1; /* don't bounce in the completion handler */
2503    acb->qiov = NULL;
2504    acb->bounce = NULL;
2505    acb->ret = 0;
2506
2507    if (!acb->bh) {
2508        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2509    }
2510
2511    qemu_bh_schedule(acb->bh);
2512    return &acb->common;
2513}
2514
2515/**************************************************************/
2516/* sync block device emulation */
2517
/*
 * Completion callback of the sync emulation: opaque points to the int that
 * receives the request's result.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
2522
2523#define NOT_DONE 0x7fffffff
2524
2525static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
2526                        uint8_t *buf, int nb_sectors)
2527{
2528    int async_ret;
2529    BlockDriverAIOCB *acb;
2530    struct iovec iov;
2531    QEMUIOVector qiov;
2532
2533    async_context_push();
2534
2535    async_ret = NOT_DONE;
2536    iov.iov_base = (void *)buf;
2537    iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
2538    qemu_iovec_init_external(&qiov, &iov, 1);
2539    acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
2540        bdrv_rw_em_cb, &async_ret);
2541    if (acb == NULL) {
2542        async_ret = -1;
2543        goto fail;
2544    }
2545
2546    while (async_ret == NOT_DONE) {
2547        qemu_aio_wait();
2548    }
2549
2550
2551fail:
2552    async_context_pop();
2553    return async_ret;
2554}
2555
2556static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
2557                         const uint8_t *buf, int nb_sectors)
2558{
2559    int async_ret;
2560    BlockDriverAIOCB *acb;
2561    struct iovec iov;
2562    QEMUIOVector qiov;
2563
2564    async_context_push();
2565
2566    async_ret = NOT_DONE;
2567    iov.iov_base = (void *)buf;
2568    iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
2569    qemu_iovec_init_external(&qiov, &iov, 1);
2570    acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
2571        bdrv_rw_em_cb, &async_ret);
2572    if (acb == NULL) {
2573        async_ret = -1;
2574        goto fail;
2575    }
2576    while (async_ret == NOT_DONE) {
2577        qemu_aio_wait();
2578    }
2579
2580fail:
2581    async_context_pop();
2582    return async_ret;
2583}
2584
2585void bdrv_init(void)
2586{
2587    module_call_init(MODULE_INIT_BLOCK);
2588}
2589
2590void bdrv_init_with_whitelist(void)
2591{
2592    use_bdrv_whitelist = 1;
2593    bdrv_init();
2594}
2595
2596void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2597                   BlockDriverCompletionFunc *cb, void *opaque)
2598{
2599    BlockDriverAIOCB *acb;
2600
2601    if (pool->free_aiocb) {
2602        acb = pool->free_aiocb;
2603        pool->free_aiocb = acb->next;
2604    } else {
2605        acb = qemu_mallocz(pool->aiocb_size);
2606        acb->pool = pool;
2607    }
2608    acb->bs = bs;
2609    acb->cb = cb;
2610    acb->opaque = opaque;
2611    return acb;
2612}
2613
2614void qemu_aio_release(void *p)
2615{
2616    BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2617    AIOPool *pool = acb->pool;
2618    acb->next = pool->free_aiocb;
2619    pool->free_aiocb = acb;
2620}
2621
2622/**************************************************************/
2623/* removable device support */
2624
2625/**
2626 * Return TRUE if the media is present
2627 */
2628int bdrv_is_inserted(BlockDriverState *bs)
2629{
2630    BlockDriver *drv = bs->drv;
2631    int ret;
2632    if (!drv)
2633        return 0;
2634    if (!drv->bdrv_is_inserted)
2635        return !bs->tray_open;
2636    ret = drv->bdrv_is_inserted(bs);
2637    return ret;
2638}
2639
2640/**
2641 * Return TRUE if the media changed since the last call to this
2642 * function. It is currently only used for floppy disks
2643 */
2644int bdrv_media_changed(BlockDriverState *bs)
2645{
2646    BlockDriver *drv = bs->drv;
2647    int ret;
2648
2649    if (!drv || !drv->bdrv_media_changed)
2650        ret = -ENOTSUP;
2651    else
2652        ret = drv->bdrv_media_changed(bs);
2653    if (ret == -ENOTSUP)
2654        ret = bs->media_changed;
2655    bs->media_changed = 0;
2656    return ret;
2657}
2658
2659/**
2660 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
2661 */
2662int bdrv_eject(BlockDriverState *bs, int eject_flag)
2663{
2664    BlockDriver *drv = bs->drv;
2665    int ret;
2666
2667    if (bs->locked) {
2668        return -EBUSY;
2669    }
2670
2671    if (!drv || !drv->bdrv_eject) {
2672        ret = -ENOTSUP;
2673    } else {
2674        ret = drv->bdrv_eject(bs, eject_flag);
2675    }
2676    if (ret == -ENOTSUP) {
2677        ret = 0;
2678    }
2679    if (ret >= 0) {
2680        bs->tray_open = eject_flag;
2681    }
2682
2683    return ret;
2684}
2685
2686int bdrv_is_locked(BlockDriverState *bs)
2687{
2688    return bs->locked;
2689}
2690
2691/**
2692 * Lock or unlock the media (if it is locked, the user won't be able
2693 * to eject it manually).
2694 */
2695void bdrv_set_locked(BlockDriverState *bs, int locked)
2696{
2697    BlockDriver *drv = bs->drv;
2698
2699    bs->locked = locked;
2700    if (drv && drv->bdrv_set_locked) {
2701        drv->bdrv_set_locked(bs, locked);
2702    }
2703}
2704
2705/* needed for generic scsi interface */
2706
2707int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
2708{
2709    BlockDriver *drv = bs->drv;
2710
2711    if (drv && drv->bdrv_ioctl)
2712        return drv->bdrv_ioctl(bs, req, buf);
2713    return -ENOTSUP;
2714}
2715
2716BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
2717        unsigned long int req, void *buf,
2718        BlockDriverCompletionFunc *cb, void *opaque)
2719{
2720    BlockDriver *drv = bs->drv;
2721
2722    if (drv && drv->bdrv_aio_ioctl)
2723        return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
2724    return NULL;
2725}
2726
2727
2728
2729void *qemu_blockalign(BlockDriverState *bs, size_t size)
2730{
2731    return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
2732}
2733
2734void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
2735{
2736    int64_t bitmap_size;
2737
2738    bs->dirty_count = 0;
2739    if (enable) {
2740        if (!bs->dirty_bitmap) {
2741            bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
2742                    BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
2743            bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
2744
2745            bs->dirty_bitmap = qemu_mallocz(bitmap_size);
2746        }
2747    } else {
2748        if (bs->dirty_bitmap) {
2749            qemu_free(bs->dirty_bitmap);
2750            bs->dirty_bitmap = NULL;
2751        }
2752    }
2753}
2754
2755int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
2756{
2757    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
2758
2759    if (bs->dirty_bitmap &&
2760        (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
2761        return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
2762            (1UL << (chunk % (sizeof(unsigned long) * 8))));
2763    } else {
2764        return 0;
2765    }
2766}
2767
2768void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
2769                      int nr_sectors)
2770{
2771    set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
2772}
2773
2774int64_t bdrv_get_dirty_count(BlockDriverState *bs)
2775{
2776    return bs->dirty_count;
2777}
2778
2779void bdrv_set_in_use(BlockDriverState *bs, int in_use)
2780{
2781    assert(bs->in_use != in_use);
2782    bs->in_use = in_use;
2783}
2784
2785int bdrv_in_use(BlockDriverState *bs)
2786{
2787    return bs->in_use;
2788}
2789
2790int bdrv_img_create(const char *filename, const char *fmt,
2791                    const char *base_filename, const char *base_fmt,
2792                    char *options, uint64_t img_size, int flags)
2793{
2794    QEMUOptionParameter *param = NULL, *create_options = NULL;
2795    QEMUOptionParameter *backing_fmt, *backing_file;
2796    BlockDriverState *bs = NULL;
2797    BlockDriver *drv, *proto_drv;
2798    BlockDriver *backing_drv = NULL;
2799    int ret = 0;
2800
2801    /* Find driver and parse its options */
2802    drv = bdrv_find_format(fmt);
2803    if (!drv) {
2804        error_report("Unknown file format '%s'", fmt);
2805        ret = -EINVAL;
2806        goto out;
2807    }
2808
2809    proto_drv = bdrv_find_protocol(filename);
2810    if (!proto_drv) {
2811        error_report("Unknown protocol '%s'", filename);
2812        ret = -EINVAL;
2813        goto out;
2814    }
2815
2816    create_options = append_option_parameters(create_options,
2817                                              drv->create_options);
2818    create_options = append_option_parameters(create_options,
2819                                              proto_drv->create_options);
2820
2821    /* Create parameter list with default values */
2822    param = parse_option_parameters("", create_options, param);
2823
2824    set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
2825
2826    /* Parse -o options */
2827    if (options) {
2828        param = parse_option_parameters(options, create_options, param);
2829        if (param == NULL) {
2830            error_report("Invalid options for file format '%s'.", fmt);
2831            ret = -EINVAL;
2832            goto out;
2833        }
2834    }
2835
2836    if (base_filename) {
2837        if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
2838                                 base_filename)) {
2839            error_report("Backing file not supported for file format '%s'",
2840                         fmt);
2841            ret = -EINVAL;
2842            goto out;
2843        }
2844    }
2845
2846    if (base_fmt) {
2847        if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
2848            error_report("Backing file format not supported for file "
2849                         "format '%s'", fmt);
2850            ret = -EINVAL;
2851            goto out;
2852        }
2853    }
2854
2855    backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
2856    if (backing_file && backing_file->value.s) {
2857        if (!strcmp(filename, backing_file->value.s)) {
2858            error_report("Error: Trying to create an image with the "
2859                         "same filename as the backing file");
2860            ret = -EINVAL;
2861            goto out;
2862        }
2863    }
2864
2865    backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
2866    if (backing_fmt && backing_fmt->value.s) {
2867        backing_drv = bdrv_find_format(backing_fmt->value.s);
2868        if (!backing_drv) {
2869            error_report("Unknown backing file format '%s'",
2870                         backing_fmt->value.s);
2871            ret = -EINVAL;
2872            goto out;
2873        }
2874    }
2875
2876    // The size for the image must always be specified, with one exception:
2877    // If we are using a backing file, we can obtain the size from there
2878    if (get_option_parameter(param, BLOCK_OPT_SIZE)->value.n == -1) {
2879        if (backing_file && backing_file->value.s) {
2880            uint64_t size;
2881            char buf[32];
2882
2883            bs = bdrv_new("");
2884
2885            ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
2886            if (ret < 0) {
2887                error_report("Could not open '%s'", backing_file->value.s);
2888                goto out;
2889            }
2890            bdrv_get_geometry(bs, &size);
2891            size *= 512;
2892
2893            snprintf(buf, sizeof(buf), "%" PRId64, size);
2894            set_option_parameter(param, BLOCK_OPT_SIZE, buf);
2895        } else {
2896            error_report("Image creation needs a size parameter");
2897            ret = -EINVAL;
2898            goto out;
2899        }
2900    }
2901
2902    printf("Formatting '%s', fmt=%s ", filename, fmt);
2903    print_option_parameters(param);
2904    puts("");
2905
2906    ret = bdrv_create(drv, filename, param);
2907
2908    if (ret < 0) {
2909        if (ret == -ENOTSUP) {
2910            error_report("Formatting or formatting option not supported for "
2911                         "file format '%s'", fmt);
2912        } else if (ret == -EFBIG) {
2913            error_report("The image size is too large for file format '%s'",
2914                         fmt);
2915        } else {
2916            error_report("%s: error while creating %s: %s", filename, fmt,
2917                         strerror(-ret));
2918        }
2919    }
2920
2921out:
2922    free_option_parameters(create_options);
2923    free_option_parameters(param);
2924
2925    if (bs) {
2926        bdrv_delete(bs);
2927    }
2928
2929    return ret;
2930}
2931