qemu/block/parallels.c
<<
>>
Prefs
   1/*
   2 * Block driver for Parallels disk image format
   3 *
   4 * Copyright (c) 2007 Alex Beregszaszi
   5 * Copyright (c) 2015 Denis V. Lunev <den@openvz.org>
   6 *
   7 * This code was originally based on comparing different disk images created
   8 * by Parallels. Currently it is based on opened OpenVZ sources
   9 * available at
  10 *     http://git.openvz.org/?p=ploop;a=summary
  11 *
  12 * Permission is hereby granted, free of charge, to any person obtaining a copy
  13 * of this software and associated documentation files (the "Software"), to deal
  14 * in the Software without restriction, including without limitation the rights
  15 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  16 * copies of the Software, and to permit persons to whom the Software is
  17 * furnished to do so, subject to the following conditions:
  18 *
  19 * The above copyright notice and this permission notice shall be included in
  20 * all copies or substantial portions of the Software.
  21 *
  22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  23 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  24 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  25 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  26 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  27 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  28 * THE SOFTWARE.
  29 */
  30
  31#include "qemu/osdep.h"
  32#include "qemu/error-report.h"
  33#include "qapi/error.h"
  34#include "block/block_int.h"
  35#include "block/qdict.h"
  36#include "sysemu/block-backend.h"
  37#include "qemu/module.h"
  38#include "qemu/option.h"
  39#include "qapi/qmp/qdict.h"
  40#include "qapi/qobject-input-visitor.h"
  41#include "qapi/qapi-visit-block-core.h"
  42#include "qemu/bswap.h"
  43#include "qemu/bitmap.h"
  44#include "qemu/memalign.h"
  45#include "migration/blocker.h"
  46#include "parallels.h"
  47
  48/**************************************************************/
  49
/*
 * Magic of the older header flavour. When it is found at open time the BAT
 * entries are used as sector offsets directly (off_multiplier == 1) and only
 * the low 32 bits of the header's nb_sectors field are honoured.
 */
#define HEADER_MAGIC "WithoutFreeSpace"
/*
 * Magic of the extended header flavour. BAT entries are scaled by the number
 * of sectors per cluster (off_multiplier == tracks). Newly created images
 * are written with this magic.
 */
#define HEADER_MAGIC2 "WithouFreSpacExt"
/* The only header version accepted by probe and open. */
#define HEADER_VERSION 2
/*
 * Value stored in the header's 'inuse' field while the image is opened
 * read-write; seeing it at open time means the image was not closed
 * correctly.
 */
#define HEADER_INUSE_MAGIC  (0x746F6E59)
/* Bound on (image size / cluster size) enforced when creating an image. */
#define MAX_PARALLELS_IMAGE_FACTOR (1ull << 32)
  55
/*
 * String <-> value table for the "prealloc-mode" runtime option; it is what
 * qapi_enum_parse() consults in parallels_open().
 * NOTE(review): the array order presumably has to match the
 * PRL_PREALLOC_MODE_* enumerators declared in parallels.h -- confirm before
 * reordering or adding entries.
 */
static QEnumLookup prealloc_mode_lookup = {
    .array = (const char *const[]) {
        "falloc",
        "truncate",
    },
    .size = PRL_PREALLOC_MODE__MAX
};
  63
/* Names of the runtime (open-time) options understood by this driver. */
#define PARALLELS_OPT_PREALLOC_MODE     "prealloc-mode"
#define PARALLELS_OPT_PREALLOC_SIZE     "prealloc-size"

/*
 * Runtime options absorbed from the options QDict in parallels_open().
 * Both entries carry a default value string, so neither has to be given
 * explicitly.
 */
static QemuOptsList parallels_runtime_opts = {
    .name = "parallels",
    .head = QTAILQ_HEAD_INITIALIZER(parallels_runtime_opts.head),
    .desc = {
        {
            /* extra space added whenever the image file has to grow */
            .name = PARALLELS_OPT_PREALLOC_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Preallocation size on image expansion",
            .def_value_str = "128M",
        },
        {
            /* parsed through prealloc_mode_lookup */
            .name = PARALLELS_OPT_PREALLOC_MODE,
            .type = QEMU_OPT_STRING,
            .help = "Preallocation mode on image expansion "
                    "(allowed values: falloc, truncate)",
            .def_value_str = "falloc",
        },
        { /* end of list */ },
    },
};
  87
/*
 * Legacy (QemuOpts based) image creation options. They are used as the
 * filter list in parallels_co_create_opts() and exported through the
 * driver's create_opts field.
 */
static QemuOptsList parallels_create_opts = {
    .name = "parallels-create-opts",
    .head = QTAILQ_HEAD_INITIALIZER(parallels_create_opts.head),
    .desc = {
        {
            .name = BLOCK_OPT_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Virtual disk size",
        },
        {
            /* defaults to DEFAULT_CLUSTER_SIZE when not given */
            .name = BLOCK_OPT_CLUSTER_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Parallels image cluster size",
            .def_value_str = stringify(DEFAULT_CLUSTER_SIZE),
        },
        { /* end of list */ }
    }
};
 106
 107
 108static int64_t bat2sect(BDRVParallelsState *s, uint32_t idx)
 109{
 110    return (uint64_t)le32_to_cpu(s->bat_bitmap[idx]) * s->off_multiplier;
 111}
 112
 113static uint32_t bat_entry_off(uint32_t idx)
 114{
 115    return sizeof(ParallelsHeader) + sizeof(uint32_t) * idx;
 116}
 117
 118static int64_t seek_to_sector(BDRVParallelsState *s, int64_t sector_num)
 119{
 120    uint32_t index, offset;
 121
 122    index = sector_num / s->tracks;
 123    offset = sector_num % s->tracks;
 124
 125    /* not allocated */
 126    if ((index >= s->bat_size) || (s->bat_bitmap[index] == 0)) {
 127        return -1;
 128    }
 129    return bat2sect(s, index) + offset;
 130}
 131
 132static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num,
 133        int nb_sectors)
 134{
 135    int ret = s->tracks - sector_num % s->tracks;
 136    return MIN(nb_sectors, ret);
 137}
 138
 139static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
 140                            int nb_sectors, int *pnum)
 141{
 142    int64_t start_off = -2, prev_end_off = -2;
 143
 144    *pnum = 0;
 145    while (nb_sectors > 0 || start_off == -2) {
 146        int64_t offset = seek_to_sector(s, sector_num);
 147        int to_end;
 148
 149        if (start_off == -2) {
 150            start_off = offset;
 151            prev_end_off = offset;
 152        } else if (offset != prev_end_off) {
 153            break;
 154        }
 155
 156        to_end = cluster_remainder(s, sector_num, nb_sectors);
 157        nb_sectors -= to_end;
 158        sector_num += to_end;
 159        *pnum += to_end;
 160
 161        if (offset > 0) {
 162            prev_end_off += to_end;
 163        }
 164    }
 165    return start_off;
 166}
 167
 168static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
 169                                 int nb_sectors, int *pnum)
 170{
 171    int ret = 0;
 172    BDRVParallelsState *s = bs->opaque;
 173    int64_t pos, space, idx, to_allocate, i, len;
 174
 175    pos = block_status(s, sector_num, nb_sectors, pnum);
 176    if (pos > 0) {
 177        return pos;
 178    }
 179
 180    idx = sector_num / s->tracks;
 181    to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx;
 182
 183    /* This function is called only by parallels_co_writev(), which will never
 184     * pass a sector_num at or beyond the end of the image (because the block
 185     * layer never passes such a sector_num to that function). Therefore, idx
 186     * is always below s->bat_size.
 187     * block_status() will limit *pnum so that sector_num + *pnum will not
 188     * exceed the image end. Therefore, idx + to_allocate cannot exceed
 189     * s->bat_size.
 190     * Note that s->bat_size is an unsigned int, therefore idx + to_allocate
 191     * will always fit into a uint32_t. */
 192    assert(idx < s->bat_size && idx + to_allocate <= s->bat_size);
 193
 194    space = to_allocate * s->tracks;
 195    len = bdrv_getlength(bs->file->bs);
 196    if (len < 0) {
 197        return len;
 198    }
 199    if (s->data_end + space > (len >> BDRV_SECTOR_BITS)) {
 200        space += s->prealloc_size;
 201        /*
 202         * We require the expanded size to read back as zero. If the
 203         * user permitted truncation, we try that; but if it fails, we
 204         * force the safer-but-slower fallocate.
 205         */
 206        if (s->prealloc_mode == PRL_PREALLOC_MODE_TRUNCATE) {
 207            ret = bdrv_truncate(bs->file,
 208                                (s->data_end + space) << BDRV_SECTOR_BITS,
 209                                false, PREALLOC_MODE_OFF, BDRV_REQ_ZERO_WRITE,
 210                                NULL);
 211            if (ret == -ENOTSUP) {
 212                s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
 213            }
 214        }
 215        if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
 216            ret = bdrv_pwrite_zeroes(bs->file,
 217                                     s->data_end << BDRV_SECTOR_BITS,
 218                                     space << BDRV_SECTOR_BITS, 0);
 219        }
 220        if (ret < 0) {
 221            return ret;
 222        }
 223    }
 224
 225    /* Try to read from backing to fill empty clusters
 226     * FIXME: 1. previous write_zeroes may be redundant
 227     *        2. most of data we read from backing will be rewritten by
 228     *           parallels_co_writev. On aligned-to-cluster write we do not need
 229     *           this read at all.
 230     *        3. it would be good to combine write of data from backing and new
 231     *           data into one write call */
 232    if (bs->backing) {
 233        int64_t nb_cow_sectors = to_allocate * s->tracks;
 234        int64_t nb_cow_bytes = nb_cow_sectors << BDRV_SECTOR_BITS;
 235        void *buf = qemu_blockalign(bs, nb_cow_bytes);
 236
 237        ret = bdrv_co_pread(bs->backing, idx * s->tracks * BDRV_SECTOR_SIZE,
 238                            nb_cow_bytes, buf, 0);
 239        if (ret < 0) {
 240            qemu_vfree(buf);
 241            return ret;
 242        }
 243
 244        ret = bdrv_co_pwritev(bs->file, s->data_end * BDRV_SECTOR_SIZE,
 245                              nb_cow_bytes, buf, 0);
 246        qemu_vfree(buf);
 247        if (ret < 0) {
 248            return ret;
 249        }
 250    }
 251
 252    for (i = 0; i < to_allocate; i++) {
 253        s->bat_bitmap[idx + i] = cpu_to_le32(s->data_end / s->off_multiplier);
 254        s->data_end += s->tracks;
 255        bitmap_set(s->bat_dirty_bmap,
 256                   bat_entry_off(idx + i) / s->bat_dirty_block, 1);
 257    }
 258
 259    return bat2sect(s, idx) + sector_num % s->tracks;
 260}
 261
 262
 263static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs)
 264{
 265    BDRVParallelsState *s = bs->opaque;
 266    unsigned long size = DIV_ROUND_UP(s->header_size, s->bat_dirty_block);
 267    unsigned long bit;
 268
 269    qemu_co_mutex_lock(&s->lock);
 270
 271    bit = find_first_bit(s->bat_dirty_bmap, size);
 272    while (bit < size) {
 273        uint32_t off = bit * s->bat_dirty_block;
 274        uint32_t to_write = s->bat_dirty_block;
 275        int ret;
 276
 277        if (off + to_write > s->header_size) {
 278            to_write = s->header_size - off;
 279        }
 280        ret = bdrv_pwrite(bs->file, off, (uint8_t *)s->header + off,
 281                          to_write);
 282        if (ret < 0) {
 283            qemu_co_mutex_unlock(&s->lock);
 284            return ret;
 285        }
 286        bit = find_next_bit(s->bat_dirty_bmap, size, bit + 1);
 287    }
 288    bitmap_zero(s->bat_dirty_bmap, size);
 289
 290    qemu_co_mutex_unlock(&s->lock);
 291    return 0;
 292}
 293
 294
 295static int coroutine_fn parallels_co_block_status(BlockDriverState *bs,
 296                                                  bool want_zero,
 297                                                  int64_t offset,
 298                                                  int64_t bytes,
 299                                                  int64_t *pnum,
 300                                                  int64_t *map,
 301                                                  BlockDriverState **file)
 302{
 303    BDRVParallelsState *s = bs->opaque;
 304    int count;
 305
 306    assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE));
 307    qemu_co_mutex_lock(&s->lock);
 308    offset = block_status(s, offset >> BDRV_SECTOR_BITS,
 309                          bytes >> BDRV_SECTOR_BITS, &count);
 310    qemu_co_mutex_unlock(&s->lock);
 311
 312    *pnum = count * BDRV_SECTOR_SIZE;
 313    if (offset < 0) {
 314        return 0;
 315    }
 316
 317    *map = offset * BDRV_SECTOR_SIZE;
 318    *file = bs->file->bs;
 319    return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
 320}
 321
 322static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
 323                                            int64_t sector_num, int nb_sectors,
 324                                            QEMUIOVector *qiov, int flags)
 325{
 326    BDRVParallelsState *s = bs->opaque;
 327    uint64_t bytes_done = 0;
 328    QEMUIOVector hd_qiov;
 329    int ret = 0;
 330
 331    assert(!flags);
 332    qemu_iovec_init(&hd_qiov, qiov->niov);
 333
 334    while (nb_sectors > 0) {
 335        int64_t position;
 336        int n, nbytes;
 337
 338        qemu_co_mutex_lock(&s->lock);
 339        position = allocate_clusters(bs, sector_num, nb_sectors, &n);
 340        qemu_co_mutex_unlock(&s->lock);
 341        if (position < 0) {
 342            ret = (int)position;
 343            break;
 344        }
 345
 346        nbytes = n << BDRV_SECTOR_BITS;
 347
 348        qemu_iovec_reset(&hd_qiov);
 349        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
 350
 351        ret = bdrv_co_pwritev(bs->file, position * BDRV_SECTOR_SIZE, nbytes,
 352                              &hd_qiov, 0);
 353        if (ret < 0) {
 354            break;
 355        }
 356
 357        nb_sectors -= n;
 358        sector_num += n;
 359        bytes_done += nbytes;
 360    }
 361
 362    qemu_iovec_destroy(&hd_qiov);
 363    return ret;
 364}
 365
 366static coroutine_fn int parallels_co_readv(BlockDriverState *bs,
 367        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
 368{
 369    BDRVParallelsState *s = bs->opaque;
 370    uint64_t bytes_done = 0;
 371    QEMUIOVector hd_qiov;
 372    int ret = 0;
 373
 374    qemu_iovec_init(&hd_qiov, qiov->niov);
 375
 376    while (nb_sectors > 0) {
 377        int64_t position;
 378        int n, nbytes;
 379
 380        qemu_co_mutex_lock(&s->lock);
 381        position = block_status(s, sector_num, nb_sectors, &n);
 382        qemu_co_mutex_unlock(&s->lock);
 383
 384        nbytes = n << BDRV_SECTOR_BITS;
 385
 386        qemu_iovec_reset(&hd_qiov);
 387        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
 388
 389        if (position < 0) {
 390            if (bs->backing) {
 391                ret = bdrv_co_preadv(bs->backing, sector_num * BDRV_SECTOR_SIZE,
 392                                     nbytes, &hd_qiov, 0);
 393                if (ret < 0) {
 394                    break;
 395                }
 396            } else {
 397                qemu_iovec_memset(&hd_qiov, 0, 0, nbytes);
 398            }
 399        } else {
 400            ret = bdrv_co_preadv(bs->file, position * BDRV_SECTOR_SIZE, nbytes,
 401                                 &hd_qiov, 0);
 402            if (ret < 0) {
 403                break;
 404            }
 405        }
 406
 407        nb_sectors -= n;
 408        sector_num += n;
 409        bytes_done += nbytes;
 410    }
 411
 412    qemu_iovec_destroy(&hd_qiov);
 413    return ret;
 414}
 415
 416
 417static int coroutine_fn parallels_co_check(BlockDriverState *bs,
 418                                           BdrvCheckResult *res,
 419                                           BdrvCheckMode fix)
 420{
 421    BDRVParallelsState *s = bs->opaque;
 422    int64_t size, prev_off, high_off;
 423    int ret;
 424    uint32_t i;
 425    bool flush_bat = false;
 426
 427    size = bdrv_getlength(bs->file->bs);
 428    if (size < 0) {
 429        res->check_errors++;
 430        return size;
 431    }
 432
 433    qemu_co_mutex_lock(&s->lock);
 434    if (s->header_unclean) {
 435        fprintf(stderr, "%s image was not closed correctly\n",
 436                fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR");
 437        res->corruptions++;
 438        if (fix & BDRV_FIX_ERRORS) {
 439            /* parallels_close will do the job right */
 440            res->corruptions_fixed++;
 441            s->header_unclean = false;
 442        }
 443    }
 444
 445    res->bfi.total_clusters = s->bat_size;
 446    res->bfi.compressed_clusters = 0; /* compression is not supported */
 447
 448    high_off = 0;
 449    prev_off = 0;
 450    for (i = 0; i < s->bat_size; i++) {
 451        int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS;
 452        if (off == 0) {
 453            prev_off = 0;
 454            continue;
 455        }
 456
 457        /* cluster outside the image */
 458        if (off > size) {
 459            fprintf(stderr, "%s cluster %u is outside image\n",
 460                    fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
 461            res->corruptions++;
 462            if (fix & BDRV_FIX_ERRORS) {
 463                prev_off = 0;
 464                s->bat_bitmap[i] = 0;
 465                res->corruptions_fixed++;
 466                flush_bat = true;
 467                continue;
 468            }
 469        }
 470
 471        res->bfi.allocated_clusters++;
 472        if (off > high_off) {
 473            high_off = off;
 474        }
 475
 476        if (prev_off != 0 && (prev_off + s->cluster_size) != off) {
 477            res->bfi.fragmented_clusters++;
 478        }
 479        prev_off = off;
 480    }
 481
 482    ret = 0;
 483    if (flush_bat) {
 484        ret = bdrv_pwrite_sync(bs->file, 0, s->header, s->header_size);
 485        if (ret < 0) {
 486            res->check_errors++;
 487            goto out;
 488        }
 489    }
 490
 491    res->image_end_offset = high_off + s->cluster_size;
 492    if (size > res->image_end_offset) {
 493        int64_t count;
 494        count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size);
 495        fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n",
 496                fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR",
 497                size - res->image_end_offset);
 498        res->leaks += count;
 499        if (fix & BDRV_FIX_LEAKS) {
 500            Error *local_err = NULL;
 501
 502            /*
 503             * In order to really repair the image, we must shrink it.
 504             * That means we have to pass exact=true.
 505             */
 506            ret = bdrv_truncate(bs->file, res->image_end_offset, true,
 507                                PREALLOC_MODE_OFF, 0, &local_err);
 508            if (ret < 0) {
 509                error_report_err(local_err);
 510                res->check_errors++;
 511                goto out;
 512            }
 513            res->leaks_fixed += count;
 514        }
 515    }
 516
 517out:
 518    qemu_co_mutex_unlock(&s->lock);
 519    return ret;
 520}
 521
 522
 523static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts,
 524                                            Error **errp)
 525{
 526    BlockdevCreateOptionsParallels *parallels_opts;
 527    BlockDriverState *bs;
 528    BlockBackend *blk;
 529    int64_t total_size, cl_size;
 530    uint32_t bat_entries, bat_sectors;
 531    ParallelsHeader header;
 532    uint8_t tmp[BDRV_SECTOR_SIZE];
 533    int ret;
 534
 535    assert(opts->driver == BLOCKDEV_DRIVER_PARALLELS);
 536    parallels_opts = &opts->u.parallels;
 537
 538    /* Sanity checks */
 539    total_size = parallels_opts->size;
 540
 541    if (parallels_opts->has_cluster_size) {
 542        cl_size = parallels_opts->cluster_size;
 543    } else {
 544        cl_size = DEFAULT_CLUSTER_SIZE;
 545    }
 546
 547    /* XXX What is the real limit here? This is an insanely large maximum. */
 548    if (cl_size >= INT64_MAX / MAX_PARALLELS_IMAGE_FACTOR) {
 549        error_setg(errp, "Cluster size is too large");
 550        return -EINVAL;
 551    }
 552    if (total_size >= MAX_PARALLELS_IMAGE_FACTOR * cl_size) {
 553        error_setg(errp, "Image size is too large for this cluster size");
 554        return -E2BIG;
 555    }
 556
 557    if (!QEMU_IS_ALIGNED(total_size, BDRV_SECTOR_SIZE)) {
 558        error_setg(errp, "Image size must be a multiple of 512 bytes");
 559        return -EINVAL;
 560    }
 561
 562    if (!QEMU_IS_ALIGNED(cl_size, BDRV_SECTOR_SIZE)) {
 563        error_setg(errp, "Cluster size must be a multiple of 512 bytes");
 564        return -EINVAL;
 565    }
 566
 567    /* Create BlockBackend to write to the image */
 568    bs = bdrv_open_blockdev_ref(parallels_opts->file, errp);
 569    if (bs == NULL) {
 570        return -EIO;
 571    }
 572
 573    blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
 574                          errp);
 575    if (!blk) {
 576        ret = -EPERM;
 577        goto out;
 578    }
 579    blk_set_allow_write_beyond_eof(blk, true);
 580
 581    /* Create image format */
 582    bat_entries = DIV_ROUND_UP(total_size, cl_size);
 583    bat_sectors = DIV_ROUND_UP(bat_entry_off(bat_entries), cl_size);
 584    bat_sectors = (bat_sectors *  cl_size) >> BDRV_SECTOR_BITS;
 585
 586    memset(&header, 0, sizeof(header));
 587    memcpy(header.magic, HEADER_MAGIC2, sizeof(header.magic));
 588    header.version = cpu_to_le32(HEADER_VERSION);
 589    /* don't care much about geometry, it is not used on image level */
 590    header.heads = cpu_to_le32(HEADS_NUMBER);
 591    header.cylinders = cpu_to_le32(total_size / BDRV_SECTOR_SIZE
 592                                   / HEADS_NUMBER / SEC_IN_CYL);
 593    header.tracks = cpu_to_le32(cl_size >> BDRV_SECTOR_BITS);
 594    header.bat_entries = cpu_to_le32(bat_entries);
 595    header.nb_sectors = cpu_to_le64(DIV_ROUND_UP(total_size, BDRV_SECTOR_SIZE));
 596    header.data_off = cpu_to_le32(bat_sectors);
 597
 598    /* write all the data */
 599    memset(tmp, 0, sizeof(tmp));
 600    memcpy(tmp, &header, sizeof(header));
 601
 602    ret = blk_pwrite(blk, 0, tmp, BDRV_SECTOR_SIZE, 0);
 603    if (ret < 0) {
 604        goto exit;
 605    }
 606    ret = blk_pwrite_zeroes(blk, BDRV_SECTOR_SIZE,
 607                            (bat_sectors - 1) << BDRV_SECTOR_BITS, 0);
 608    if (ret < 0) {
 609        goto exit;
 610    }
 611
 612    ret = 0;
 613out:
 614    blk_unref(blk);
 615    bdrv_unref(bs);
 616    return ret;
 617
 618exit:
 619    error_setg_errno(errp, -ret, "Failed to create Parallels image");
 620    goto out;
 621}
 622
 623static int coroutine_fn parallels_co_create_opts(BlockDriver *drv,
 624                                                 const char *filename,
 625                                                 QemuOpts *opts,
 626                                                 Error **errp)
 627{
 628    BlockdevCreateOptions *create_options = NULL;
 629    BlockDriverState *bs = NULL;
 630    QDict *qdict;
 631    Visitor *v;
 632    int ret;
 633
 634    static const QDictRenames opt_renames[] = {
 635        { BLOCK_OPT_CLUSTER_SIZE,       "cluster-size" },
 636        { NULL, NULL },
 637    };
 638
 639    /* Parse options and convert legacy syntax */
 640    qdict = qemu_opts_to_qdict_filtered(opts, NULL, &parallels_create_opts,
 641                                        true);
 642
 643    if (!qdict_rename_keys(qdict, opt_renames, errp)) {
 644        ret = -EINVAL;
 645        goto done;
 646    }
 647
 648    /* Create and open the file (protocol layer) */
 649    ret = bdrv_create_file(filename, opts, errp);
 650    if (ret < 0) {
 651        goto done;
 652    }
 653
 654    bs = bdrv_open(filename, NULL, NULL,
 655                   BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
 656    if (bs == NULL) {
 657        ret = -EIO;
 658        goto done;
 659    }
 660
 661    /* Now get the QAPI type BlockdevCreateOptions */
 662    qdict_put_str(qdict, "driver", "parallels");
 663    qdict_put_str(qdict, "file", bs->node_name);
 664
 665    v = qobject_input_visitor_new_flat_confused(qdict, errp);
 666    if (!v) {
 667        ret = -EINVAL;
 668        goto done;
 669    }
 670
 671    visit_type_BlockdevCreateOptions(v, NULL, &create_options, errp);
 672    visit_free(v);
 673    if (!create_options) {
 674        ret = -EINVAL;
 675        goto done;
 676    }
 677
 678    /* Silently round up sizes */
 679    create_options->u.parallels.size =
 680        ROUND_UP(create_options->u.parallels.size, BDRV_SECTOR_SIZE);
 681    create_options->u.parallels.cluster_size =
 682        ROUND_UP(create_options->u.parallels.cluster_size, BDRV_SECTOR_SIZE);
 683
 684    /* Create the Parallels image (format layer) */
 685    ret = parallels_co_create(create_options, errp);
 686    if (ret < 0) {
 687        goto done;
 688    }
 689    ret = 0;
 690
 691done:
 692    qobject_unref(qdict);
 693    bdrv_unref(bs);
 694    qapi_free_BlockdevCreateOptions(create_options);
 695    return ret;
 696}
 697
 698
 699static int parallels_probe(const uint8_t *buf, int buf_size,
 700                           const char *filename)
 701{
 702    const ParallelsHeader *ph = (const void *)buf;
 703
 704    if (buf_size < sizeof(ParallelsHeader)) {
 705        return 0;
 706    }
 707
 708    if ((!memcmp(ph->magic, HEADER_MAGIC, 16) ||
 709           !memcmp(ph->magic, HEADER_MAGIC2, 16)) &&
 710           (le32_to_cpu(ph->version) == HEADER_VERSION)) {
 711        return 100;
 712    }
 713
 714    return 0;
 715}
 716
 717static int parallels_update_header(BlockDriverState *bs)
 718{
 719    BDRVParallelsState *s = bs->opaque;
 720    unsigned size = MAX(bdrv_opt_mem_align(bs->file->bs),
 721                        sizeof(ParallelsHeader));
 722
 723    if (size > s->header_size) {
 724        size = s->header_size;
 725    }
 726    return bdrv_pwrite_sync(bs->file, 0, s->header, size);
 727}
 728
 729static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
 730                          Error **errp)
 731{
 732    BDRVParallelsState *s = bs->opaque;
 733    ParallelsHeader ph;
 734    int ret, size, i;
 735    QemuOpts *opts = NULL;
 736    Error *local_err = NULL;
 737    char *buf;
 738
 739    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
 740                               BDRV_CHILD_IMAGE, false, errp);
 741    if (!bs->file) {
 742        return -EINVAL;
 743    }
 744
 745    ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph));
 746    if (ret < 0) {
 747        goto fail;
 748    }
 749
 750    bs->total_sectors = le64_to_cpu(ph.nb_sectors);
 751
 752    if (le32_to_cpu(ph.version) != HEADER_VERSION) {
 753        goto fail_format;
 754    }
 755    if (!memcmp(ph.magic, HEADER_MAGIC, 16)) {
 756        s->off_multiplier = 1;
 757        bs->total_sectors = 0xffffffff & bs->total_sectors;
 758    } else if (!memcmp(ph.magic, HEADER_MAGIC2, 16)) {
 759        s->off_multiplier = le32_to_cpu(ph.tracks);
 760    } else {
 761        goto fail_format;
 762    }
 763
 764    s->tracks = le32_to_cpu(ph.tracks);
 765    if (s->tracks == 0) {
 766        error_setg(errp, "Invalid image: Zero sectors per track");
 767        ret = -EINVAL;
 768        goto fail;
 769    }
 770    if (s->tracks > INT32_MAX/513) {
 771        error_setg(errp, "Invalid image: Too big cluster");
 772        ret = -EFBIG;
 773        goto fail;
 774    }
 775    s->cluster_size = s->tracks << BDRV_SECTOR_BITS;
 776
 777    s->bat_size = le32_to_cpu(ph.bat_entries);
 778    if (s->bat_size > INT_MAX / sizeof(uint32_t)) {
 779        error_setg(errp, "Catalog too large");
 780        ret = -EFBIG;
 781        goto fail;
 782    }
 783
 784    size = bat_entry_off(s->bat_size);
 785    s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file->bs));
 786    s->header = qemu_try_blockalign(bs->file->bs, s->header_size);
 787    if (s->header == NULL) {
 788        ret = -ENOMEM;
 789        goto fail;
 790    }
 791    s->data_end = le32_to_cpu(ph.data_off);
 792    if (s->data_end == 0) {
 793        s->data_end = ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE);
 794    }
 795    if (s->data_end < s->header_size) {
 796        /* there is not enough unused space to fit to block align between BAT
 797           and actual data. We can't avoid read-modify-write... */
 798        s->header_size = size;
 799    }
 800
 801    ret = bdrv_pread(bs->file, 0, s->header, s->header_size);
 802    if (ret < 0) {
 803        goto fail;
 804    }
 805    s->bat_bitmap = (uint32_t *)(s->header + 1);
 806
 807    for (i = 0; i < s->bat_size; i++) {
 808        int64_t off = bat2sect(s, i);
 809        if (off >= s->data_end) {
 810            s->data_end = off + s->tracks;
 811        }
 812    }
 813
 814    if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) {
 815        /* Image was not closed correctly. The check is mandatory */
 816        s->header_unclean = true;
 817        if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
 818            error_setg(errp, "parallels: Image was not closed correctly; "
 819                       "cannot be opened read/write");
 820            ret = -EACCES;
 821            goto fail;
 822        }
 823    }
 824
 825    opts = qemu_opts_create(&parallels_runtime_opts, NULL, 0, errp);
 826    if (!opts) {
 827        goto fail_options;
 828    }
 829
 830    if (!qemu_opts_absorb_qdict(opts, options, errp)) {
 831        goto fail_options;
 832    }
 833
 834    s->prealloc_size =
 835        qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0);
 836    s->prealloc_size = MAX(s->tracks, s->prealloc_size >> BDRV_SECTOR_BITS);
 837    buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE);
 838    /* prealloc_mode can be downgraded later during allocate_clusters */
 839    s->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, buf,
 840                                       PRL_PREALLOC_MODE_FALLOCATE,
 841                                       &local_err);
 842    g_free(buf);
 843    if (local_err != NULL) {
 844        error_propagate(errp, local_err);
 845        goto fail_options;
 846    }
 847
 848    if (ph.ext_off) {
 849        if (flags & BDRV_O_RDWR) {
 850            /*
 851             * It's unsafe to open image RW if there is an extension (as we
 852             * don't support it). But parallels driver in QEMU historically
 853             * ignores the extension, so print warning and don't care.
 854             */
 855            warn_report("Format Extension ignored in RW mode");
 856        } else {
 857            ret = parallels_read_format_extension(
 858                    bs, le64_to_cpu(ph.ext_off) << BDRV_SECTOR_BITS, errp);
 859            if (ret < 0) {
 860                goto fail;
 861            }
 862        }
 863    }
 864
 865    if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_INACTIVE)) {
 866        s->header->inuse = cpu_to_le32(HEADER_INUSE_MAGIC);
 867        ret = parallels_update_header(bs);
 868        if (ret < 0) {
 869            goto fail;
 870        }
 871    }
 872
 873    s->bat_dirty_block = 4 * qemu_real_host_page_size;
 874    s->bat_dirty_bmap =
 875        bitmap_new(DIV_ROUND_UP(s->header_size, s->bat_dirty_block));
 876
 877    /* Disable migration until bdrv_activate method is added */
 878    error_setg(&s->migration_blocker, "The Parallels format used by node '%s' "
 879               "does not support live migration",
 880               bdrv_get_device_or_node_name(bs));
 881    ret = migrate_add_blocker(s->migration_blocker, errp);
 882    if (ret < 0) {
 883        error_free(s->migration_blocker);
 884        goto fail;
 885    }
 886    qemu_co_mutex_init(&s->lock);
 887    return 0;
 888
 889fail_format:
 890    error_setg(errp, "Image not in Parallels format");
 891fail_options:
 892    ret = -EINVAL;
 893fail:
 894    qemu_vfree(s->header);
 895    return ret;
 896}
 897
 898
 899static void parallels_close(BlockDriverState *bs)
 900{
 901    BDRVParallelsState *s = bs->opaque;
 902
 903    if ((bs->open_flags & BDRV_O_RDWR) && !(bs->open_flags & BDRV_O_INACTIVE)) {
 904        s->header->inuse = 0;
 905        parallels_update_header(bs);
 906
 907        /* errors are ignored, so we might as well pass exact=true */
 908        bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS, true,
 909                      PREALLOC_MODE_OFF, 0, NULL);
 910    }
 911
 912    g_free(s->bat_dirty_bmap);
 913    qemu_vfree(s->header);
 914
 915    migrate_del_blocker(s->migration_blocker);
 916    error_free(s->migration_blocker);
 917}
 918
 919static BlockDriver bdrv_parallels = {
 920    .format_name        = "parallels",
 921    .instance_size      = sizeof(BDRVParallelsState),
 922    .bdrv_probe         = parallels_probe,
 923    .bdrv_open          = parallels_open,
 924    .bdrv_close         = parallels_close,
 925    .bdrv_child_perm          = bdrv_default_perms,
 926    .bdrv_co_block_status     = parallels_co_block_status,
 927    .bdrv_has_zero_init       = bdrv_has_zero_init_1,
 928    .bdrv_co_flush_to_os      = parallels_co_flush_to_os,
 929    .bdrv_co_readv  = parallels_co_readv,
 930    .bdrv_co_writev = parallels_co_writev,
 931    .is_format      = true,
 932    .supports_backing = true,
 933    .bdrv_co_create      = parallels_co_create,
 934    .bdrv_co_create_opts = parallels_co_create_opts,
 935    .bdrv_co_check  = parallels_co_check,
 936    .create_opts    = &parallels_create_opts,
 937};
 938
/* Register the Parallels format driver with the block layer. */
static void bdrv_parallels_init(void)
{
    bdrv_register(&bdrv_parallels);
}

/* Hook the registration function into the block module initialisation. */
block_init(bdrv_parallels_init);
 945