qemu/block/parallels.c
<<
>>
Prefs
   1/*
   2 * Block driver for Parallels disk image format
   3 *
   4 * Copyright (c) 2007 Alex Beregszaszi
   5 * Copyright (c) 2015 Denis V. Lunev <den@openvz.org>
   6 *
   7 * This code was originally based on comparing different disk images created
   8 * by Parallels. Currently it is based on opened OpenVZ sources
   9 * available at
  10 *     http://git.openvz.org/?p=ploop;a=summary
  11 *
  12 * Permission is hereby granted, free of charge, to any person obtaining a copy
  13 * of this software and associated documentation files (the "Software"), to deal
  14 * in the Software without restriction, including without limitation the rights
  15 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  16 * copies of the Software, and to permit persons to whom the Software is
  17 * furnished to do so, subject to the following conditions:
  18 *
  19 * The above copyright notice and this permission notice shall be included in
  20 * all copies or substantial portions of the Software.
  21 *
  22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  23 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  24 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  25 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  26 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  27 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  28 * THE SOFTWARE.
  29 */
  30
  31#include "qemu/osdep.h"
  32#include "qemu/error-report.h"
  33#include "qapi/error.h"
  34#include "block/block_int.h"
  35#include "block/qdict.h"
  36#include "sysemu/block-backend.h"
  37#include "qemu/module.h"
  38#include "qemu/option.h"
  39#include "qapi/qmp/qdict.h"
  40#include "qapi/qobject-input-visitor.h"
  41#include "qapi/qapi-visit-block-core.h"
  42#include "qemu/bswap.h"
  43#include "qemu/bitmap.h"
  44#include "qemu/memalign.h"
  45#include "migration/blocker.h"
  46#include "parallels.h"
  47
  48/**************************************************************/
  49
  50#define HEADER_MAGIC "WithoutFreeSpace"
  51#define HEADER_MAGIC2 "WithouFreSpacExt"
  52#define HEADER_VERSION 2
  53#define HEADER_INUSE_MAGIC  (0x746F6E59)
  54#define MAX_PARALLELS_IMAGE_FACTOR (1ull << 32)
  55
/*
 * Names accepted for the "prealloc-mode" runtime option.  parallels_open()
 * feeds this table to qapi_enum_parse() to obtain s->prealloc_mode.
 * NOTE(review): the string order presumably has to match the
 * PRL_PREALLOC_MODE_* enum declared in parallels.h -- confirm there.
 */
static QEnumLookup prealloc_mode_lookup = {
    .array = (const char *const[]) {
        "falloc",
        "truncate",
    },
    .size = PRL_PREALLOC_MODE__MAX
};
  63
  64#define PARALLELS_OPT_PREALLOC_MODE     "prealloc-mode"
  65#define PARALLELS_OPT_PREALLOC_SIZE     "prealloc-size"
  66
/*
 * Runtime (open-time) options of the driver.  parallels_open() creates a
 * QemuOpts from this list, absorbs the matching keys from the options QDict
 * and reads both values back.
 */
static QemuOptsList parallels_runtime_opts = {
    .name = "parallels",
    .head = QTAILQ_HEAD_INITIALIZER(parallels_runtime_opts.head),
    .desc = {
        {
            /* Given in bytes; parallels_open() converts it to sectors */
            .name = PARALLELS_OPT_PREALLOC_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Preallocation size on image expansion",
            .def_value_str = "128M",
        },
        {
            /* Parsed against prealloc_mode_lookup */
            .name = PARALLELS_OPT_PREALLOC_MODE,
            .type = QEMU_OPT_STRING,
            .help = "Preallocation mode on image expansion "
                    "(allowed values: falloc, truncate)",
            .def_value_str = "falloc",
        },
        { /* end of list */ },
    },
};
  87
/*
 * Image creation options in the legacy QemuOpts form.  Used by
 * parallels_co_create_opts() to filter the user-supplied options and
 * exported through the driver's .create_opts field.
 */
static QemuOptsList parallels_create_opts = {
    .name = "parallels-create-opts",
    .head = QTAILQ_HEAD_INITIALIZER(parallels_create_opts.head),
    .desc = {
        {
            .name = BLOCK_OPT_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Virtual disk size",
        },
        {
            .name = BLOCK_OPT_CLUSTER_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Parallels image cluster size",
            .def_value_str = stringify(DEFAULT_CLUSTER_SIZE),
        },
        { /* end of list */ }
    }
};
 106
 107
 108static int64_t bat2sect(BDRVParallelsState *s, uint32_t idx)
 109{
 110    return (uint64_t)le32_to_cpu(s->bat_bitmap[idx]) * s->off_multiplier;
 111}
 112
 113static uint32_t bat_entry_off(uint32_t idx)
 114{
 115    return sizeof(ParallelsHeader) + sizeof(uint32_t) * idx;
 116}
 117
 118static int64_t seek_to_sector(BDRVParallelsState *s, int64_t sector_num)
 119{
 120    uint32_t index, offset;
 121
 122    index = sector_num / s->tracks;
 123    offset = sector_num % s->tracks;
 124
 125    /* not allocated */
 126    if ((index >= s->bat_size) || (s->bat_bitmap[index] == 0)) {
 127        return -1;
 128    }
 129    return bat2sect(s, index) + offset;
 130}
 131
 132static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num,
 133        int nb_sectors)
 134{
 135    int ret = s->tracks - sector_num % s->tracks;
 136    return MIN(nb_sectors, ret);
 137}
 138
 139static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
 140                            int nb_sectors, int *pnum)
 141{
 142    int64_t start_off = -2, prev_end_off = -2;
 143
 144    *pnum = 0;
 145    while (nb_sectors > 0 || start_off == -2) {
 146        int64_t offset = seek_to_sector(s, sector_num);
 147        int to_end;
 148
 149        if (start_off == -2) {
 150            start_off = offset;
 151            prev_end_off = offset;
 152        } else if (offset != prev_end_off) {
 153            break;
 154        }
 155
 156        to_end = cluster_remainder(s, sector_num, nb_sectors);
 157        nb_sectors -= to_end;
 158        sector_num += to_end;
 159        *pnum += to_end;
 160
 161        if (offset > 0) {
 162            prev_end_off += to_end;
 163        }
 164    }
 165    return start_off;
 166}
 167
 168static void parallels_set_bat_entry(BDRVParallelsState *s,
 169                                    uint32_t index, uint32_t offset)
 170{
 171    s->bat_bitmap[index] = cpu_to_le32(offset);
 172    bitmap_set(s->bat_dirty_bmap, bat_entry_off(index) / s->bat_dirty_block, 1);
 173}
 174
/*
 * Make sure the clusters backing a write at @sector_num are allocated.
 *
 * block_status() determines the run to work on and stores its length in
 * *pnum.  If that run is already allocated its file sector is returned
 * right away.  Otherwise the image file is grown when needed, the new
 * clusters are filled from the backing file (if there is one) and the BAT
 * entries are pointed at the new clusters.
 *
 * Returns the file sector corresponding to @sector_num, or a negative errno.
 * The only caller visible in this file, parallels_co_writev(), holds s->lock
 * around the call.
 */
static int64_t coroutine_fn GRAPH_RDLOCK
allocate_clusters(BlockDriverState *bs, int64_t sector_num,
                  int nb_sectors, int *pnum)
{
    int ret = 0;
    BDRVParallelsState *s = bs->opaque;
    int64_t pos, space, idx, to_allocate, i, len;

    pos = block_status(s, sector_num, nb_sectors, pnum);
    if (pos > 0) {
        /* Already allocated, nothing to do */
        return pos;
    }

    /* First cluster to allocate and the number of clusters the run spans */
    idx = sector_num / s->tracks;
    to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx;

    /* This function is called only by parallels_co_writev(), which will never
     * pass a sector_num at or beyond the end of the image (because the block
     * layer never passes such a sector_num to that function). Therefore, idx
     * is always below s->bat_size.
     * block_status() will limit *pnum so that sector_num + *pnum will not
     * exceed the image end. Therefore, idx + to_allocate cannot exceed
     * s->bat_size.
     * Note that s->bat_size is an unsigned int, therefore idx + to_allocate
     * will always fit into a uint32_t. */
    assert(idx < s->bat_size && idx + to_allocate <= s->bat_size);

    /* Sectors needed in the image file for the new clusters */
    space = to_allocate * s->tracks;
    len = bdrv_co_getlength(bs->file->bs);
    if (len < 0) {
        return len;
    }
    if (s->data_end + space > (len >> BDRV_SECTOR_BITS)) {
        /* The file is too short: grow it, with s->prealloc_size on top */
        space += s->prealloc_size;
        /*
         * We require the expanded size to read back as zero. If the
         * user permitted truncation, we try that; but if it fails, we
         * force the safer-but-slower fallocate.
         */
        if (s->prealloc_mode == PRL_PREALLOC_MODE_TRUNCATE) {
            ret = bdrv_co_truncate(bs->file,
                                   (s->data_end + space) << BDRV_SECTOR_BITS,
                                   false, PREALLOC_MODE_OFF,
                                   BDRV_REQ_ZERO_WRITE, NULL);
            if (ret == -ENOTSUP) {
                /* The mode switch is stored in s, so it sticks */
                s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
            }
        }
        if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
            ret = bdrv_co_pwrite_zeroes(bs->file,
                                        s->data_end << BDRV_SECTOR_BITS,
                                        space << BDRV_SECTOR_BITS, 0);
        }
        if (ret < 0) {
            return ret;
        }
    }

    /* Try to read from backing to fill empty clusters
     * FIXME: 1. previous write_zeroes may be redundant
     *        2. most of data we read from backing will be rewritten by
     *           parallels_co_writev. On aligned-to-cluster write we do not need
     *           this read at all.
     *        3. it would be good to combine write of data from backing and new
     *           data into one write call */
    if (bs->backing) {
        int64_t nb_cow_sectors = to_allocate * s->tracks;
        int64_t nb_cow_bytes = nb_cow_sectors << BDRV_SECTOR_BITS;
        void *buf = qemu_blockalign(bs, nb_cow_bytes);

        ret = bdrv_co_pread(bs->backing, idx * s->tracks * BDRV_SECTOR_SIZE,
                            nb_cow_bytes, buf, 0);
        if (ret < 0) {
            qemu_vfree(buf);
            return ret;
        }

        ret = bdrv_co_pwrite(bs->file, s->data_end * BDRV_SECTOR_SIZE,
                             nb_cow_bytes, buf, 0);
        qemu_vfree(buf);
        if (ret < 0) {
            return ret;
        }
    }

    /*
     * Publish the new clusters: each BAT entry gets the current data end,
     * which then advances by one cluster.
     */
    for (i = 0; i < to_allocate; i++) {
        parallels_set_bat_entry(s, idx + i, s->data_end / s->off_multiplier);
        s->data_end += s->tracks;
    }

    return bat2sect(s, idx) + sector_num % s->tracks;
}
 267
 268
 269static int coroutine_fn GRAPH_RDLOCK
 270parallels_co_flush_to_os(BlockDriverState *bs)
 271{
 272    BDRVParallelsState *s = bs->opaque;
 273    unsigned long size = DIV_ROUND_UP(s->header_size, s->bat_dirty_block);
 274    unsigned long bit;
 275
 276    qemu_co_mutex_lock(&s->lock);
 277
 278    bit = find_first_bit(s->bat_dirty_bmap, size);
 279    while (bit < size) {
 280        uint32_t off = bit * s->bat_dirty_block;
 281        uint32_t to_write = s->bat_dirty_block;
 282        int ret;
 283
 284        if (off + to_write > s->header_size) {
 285            to_write = s->header_size - off;
 286        }
 287        ret = bdrv_co_pwrite(bs->file, off, to_write,
 288                             (uint8_t *)s->header + off, 0);
 289        if (ret < 0) {
 290            qemu_co_mutex_unlock(&s->lock);
 291            return ret;
 292        }
 293        bit = find_next_bit(s->bat_dirty_bmap, size, bit + 1);
 294    }
 295    bitmap_zero(s->bat_dirty_bmap, size);
 296
 297    qemu_co_mutex_unlock(&s->lock);
 298    return 0;
 299}
 300
 301
 302static int coroutine_fn parallels_co_block_status(BlockDriverState *bs,
 303                                                  bool want_zero,
 304                                                  int64_t offset,
 305                                                  int64_t bytes,
 306                                                  int64_t *pnum,
 307                                                  int64_t *map,
 308                                                  BlockDriverState **file)
 309{
 310    BDRVParallelsState *s = bs->opaque;
 311    int count;
 312
 313    assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE));
 314    qemu_co_mutex_lock(&s->lock);
 315    offset = block_status(s, offset >> BDRV_SECTOR_BITS,
 316                          bytes >> BDRV_SECTOR_BITS, &count);
 317    qemu_co_mutex_unlock(&s->lock);
 318
 319    *pnum = count * BDRV_SECTOR_SIZE;
 320    if (offset < 0) {
 321        return 0;
 322    }
 323
 324    *map = offset * BDRV_SECTOR_SIZE;
 325    *file = bs->file->bs;
 326    return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
 327}
 328
 329static int coroutine_fn GRAPH_RDLOCK
 330parallels_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
 331                    QEMUIOVector *qiov, int flags)
 332{
 333    BDRVParallelsState *s = bs->opaque;
 334    uint64_t bytes_done = 0;
 335    QEMUIOVector hd_qiov;
 336    int ret = 0;
 337
 338    qemu_iovec_init(&hd_qiov, qiov->niov);
 339
 340    while (nb_sectors > 0) {
 341        int64_t position;
 342        int n, nbytes;
 343
 344        qemu_co_mutex_lock(&s->lock);
 345        position = allocate_clusters(bs, sector_num, nb_sectors, &n);
 346        qemu_co_mutex_unlock(&s->lock);
 347        if (position < 0) {
 348            ret = (int)position;
 349            break;
 350        }
 351
 352        nbytes = n << BDRV_SECTOR_BITS;
 353
 354        qemu_iovec_reset(&hd_qiov);
 355        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
 356
 357        ret = bdrv_co_pwritev(bs->file, position * BDRV_SECTOR_SIZE, nbytes,
 358                              &hd_qiov, 0);
 359        if (ret < 0) {
 360            break;
 361        }
 362
 363        nb_sectors -= n;
 364        sector_num += n;
 365        bytes_done += nbytes;
 366    }
 367
 368    qemu_iovec_destroy(&hd_qiov);
 369    return ret;
 370}
 371
 372static int coroutine_fn GRAPH_RDLOCK
 373parallels_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
 374                   QEMUIOVector *qiov)
 375{
 376    BDRVParallelsState *s = bs->opaque;
 377    uint64_t bytes_done = 0;
 378    QEMUIOVector hd_qiov;
 379    int ret = 0;
 380
 381    qemu_iovec_init(&hd_qiov, qiov->niov);
 382
 383    while (nb_sectors > 0) {
 384        int64_t position;
 385        int n, nbytes;
 386
 387        qemu_co_mutex_lock(&s->lock);
 388        position = block_status(s, sector_num, nb_sectors, &n);
 389        qemu_co_mutex_unlock(&s->lock);
 390
 391        nbytes = n << BDRV_SECTOR_BITS;
 392
 393        qemu_iovec_reset(&hd_qiov);
 394        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
 395
 396        if (position < 0) {
 397            if (bs->backing) {
 398                ret = bdrv_co_preadv(bs->backing, sector_num * BDRV_SECTOR_SIZE,
 399                                     nbytes, &hd_qiov, 0);
 400                if (ret < 0) {
 401                    break;
 402                }
 403            } else {
 404                qemu_iovec_memset(&hd_qiov, 0, 0, nbytes);
 405            }
 406        } else {
 407            ret = bdrv_co_preadv(bs->file, position * BDRV_SECTOR_SIZE, nbytes,
 408                                 &hd_qiov, 0);
 409            if (ret < 0) {
 410                break;
 411            }
 412        }
 413
 414        nb_sectors -= n;
 415        sector_num += n;
 416        bytes_done += nbytes;
 417    }
 418
 419    qemu_iovec_destroy(&hd_qiov);
 420    return ret;
 421}
 422
 423static void parallels_check_unclean(BlockDriverState *bs,
 424                                    BdrvCheckResult *res,
 425                                    BdrvCheckMode fix)
 426{
 427    BDRVParallelsState *s = bs->opaque;
 428
 429    if (!s->header_unclean) {
 430        return;
 431    }
 432
 433    fprintf(stderr, "%s image was not closed correctly\n",
 434            fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR");
 435    res->corruptions++;
 436    if (fix & BDRV_FIX_ERRORS) {
 437        /* parallels_close will do the job right */
 438        res->corruptions_fixed++;
 439        s->header_unclean = false;
 440    }
 441}
 442
 443static int coroutine_fn GRAPH_RDLOCK
 444parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res,
 445                              BdrvCheckMode fix)
 446{
 447    BDRVParallelsState *s = bs->opaque;
 448    uint32_t i;
 449    int64_t off, high_off, size;
 450
 451    size = bdrv_co_getlength(bs->file->bs);
 452    if (size < 0) {
 453        res->check_errors++;
 454        return size;
 455    }
 456
 457    high_off = 0;
 458    for (i = 0; i < s->bat_size; i++) {
 459        off = bat2sect(s, i) << BDRV_SECTOR_BITS;
 460        if (off + s->cluster_size > size) {
 461            fprintf(stderr, "%s cluster %u is outside image\n",
 462                    fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
 463            res->corruptions++;
 464            if (fix & BDRV_FIX_ERRORS) {
 465                parallels_set_bat_entry(s, i, 0);
 466                res->corruptions_fixed++;
 467            }
 468            continue;
 469        }
 470        if (high_off < off) {
 471            high_off = off;
 472        }
 473    }
 474
 475    if (high_off == 0) {
 476        res->image_end_offset = s->data_end << BDRV_SECTOR_BITS;
 477    } else {
 478        res->image_end_offset = high_off + s->cluster_size;
 479        s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS;
 480    }
 481
 482    return 0;
 483}
 484
 485static int coroutine_fn GRAPH_RDLOCK
 486parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
 487                     BdrvCheckMode fix)
 488{
 489    BDRVParallelsState *s = bs->opaque;
 490    int64_t size;
 491    int ret;
 492
 493    size = bdrv_getlength(bs->file->bs);
 494    if (size < 0) {
 495        res->check_errors++;
 496        return size;
 497    }
 498
 499    if (size > res->image_end_offset) {
 500        int64_t count;
 501        count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size);
 502        fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n",
 503                fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR",
 504                size - res->image_end_offset);
 505        res->leaks += count;
 506        if (fix & BDRV_FIX_LEAKS) {
 507            Error *local_err = NULL;
 508
 509            /*
 510             * In order to really repair the image, we must shrink it.
 511             * That means we have to pass exact=true.
 512             */
 513            ret = bdrv_co_truncate(bs->file, res->image_end_offset, true,
 514                                   PREALLOC_MODE_OFF, 0, &local_err);
 515            if (ret < 0) {
 516                error_report_err(local_err);
 517                res->check_errors++;
 518                return ret;
 519            }
 520            res->leaks_fixed += count;
 521        }
 522    }
 523
 524    return 0;
 525}
 526
 527static void parallels_collect_statistics(BlockDriverState *bs,
 528                                         BdrvCheckResult *res,
 529                                         BdrvCheckMode fix)
 530{
 531    BDRVParallelsState *s = bs->opaque;
 532    int64_t off, prev_off;
 533    uint32_t i;
 534
 535    res->bfi.total_clusters = s->bat_size;
 536    res->bfi.compressed_clusters = 0; /* compression is not supported */
 537
 538    prev_off = 0;
 539    for (i = 0; i < s->bat_size; i++) {
 540        off = bat2sect(s, i) << BDRV_SECTOR_BITS;
 541        /*
 542         * If BDRV_FIX_ERRORS is not set, out-of-image BAT entries were not
 543         * fixed. Skip not allocated and out-of-image BAT entries.
 544         */
 545        if (off == 0 || off + s->cluster_size > res->image_end_offset) {
 546            prev_off = 0;
 547            continue;
 548        }
 549
 550        if (prev_off != 0 && (prev_off + s->cluster_size) != off) {
 551            res->bfi.fragmented_clusters++;
 552        }
 553        prev_off = off;
 554        res->bfi.allocated_clusters++;
 555    }
 556}
 557
 558static int coroutine_fn GRAPH_RDLOCK
 559parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res,
 560                   BdrvCheckMode fix)
 561{
 562    BDRVParallelsState *s = bs->opaque;
 563    int ret;
 564
 565    WITH_QEMU_LOCK_GUARD(&s->lock) {
 566        parallels_check_unclean(bs, res, fix);
 567
 568        ret = parallels_check_outside_image(bs, res, fix);
 569        if (ret < 0) {
 570            return ret;
 571        }
 572
 573        ret = parallels_check_leak(bs, res, fix);
 574        if (ret < 0) {
 575            return ret;
 576        }
 577
 578        parallels_collect_statistics(bs, res, fix);
 579    }
 580
 581    ret = bdrv_co_flush(bs);
 582    if (ret < 0) {
 583        res->check_errors++;
 584    }
 585
 586    return ret;
 587}
 588
 589
/*
 * .bdrv_co_create callback: write an empty Parallels image to the node
 * referenced by the creation options.
 *
 * After validating image and cluster size, a header with the HEADER_MAGIC2
 * signature is written into the first sector and the rest of the header
 * area (BAT plus padding up to a cluster boundary) is zeroed.
 *
 * Returns 0 on success or a negative errno with *errp set.
 */
static int coroutine_fn GRAPH_UNLOCKED
parallels_co_create(BlockdevCreateOptions* opts, Error **errp)
{
    BlockdevCreateOptionsParallels *parallels_opts;
    BlockDriverState *bs;
    BlockBackend *blk;
    int64_t total_size, cl_size;
    uint32_t bat_entries, bat_sectors;
    ParallelsHeader header;
    uint8_t tmp[BDRV_SECTOR_SIZE];
    int ret;

    assert(opts->driver == BLOCKDEV_DRIVER_PARALLELS);
    parallels_opts = &opts->u.parallels;

    /* Sanity checks */
    total_size = parallels_opts->size;

    if (parallels_opts->has_cluster_size) {
        cl_size = parallels_opts->cluster_size;
    } else {
        cl_size = DEFAULT_CLUSTER_SIZE;
    }

    /* XXX What is the real limit here? This is an insanely large maximum. */
    if (cl_size >= INT64_MAX / MAX_PARALLELS_IMAGE_FACTOR) {
        error_setg(errp, "Cluster size is too large");
        return -EINVAL;
    }
    if (total_size >= MAX_PARALLELS_IMAGE_FACTOR * cl_size) {
        error_setg(errp, "Image size is too large for this cluster size");
        return -E2BIG;
    }

    if (!QEMU_IS_ALIGNED(total_size, BDRV_SECTOR_SIZE)) {
        error_setg(errp, "Image size must be a multiple of 512 bytes");
        return -EINVAL;
    }

    if (!QEMU_IS_ALIGNED(cl_size, BDRV_SECTOR_SIZE)) {
        error_setg(errp, "Cluster size must be a multiple of 512 bytes");
        return -EINVAL;
    }

    /* Create BlockBackend to write to the image */
    bs = bdrv_co_open_blockdev_ref(parallels_opts->file, errp);
    if (bs == NULL) {
        return -EIO;
    }

    blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
                             errp);
    if (!blk) {
        ret = -EPERM;
        goto out;
    }
    blk_set_allow_write_beyond_eof(blk, true);

    /* Create image format */
    /* One BAT entry per cluster of the virtual disk */
    bat_entries = DIV_ROUND_UP(total_size, cl_size);
    /* Header area: header plus BAT rounded up to whole clusters, in sectors */
    bat_sectors = DIV_ROUND_UP(bat_entry_off(bat_entries), cl_size);
    bat_sectors = (bat_sectors *  cl_size) >> BDRV_SECTOR_BITS;

    memset(&header, 0, sizeof(header));
    memcpy(header.magic, HEADER_MAGIC2, sizeof(header.magic));
    header.version = cpu_to_le32(HEADER_VERSION);
    /* don't care much about geometry, it is not used on image level */
    header.heads = cpu_to_le32(HEADS_NUMBER);
    header.cylinders = cpu_to_le32(total_size / BDRV_SECTOR_SIZE
                                   / HEADS_NUMBER / SEC_IN_CYL);
    header.tracks = cpu_to_le32(cl_size >> BDRV_SECTOR_BITS);
    header.bat_entries = cpu_to_le32(bat_entries);
    header.nb_sectors = cpu_to_le64(DIV_ROUND_UP(total_size, BDRV_SECTOR_SIZE));
    header.data_off = cpu_to_le32(bat_sectors);

    /* write all the data */
    /* The header goes out as one zero-padded sector */
    memset(tmp, 0, sizeof(tmp));
    memcpy(tmp, &header, sizeof(header));

    ret = blk_co_pwrite(blk, 0, BDRV_SECTOR_SIZE, tmp, 0);
    if (ret < 0) {
        goto exit;
    }
    /* Zero the remainder of the header area, i.e. an empty BAT */
    ret = blk_co_pwrite_zeroes(blk, BDRV_SECTOR_SIZE,
                               (bat_sectors - 1) << BDRV_SECTOR_BITS, 0);
    if (ret < 0) {
        goto exit;
    }

    ret = 0;
out:
    /* Common cleanup; blk is NULL here if creating the BlockBackend failed */
    blk_co_unref(blk);
    bdrv_co_unref(bs);
    return ret;

exit:
    /* Write failure: set the error, then share the cleanup above */
    error_setg_errno(errp, -ret, "Failed to create Parallels image");
    goto out;
}
 689
/*
 * .bdrv_co_create_opts callback: create an image from legacy QemuOpts.
 *
 * The options are converted into a QDict, the protocol-level file is created
 * and opened, and the resulting BlockdevCreateOptions are handed to
 * parallels_co_create().
 *
 * Returns 0 on success or a negative errno.
 */
static int coroutine_fn GRAPH_UNLOCKED
parallels_co_create_opts(BlockDriver *drv, const char *filename,
                         QemuOpts *opts, Error **errp)
{
    BlockdevCreateOptions *create_options = NULL;
    BlockDriverState *bs = NULL;
    QDict *qdict;
    Visitor *v;
    int ret;

    /* Maps the legacy option name to the name used in the QAPI schema */
    static const QDictRenames opt_renames[] = {
        { BLOCK_OPT_CLUSTER_SIZE,       "cluster-size" },
        { NULL, NULL },
    };

    /* Parse options and convert legacy syntax */
    qdict = qemu_opts_to_qdict_filtered(opts, NULL, &parallels_create_opts,
                                        true);

    if (!qdict_rename_keys(qdict, opt_renames, errp)) {
        ret = -EINVAL;
        goto done;
    }

    /* Create and open the file (protocol layer) */
    ret = bdrv_co_create_file(filename, opts, errp);
    if (ret < 0) {
        goto done;
    }

    bs = bdrv_co_open(filename, NULL, NULL,
                      BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
    if (bs == NULL) {
        ret = -EIO;
        goto done;
    }

    /* Now get the QAPI type BlockdevCreateOptions */
    qdict_put_str(qdict, "driver", "parallels");
    qdict_put_str(qdict, "file", bs->node_name);

    v = qobject_input_visitor_new_flat_confused(qdict, errp);
    if (!v) {
        ret = -EINVAL;
        goto done;
    }

    visit_type_BlockdevCreateOptions(v, NULL, &create_options, errp);
    visit_free(v);
    if (!create_options) {
        ret = -EINVAL;
        goto done;
    }

    /* Silently round up sizes */
    create_options->u.parallels.size =
        ROUND_UP(create_options->u.parallels.size, BDRV_SECTOR_SIZE);
    create_options->u.parallels.cluster_size =
        ROUND_UP(create_options->u.parallels.cluster_size, BDRV_SECTOR_SIZE);

    /* Create the Parallels image (format layer) */
    ret = parallels_co_create(create_options, errp);
    if (ret < 0) {
        goto done;
    }
    ret = 0;

done:
    /* bs and create_options are still NULL if we failed early */
    qobject_unref(qdict);
    bdrv_co_unref(bs);
    qapi_free_BlockdevCreateOptions(create_options);
    return ret;
}
 763
 764
 765static int parallels_probe(const uint8_t *buf, int buf_size,
 766                           const char *filename)
 767{
 768    const ParallelsHeader *ph = (const void *)buf;
 769
 770    if (buf_size < sizeof(ParallelsHeader)) {
 771        return 0;
 772    }
 773
 774    if ((!memcmp(ph->magic, HEADER_MAGIC, 16) ||
 775           !memcmp(ph->magic, HEADER_MAGIC2, 16)) &&
 776           (le32_to_cpu(ph->version) == HEADER_VERSION)) {
 777        return 100;
 778    }
 779
 780    return 0;
 781}
 782
 783static int parallels_update_header(BlockDriverState *bs)
 784{
 785    BDRVParallelsState *s = bs->opaque;
 786    unsigned size = MAX(bdrv_opt_mem_align(bs->file->bs),
 787                        sizeof(ParallelsHeader));
 788
 789    if (size > s->header_size) {
 790        size = s->header_size;
 791    }
 792    return bdrv_pwrite_sync(bs->file, 0, size, s->header, 0);
 793}
 794
 795static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
 796                          Error **errp)
 797{
 798    BDRVParallelsState *s = bs->opaque;
 799    ParallelsHeader ph;
 800    int ret, size, i;
 801    int64_t file_nb_sectors;
 802    QemuOpts *opts = NULL;
 803    Error *local_err = NULL;
 804    char *buf;
 805
 806    ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
 807    if (ret < 0) {
 808        return ret;
 809    }
 810
 811    file_nb_sectors = bdrv_nb_sectors(bs->file->bs);
 812    if (file_nb_sectors < 0) {
 813        return -EINVAL;
 814    }
 815
 816    ret = bdrv_pread(bs->file, 0, sizeof(ph), &ph, 0);
 817    if (ret < 0) {
 818        goto fail;
 819    }
 820
 821    bs->total_sectors = le64_to_cpu(ph.nb_sectors);
 822
 823    if (le32_to_cpu(ph.version) != HEADER_VERSION) {
 824        goto fail_format;
 825    }
 826    if (!memcmp(ph.magic, HEADER_MAGIC, 16)) {
 827        s->off_multiplier = 1;
 828        bs->total_sectors = 0xffffffff & bs->total_sectors;
 829    } else if (!memcmp(ph.magic, HEADER_MAGIC2, 16)) {
 830        s->off_multiplier = le32_to_cpu(ph.tracks);
 831    } else {
 832        goto fail_format;
 833    }
 834
 835    s->tracks = le32_to_cpu(ph.tracks);
 836    if (s->tracks == 0) {
 837        error_setg(errp, "Invalid image: Zero sectors per track");
 838        ret = -EINVAL;
 839        goto fail;
 840    }
 841    if (s->tracks > INT32_MAX/513) {
 842        error_setg(errp, "Invalid image: Too big cluster");
 843        ret = -EFBIG;
 844        goto fail;
 845    }
 846    s->cluster_size = s->tracks << BDRV_SECTOR_BITS;
 847
 848    s->bat_size = le32_to_cpu(ph.bat_entries);
 849    if (s->bat_size > INT_MAX / sizeof(uint32_t)) {
 850        error_setg(errp, "Catalog too large");
 851        ret = -EFBIG;
 852        goto fail;
 853    }
 854
 855    size = bat_entry_off(s->bat_size);
 856    s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file->bs));
 857    s->header = qemu_try_blockalign(bs->file->bs, s->header_size);
 858    if (s->header == NULL) {
 859        ret = -ENOMEM;
 860        goto fail;
 861    }
 862    s->data_end = le32_to_cpu(ph.data_off);
 863    if (s->data_end == 0) {
 864        s->data_end = ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE);
 865    }
 866    if (s->data_end < s->header_size) {
 867        /* there is not enough unused space to fit to block align between BAT
 868           and actual data. We can't avoid read-modify-write... */
 869        s->header_size = size;
 870    }
 871
 872    ret = bdrv_pread(bs->file, 0, s->header_size, s->header, 0);
 873    if (ret < 0) {
 874        goto fail;
 875    }
 876    s->bat_bitmap = (uint32_t *)(s->header + 1);
 877
 878    for (i = 0; i < s->bat_size; i++) {
 879        int64_t off = bat2sect(s, i);
 880        if (off >= file_nb_sectors) {
 881            if (flags & BDRV_O_CHECK) {
 882                continue;
 883            }
 884            error_setg(errp, "parallels: Offset %" PRIi64 " in BAT[%d] entry "
 885                       "is larger than file size (%" PRIi64 ")",
 886                       off << BDRV_SECTOR_BITS, i,
 887                       file_nb_sectors << BDRV_SECTOR_BITS);
 888            ret = -EINVAL;
 889            goto fail;
 890        }
 891        if (off >= s->data_end) {
 892            s->data_end = off + s->tracks;
 893        }
 894    }
 895
 896    if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) {
 897        /* Image was not closed correctly. The check is mandatory */
 898        s->header_unclean = true;
 899        if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
 900            error_setg(errp, "parallels: Image was not closed correctly; "
 901                       "cannot be opened read/write");
 902            ret = -EACCES;
 903            goto fail;
 904        }
 905    }
 906
 907    opts = qemu_opts_create(&parallels_runtime_opts, NULL, 0, errp);
 908    if (!opts) {
 909        goto fail_options;
 910    }
 911
 912    if (!qemu_opts_absorb_qdict(opts, options, errp)) {
 913        goto fail_options;
 914    }
 915
 916    s->prealloc_size =
 917        qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0);
 918    s->prealloc_size = MAX(s->tracks, s->prealloc_size >> BDRV_SECTOR_BITS);
 919    buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE);
 920    /* prealloc_mode can be downgraded later during allocate_clusters */
 921    s->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, buf,
 922                                       PRL_PREALLOC_MODE_FALLOCATE,
 923                                       &local_err);
 924    g_free(buf);
 925    if (local_err != NULL) {
 926        error_propagate(errp, local_err);
 927        goto fail_options;
 928    }
 929
 930    if (ph.ext_off) {
 931        if (flags & BDRV_O_RDWR) {
 932            /*
 933             * It's unsafe to open image RW if there is an extension (as we
 934             * don't support it). But parallels driver in QEMU historically
 935             * ignores the extension, so print warning and don't care.
 936             */
 937            warn_report("Format Extension ignored in RW mode");
 938        } else {
 939            ret = parallels_read_format_extension(
 940                    bs, le64_to_cpu(ph.ext_off) << BDRV_SECTOR_BITS, errp);
 941            if (ret < 0) {
 942                goto fail;
 943            }
 944        }
 945    }
 946
 947    if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_INACTIVE)) {
 948        s->header->inuse = cpu_to_le32(HEADER_INUSE_MAGIC);
 949        ret = parallels_update_header(bs);
 950        if (ret < 0) {
 951            goto fail;
 952        }
 953    }
 954
 955    s->bat_dirty_block = 4 * qemu_real_host_page_size();
 956    s->bat_dirty_bmap =
 957        bitmap_new(DIV_ROUND_UP(s->header_size, s->bat_dirty_block));
 958
 959    /* Disable migration until bdrv_activate method is added */
 960    error_setg(&s->migration_blocker, "The Parallels format used by node '%s' "
 961               "does not support live migration",
 962               bdrv_get_device_or_node_name(bs));
 963    ret = migrate_add_blocker(s->migration_blocker, errp);
 964    if (ret < 0) {
 965        error_free(s->migration_blocker);
 966        goto fail;
 967    }
 968    qemu_co_mutex_init(&s->lock);
 969    return 0;
 970
 971fail_format:
 972    error_setg(errp, "Image not in Parallels format");
 973fail_options:
 974    ret = -EINVAL;
 975fail:
 976    qemu_vfree(s->header);
 977    return ret;
 978}
 979
 980
 981static void parallels_close(BlockDriverState *bs)
 982{
 983    BDRVParallelsState *s = bs->opaque;
 984
 985    if ((bs->open_flags & BDRV_O_RDWR) && !(bs->open_flags & BDRV_O_INACTIVE)) {
 986        s->header->inuse = 0;
 987        parallels_update_header(bs);
 988
 989        /* errors are ignored, so we might as well pass exact=true */
 990        bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS, true,
 991                      PREALLOC_MODE_OFF, 0, NULL);
 992    }
 993
 994    g_free(s->bat_dirty_bmap);
 995    qemu_vfree(s->header);
 996
 997    migrate_del_blocker(s->migration_blocker);
 998    error_free(s->migration_blocker);
 999}
1000
1001static BlockDriver bdrv_parallels = {
1002    .format_name        = "parallels",
1003    .instance_size      = sizeof(BDRVParallelsState),
1004    .bdrv_probe         = parallels_probe,
1005    .bdrv_open          = parallels_open,
1006    .bdrv_close         = parallels_close,
1007    .bdrv_child_perm          = bdrv_default_perms,
1008    .bdrv_co_block_status     = parallels_co_block_status,
1009    .bdrv_has_zero_init       = bdrv_has_zero_init_1,
1010    .bdrv_co_flush_to_os      = parallels_co_flush_to_os,
1011    .bdrv_co_readv  = parallels_co_readv,
1012    .bdrv_co_writev = parallels_co_writev,
1013    .is_format      = true,
1014    .supports_backing = true,
1015    .bdrv_co_create      = parallels_co_create,
1016    .bdrv_co_create_opts = parallels_co_create_opts,
1017    .bdrv_co_check  = parallels_co_check,
1018    .create_opts    = &parallels_create_opts,
1019};
1020
/* Register the Parallels driver description with the block layer. */
static void bdrv_parallels_init(void)
{
    bdrv_register(&bdrv_parallels);
}

/* Hook the registration function into QEMU's block module initialization */
block_init(bdrv_parallels_init);
1027