qemu/block/raw-format.c
<<
>>
Prefs
   1/* BlockDriver implementation for "raw" format driver
   2 *
   3 * Copyright (C) 2010-2016 Red Hat, Inc.
   4 * Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com>
   5 * Copyright (C) 2009, Anthony Liguori <aliguori@us.ibm.com>
   6 *
   7 * Author:
   8 *   Laszlo Ersek <lersek@redhat.com>
   9 *
  10 * Permission is hereby granted, free of charge, to any person obtaining a copy
  11 * of this software and associated documentation files (the "Software"), to
  12 * deal in the Software without restriction, including without limitation the
  13 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  14 * sell copies of the Software, and to permit persons to whom the Software is
  15 * furnished to do so, subject to the following conditions:
  16 *
  17 * The above copyright notice and this permission notice shall be included in
  18 * all copies or substantial portions of the Software.
  19 *
  20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  25 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  26 * IN THE SOFTWARE.
  27 */
  28
  29#include "qemu/osdep.h"
  30#include "block/block_int.h"
  31#include "qapi/error.h"
  32#include "qemu/module.h"
  33#include "qemu/option.h"
  34
  35typedef struct BDRVRawState {
  36    uint64_t offset;
  37    uint64_t size;
  38    bool has_size;
  39} BDRVRawState;
  40
  41static const char *const mutable_opts[] = { "offset", "size", NULL };
  42
  43static QemuOptsList raw_runtime_opts = {
  44    .name = "raw",
  45    .head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head),
  46    .desc = {
  47        {
  48            .name = "offset",
  49            .type = QEMU_OPT_SIZE,
  50            .help = "offset in the disk where the image starts",
  51        },
  52        {
  53            .name = "size",
  54            .type = QEMU_OPT_SIZE,
  55            .help = "virtual disk size",
  56        },
  57        { /* end of list */ }
  58    },
  59};
  60
  61static QemuOptsList raw_create_opts = {
  62    .name = "raw-create-opts",
  63    .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head),
  64    .desc = {
  65        {
  66            .name = BLOCK_OPT_SIZE,
  67            .type = QEMU_OPT_SIZE,
  68            .help = "Virtual disk size"
  69        },
  70        { /* end of list */ }
  71    }
  72};
  73
  74static int raw_read_options(QDict *options, uint64_t *offset, bool *has_size,
  75                            uint64_t *size, Error **errp)
  76{
  77    QemuOpts *opts = NULL;
  78    int ret;
  79
  80    opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
  81    if (!qemu_opts_absorb_qdict(opts, options, errp)) {
  82        ret = -EINVAL;
  83        goto end;
  84    }
  85
  86    *offset = qemu_opt_get_size(opts, "offset", 0);
  87    *has_size = qemu_opt_find(opts, "size");
  88    *size = qemu_opt_get_size(opts, "size", 0);
  89
  90    ret = 0;
  91end:
  92    qemu_opts_del(opts);
  93    return ret;
  94}
  95
  96static int raw_apply_options(BlockDriverState *bs, BDRVRawState *s,
  97                             uint64_t offset, bool has_size, uint64_t size,
  98                             Error **errp)
  99{
 100    int64_t real_size = 0;
 101
 102    real_size = bdrv_getlength(bs->file->bs);
 103    if (real_size < 0) {
 104        error_setg_errno(errp, -real_size, "Could not get image size");
 105        return real_size;
 106    }
 107
 108    /* Check size and offset */
 109    if (offset > real_size) {
 110        error_setg(errp, "Offset (%" PRIu64 ") cannot be greater than "
 111                   "size of the containing file (%" PRId64 ")",
 112                   s->offset, real_size);
 113        return -EINVAL;
 114    }
 115
 116    if (has_size && (real_size - offset) < size) {
 117        error_setg(errp, "The sum of offset (%" PRIu64 ") and size "
 118                   "(%" PRIu64 ") has to be smaller or equal to the "
 119                   " actual size of the containing file (%" PRId64 ")",
 120                   s->offset, s->size, real_size);
 121        return -EINVAL;
 122    }
 123
 124    /* Make sure size is multiple of BDRV_SECTOR_SIZE to prevent rounding
 125     * up and leaking out of the specified area. */
 126    if (has_size && !QEMU_IS_ALIGNED(size, BDRV_SECTOR_SIZE)) {
 127        error_setg(errp, "Specified size is not multiple of %llu",
 128                   BDRV_SECTOR_SIZE);
 129        return -EINVAL;
 130    }
 131
 132    s->offset = offset;
 133    s->has_size = has_size;
 134    s->size = has_size ? size : real_size - offset;
 135
 136    return 0;
 137}
 138
 139static int raw_reopen_prepare(BDRVReopenState *reopen_state,
 140                              BlockReopenQueue *queue, Error **errp)
 141{
 142    bool has_size;
 143    uint64_t offset, size;
 144    int ret;
 145
 146    assert(reopen_state != NULL);
 147    assert(reopen_state->bs != NULL);
 148
 149    reopen_state->opaque = g_new0(BDRVRawState, 1);
 150
 151    ret = raw_read_options(reopen_state->options, &offset, &has_size, &size,
 152                           errp);
 153    if (ret < 0) {
 154        return ret;
 155    }
 156
 157    ret = raw_apply_options(reopen_state->bs, reopen_state->opaque,
 158                            offset, has_size, size, errp);
 159    if (ret < 0) {
 160        return ret;
 161    }
 162
 163    return 0;
 164}
 165
 166static void raw_reopen_commit(BDRVReopenState *state)
 167{
 168    BDRVRawState *new_s = state->opaque;
 169    BDRVRawState *s = state->bs->opaque;
 170
 171    memcpy(s, new_s, sizeof(BDRVRawState));
 172
 173    g_free(state->opaque);
 174    state->opaque = NULL;
 175}
 176
 177static void raw_reopen_abort(BDRVReopenState *state)
 178{
 179    g_free(state->opaque);
 180    state->opaque = NULL;
 181}
 182
 183/* Check and adjust the offset, against 'offset' and 'size' options. */
 184static inline int raw_adjust_offset(BlockDriverState *bs, uint64_t *offset,
 185                                    uint64_t bytes, bool is_write)
 186{
 187    BDRVRawState *s = bs->opaque;
 188
 189    if (s->has_size && (*offset > s->size || bytes > (s->size - *offset))) {
 190        /* There's not enough space for the write, or the read request is
 191         * out-of-range. Don't read/write anything to prevent leaking out of
 192         * the size specified in options. */
 193        return is_write ? -ENOSPC : -EINVAL;
 194    }
 195
 196    if (*offset > INT64_MAX - s->offset) {
 197        return -EINVAL;
 198    }
 199    *offset += s->offset;
 200
 201    return 0;
 202}
 203
 204static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
 205                                      uint64_t bytes, QEMUIOVector *qiov,
 206                                      int flags)
 207{
 208    int ret;
 209
 210    ret = raw_adjust_offset(bs, &offset, bytes, false);
 211    if (ret) {
 212        return ret;
 213    }
 214
 215    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
 216    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
 217}
 218
 219static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
 220                                       uint64_t bytes, QEMUIOVector *qiov,
 221                                       int flags)
 222{
 223    void *buf = NULL;
 224    BlockDriver *drv;
 225    QEMUIOVector local_qiov;
 226    int ret;
 227
 228    if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) {
 229        /* Handling partial writes would be a pain - so we just
 230         * require that guests have 512-byte request alignment if
 231         * probing occurred */
 232        QEMU_BUILD_BUG_ON(BLOCK_PROBE_BUF_SIZE != 512);
 233        QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != 512);
 234        assert(offset == 0 && bytes >= BLOCK_PROBE_BUF_SIZE);
 235
 236        buf = qemu_try_blockalign(bs->file->bs, 512);
 237        if (!buf) {
 238            ret = -ENOMEM;
 239            goto fail;
 240        }
 241
 242        ret = qemu_iovec_to_buf(qiov, 0, buf, 512);
 243        if (ret != 512) {
 244            ret = -EINVAL;
 245            goto fail;
 246        }
 247
 248        drv = bdrv_probe_all(buf, 512, NULL);
 249        if (drv != bs->drv) {
 250            ret = -EPERM;
 251            goto fail;
 252        }
 253
 254        /* Use the checked buffer, a malicious guest might be overwriting its
 255         * original buffer in the background. */
 256        qemu_iovec_init(&local_qiov, qiov->niov + 1);
 257        qemu_iovec_add(&local_qiov, buf, 512);
 258        qemu_iovec_concat(&local_qiov, qiov, 512, qiov->size - 512);
 259        qiov = &local_qiov;
 260    }
 261
 262    ret = raw_adjust_offset(bs, &offset, bytes, true);
 263    if (ret) {
 264        goto fail;
 265    }
 266
 267    BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
 268    ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
 269
 270fail:
 271    if (qiov == &local_qiov) {
 272        qemu_iovec_destroy(&local_qiov);
 273    }
 274    qemu_vfree(buf);
 275    return ret;
 276}
 277
 278static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
 279                                            bool want_zero, int64_t offset,
 280                                            int64_t bytes, int64_t *pnum,
 281                                            int64_t *map,
 282                                            BlockDriverState **file)
 283{
 284    BDRVRawState *s = bs->opaque;
 285    *pnum = bytes;
 286    *file = bs->file->bs;
 287    *map = offset + s->offset;
 288    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
 289}
 290
 291static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
 292                                             int64_t offset, int bytes,
 293                                             BdrvRequestFlags flags)
 294{
 295    int ret;
 296
 297    ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true);
 298    if (ret) {
 299        return ret;
 300    }
 301    return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
 302}
 303
 304static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
 305                                        int64_t offset, int bytes)
 306{
 307    int ret;
 308
 309    ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true);
 310    if (ret) {
 311        return ret;
 312    }
 313    return bdrv_co_pdiscard(bs->file, offset, bytes);
 314}
 315
 316static int64_t raw_getlength(BlockDriverState *bs)
 317{
 318    int64_t len;
 319    BDRVRawState *s = bs->opaque;
 320
 321    /* Update size. It should not change unless the file was externally
 322     * modified. */
 323    len = bdrv_getlength(bs->file->bs);
 324    if (len < 0) {
 325        return len;
 326    }
 327
 328    if (len < s->offset) {
 329        s->size = 0;
 330    } else {
 331        if (s->has_size) {
 332            /* Try to honour the size */
 333            s->size = MIN(s->size, len - s->offset);
 334        } else {
 335            s->size = len - s->offset;
 336        }
 337    }
 338
 339    return s->size;
 340}
 341
 342static BlockMeasureInfo *raw_measure(QemuOpts *opts, BlockDriverState *in_bs,
 343                                     Error **errp)
 344{
 345    BlockMeasureInfo *info;
 346    int64_t required;
 347
 348    if (in_bs) {
 349        required = bdrv_getlength(in_bs);
 350        if (required < 0) {
 351            error_setg_errno(errp, -required, "Unable to get image size");
 352            return NULL;
 353        }
 354    } else {
 355        required = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
 356                            BDRV_SECTOR_SIZE);
 357    }
 358
 359    info = g_new0(BlockMeasureInfo, 1);
 360    info->required = required;
 361
 362    /* Unallocated sectors count towards the file size in raw images */
 363    info->fully_allocated = info->required;
 364    return info;
 365}
 366
 367static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 368{
 369    return bdrv_get_info(bs->file->bs, bdi);
 370}
 371
 372static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
 373{
 374    if (bs->probed) {
 375        /* To make it easier to protect the first sector, any probed
 376         * image is restricted to read-modify-write on sub-sector
 377         * operations. */
 378        bs->bl.request_alignment = BDRV_SECTOR_SIZE;
 379    }
 380}
 381
 382static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
 383                                        bool exact, PreallocMode prealloc,
 384                                        BdrvRequestFlags flags, Error **errp)
 385{
 386    BDRVRawState *s = bs->opaque;
 387
 388    if (s->has_size) {
 389        error_setg(errp, "Cannot resize fixed-size raw disks");
 390        return -ENOTSUP;
 391    }
 392
 393    if (INT64_MAX - offset < s->offset) {
 394        error_setg(errp, "Disk size too large for the chosen offset");
 395        return -EINVAL;
 396    }
 397
 398    s->size = offset;
 399    offset += s->offset;
 400    return bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp);
 401}
 402
 403static void raw_eject(BlockDriverState *bs, bool eject_flag)
 404{
 405    bdrv_eject(bs->file->bs, eject_flag);
 406}
 407
 408static void raw_lock_medium(BlockDriverState *bs, bool locked)
 409{
 410    bdrv_lock_medium(bs->file->bs, locked);
 411}
 412
 413static int raw_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
 414{
 415    BDRVRawState *s = bs->opaque;
 416    if (s->offset || s->has_size) {
 417        return -ENOTSUP;
 418    }
 419    return bdrv_co_ioctl(bs->file->bs, req, buf);
 420}
 421
 422static int raw_has_zero_init(BlockDriverState *bs)
 423{
 424    return bdrv_has_zero_init(bs->file->bs);
 425}
 426
 427static int coroutine_fn raw_co_create_opts(BlockDriver *drv,
 428                                           const char *filename,
 429                                           QemuOpts *opts,
 430                                           Error **errp)
 431{
 432    return bdrv_create_file(filename, opts, errp);
 433}
 434
 435static int raw_open(BlockDriverState *bs, QDict *options, int flags,
 436                    Error **errp)
 437{
 438    BDRVRawState *s = bs->opaque;
 439    bool has_size;
 440    uint64_t offset, size;
 441    BdrvChildRole file_role;
 442    int ret;
 443
 444    ret = raw_read_options(options, &offset, &has_size, &size, errp);
 445    if (ret < 0) {
 446        return ret;
 447    }
 448
 449    /*
 450     * Without offset and a size limit, this driver behaves very much
 451     * like a filter.  With any such limit, it does not.
 452     */
 453    if (offset || has_size) {
 454        file_role = BDRV_CHILD_DATA | BDRV_CHILD_PRIMARY;
 455    } else {
 456        file_role = BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY;
 457    }
 458
 459    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
 460                               file_role, false, errp);
 461    if (!bs->file) {
 462        return -EINVAL;
 463    }
 464
 465    bs->sg = bs->file->bs->sg;
 466    bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
 467        (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
 468    bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
 469        ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
 470            bs->file->bs->supported_zero_flags);
 471    bs->supported_truncate_flags = bs->file->bs->supported_truncate_flags &
 472                                   BDRV_REQ_ZERO_WRITE;
 473
 474    if (bs->probed && !bdrv_is_read_only(bs)) {
 475        bdrv_refresh_filename(bs->file->bs);
 476        fprintf(stderr,
 477                "WARNING: Image format was not specified for '%s' and probing "
 478                "guessed raw.\n"
 479                "         Automatically detecting the format is dangerous for "
 480                "raw images, write operations on block 0 will be restricted.\n"
 481                "         Specify the 'raw' format explicitly to remove the "
 482                "restrictions.\n",
 483                bs->file->bs->filename);
 484    }
 485
 486    ret = raw_apply_options(bs, s, offset, has_size, size, errp);
 487    if (ret < 0) {
 488        return ret;
 489    }
 490
 491    if (bs->sg && (s->offset || s->has_size)) {
 492        error_setg(errp, "Cannot use offset/size with SCSI generic devices");
 493        return -EINVAL;
 494    }
 495
 496    return 0;
 497}
 498
 499static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
 500{
 501    /* smallest possible positive score so that raw is used if and only if no
 502     * other block driver works
 503     */
 504    return 1;
 505}
 506
 507static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
 508{
 509    BDRVRawState *s = bs->opaque;
 510    int ret;
 511
 512    ret = bdrv_probe_blocksizes(bs->file->bs, bsz);
 513    if (ret < 0) {
 514        return ret;
 515    }
 516
 517    if (!QEMU_IS_ALIGNED(s->offset, MAX(bsz->log, bsz->phys))) {
 518        return -ENOTSUP;
 519    }
 520
 521    return 0;
 522}
 523
 524static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
 525{
 526    BDRVRawState *s = bs->opaque;
 527    if (s->offset || s->has_size) {
 528        return -ENOTSUP;
 529    }
 530    return bdrv_probe_geometry(bs->file->bs, geo);
 531}
 532
 533static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs,
 534                                               BdrvChild *src,
 535                                               uint64_t src_offset,
 536                                               BdrvChild *dst,
 537                                               uint64_t dst_offset,
 538                                               uint64_t bytes,
 539                                               BdrvRequestFlags read_flags,
 540                                               BdrvRequestFlags write_flags)
 541{
 542    int ret;
 543
 544    ret = raw_adjust_offset(bs, &src_offset, bytes, false);
 545    if (ret) {
 546        return ret;
 547    }
 548    return bdrv_co_copy_range_from(bs->file, src_offset, dst, dst_offset,
 549                                   bytes, read_flags, write_flags);
 550}
 551
 552static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs,
 553                                             BdrvChild *src,
 554                                             uint64_t src_offset,
 555                                             BdrvChild *dst,
 556                                             uint64_t dst_offset,
 557                                             uint64_t bytes,
 558                                             BdrvRequestFlags read_flags,
 559                                             BdrvRequestFlags write_flags)
 560{
 561    int ret;
 562
 563    ret = raw_adjust_offset(bs, &dst_offset, bytes, true);
 564    if (ret) {
 565        return ret;
 566    }
 567    return bdrv_co_copy_range_to(src, src_offset, bs->file, dst_offset, bytes,
 568                                 read_flags, write_flags);
 569}
 570
 571static const char *const raw_strong_runtime_opts[] = {
 572    "offset",
 573    "size",
 574
 575    NULL
 576};
 577
 578static void raw_cancel_in_flight(BlockDriverState *bs)
 579{
 580    bdrv_cancel_in_flight(bs->file->bs);
 581}
 582
 583BlockDriver bdrv_raw = {
 584    .format_name          = "raw",
 585    .instance_size        = sizeof(BDRVRawState),
 586    .bdrv_probe           = &raw_probe,
 587    .bdrv_reopen_prepare  = &raw_reopen_prepare,
 588    .bdrv_reopen_commit   = &raw_reopen_commit,
 589    .bdrv_reopen_abort    = &raw_reopen_abort,
 590    .bdrv_open            = &raw_open,
 591    .bdrv_child_perm      = bdrv_default_perms,
 592    .bdrv_co_create_opts  = &raw_co_create_opts,
 593    .bdrv_co_preadv       = &raw_co_preadv,
 594    .bdrv_co_pwritev      = &raw_co_pwritev,
 595    .bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes,
 596    .bdrv_co_pdiscard     = &raw_co_pdiscard,
 597    .bdrv_co_block_status = &raw_co_block_status,
 598    .bdrv_co_copy_range_from = &raw_co_copy_range_from,
 599    .bdrv_co_copy_range_to  = &raw_co_copy_range_to,
 600    .bdrv_co_truncate     = &raw_co_truncate,
 601    .bdrv_getlength       = &raw_getlength,
 602    .is_format            = true,
 603    .has_variable_length  = true,
 604    .bdrv_measure         = &raw_measure,
 605    .bdrv_get_info        = &raw_get_info,
 606    .bdrv_refresh_limits  = &raw_refresh_limits,
 607    .bdrv_probe_blocksizes = &raw_probe_blocksizes,
 608    .bdrv_probe_geometry  = &raw_probe_geometry,
 609    .bdrv_eject           = &raw_eject,
 610    .bdrv_lock_medium     = &raw_lock_medium,
 611    .bdrv_co_ioctl        = &raw_co_ioctl,
 612    .create_opts          = &raw_create_opts,
 613    .bdrv_has_zero_init   = &raw_has_zero_init,
 614    .strong_runtime_opts  = raw_strong_runtime_opts,
 615    .mutable_opts         = mutable_opts,
 616    .bdrv_cancel_in_flight = raw_cancel_in_flight,
 617};
 618
 619static void bdrv_raw_init(void)
 620{
 621    bdrv_register(&bdrv_raw);
 622}
 623
 624block_init(bdrv_raw_init);
 625