qemu/block/blkdebug.c
<<
>>
Prefs
   1/*
   2 * Block protocol for I/O error injection
   3 *
   4 * Copyright (C) 2016-2017 Red Hat, Inc.
   5 * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25
  26#include "qemu/osdep.h"
  27#include "qapi/error.h"
  28#include "qemu/cutils.h"
  29#include "qemu/config-file.h"
  30#include "block/block_int.h"
  31#include "block/qdict.h"
  32#include "qemu/module.h"
  33#include "qemu/option.h"
  34#include "qapi/qapi-visit-block-core.h"
  35#include "qapi/qmp/qdict.h"
  36#include "qapi/qmp/qlist.h"
  37#include "qapi/qmp/qstring.h"
  38#include "qapi/qobject-input-visitor.h"
  39#include "sysemu/qtest.h"
  40
  41typedef struct BDRVBlkdebugState {
  42    int state;
  43    int new_state;
  44    uint64_t align;
  45    uint64_t max_transfer;
  46    uint64_t opt_write_zero;
  47    uint64_t max_write_zero;
  48    uint64_t opt_discard;
  49    uint64_t max_discard;
  50
  51    uint64_t take_child_perms;
  52    uint64_t unshare_child_perms;
  53
  54    /* For blkdebug_refresh_filename() */
  55    char *config_file;
  56
  57    QLIST_HEAD(, BlkdebugRule) rules[BLKDBG__MAX];
  58    QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
  59    QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs;
  60} BDRVBlkdebugState;
  61
  62typedef struct BlkdebugAIOCB {
  63    BlockAIOCB common;
  64    int ret;
  65} BlkdebugAIOCB;
  66
  67typedef struct BlkdebugSuspendedReq {
  68    Coroutine *co;
  69    char *tag;
  70    QLIST_ENTRY(BlkdebugSuspendedReq) next;
  71} BlkdebugSuspendedReq;
  72
  73enum {
  74    ACTION_INJECT_ERROR,
  75    ACTION_SET_STATE,
  76    ACTION_SUSPEND,
  77};
  78
  79typedef struct BlkdebugRule {
  80    BlkdebugEvent event;
  81    int action;
  82    int state;
  83    union {
  84        struct {
  85            uint64_t iotype_mask;
  86            int error;
  87            int immediately;
  88            int once;
  89            int64_t offset;
  90        } inject;
  91        struct {
  92            int new_state;
  93        } set_state;
  94        struct {
  95            char *tag;
  96        } suspend;
  97    } options;
  98    QLIST_ENTRY(BlkdebugRule) next;
  99    QSIMPLEQ_ENTRY(BlkdebugRule) active_next;
 100} BlkdebugRule;
 101
 102QEMU_BUILD_BUG_MSG(BLKDEBUG_IO_TYPE__MAX > 64,
 103                   "BlkdebugIOType mask does not fit into an uint64_t");
 104
 105static QemuOptsList inject_error_opts = {
 106    .name = "inject-error",
 107    .head = QTAILQ_HEAD_INITIALIZER(inject_error_opts.head),
 108    .desc = {
 109        {
 110            .name = "event",
 111            .type = QEMU_OPT_STRING,
 112        },
 113        {
 114            .name = "state",
 115            .type = QEMU_OPT_NUMBER,
 116        },
 117        {
 118            .name = "iotype",
 119            .type = QEMU_OPT_STRING,
 120        },
 121        {
 122            .name = "errno",
 123            .type = QEMU_OPT_NUMBER,
 124        },
 125        {
 126            .name = "sector",
 127            .type = QEMU_OPT_NUMBER,
 128        },
 129        {
 130            .name = "once",
 131            .type = QEMU_OPT_BOOL,
 132        },
 133        {
 134            .name = "immediately",
 135            .type = QEMU_OPT_BOOL,
 136        },
 137        { /* end of list */ }
 138    },
 139};
 140
 141static QemuOptsList set_state_opts = {
 142    .name = "set-state",
 143    .head = QTAILQ_HEAD_INITIALIZER(set_state_opts.head),
 144    .desc = {
 145        {
 146            .name = "event",
 147            .type = QEMU_OPT_STRING,
 148        },
 149        {
 150            .name = "state",
 151            .type = QEMU_OPT_NUMBER,
 152        },
 153        {
 154            .name = "new_state",
 155            .type = QEMU_OPT_NUMBER,
 156        },
 157        { /* end of list */ }
 158    },
 159};
 160
 161static QemuOptsList *config_groups[] = {
 162    &inject_error_opts,
 163    &set_state_opts,
 164    NULL
 165};
 166
 167struct add_rule_data {
 168    BDRVBlkdebugState *s;
 169    int action;
 170};
 171
 172static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
 173{
 174    struct add_rule_data *d = opaque;
 175    BDRVBlkdebugState *s = d->s;
 176    const char* event_name;
 177    int event;
 178    struct BlkdebugRule *rule;
 179    int64_t sector;
 180    BlkdebugIOType iotype;
 181    Error *local_error = NULL;
 182
 183    /* Find the right event for the rule */
 184    event_name = qemu_opt_get(opts, "event");
 185    if (!event_name) {
 186        error_setg(errp, "Missing event name for rule");
 187        return -1;
 188    }
 189    event = qapi_enum_parse(&BlkdebugEvent_lookup, event_name, -1, errp);
 190    if (event < 0) {
 191        return -1;
 192    }
 193
 194    /* Set attributes common for all actions */
 195    rule = g_malloc0(sizeof(*rule));
 196    *rule = (struct BlkdebugRule) {
 197        .event  = event,
 198        .action = d->action,
 199        .state  = qemu_opt_get_number(opts, "state", 0),
 200    };
 201
 202    /* Parse action-specific options */
 203    switch (d->action) {
 204    case ACTION_INJECT_ERROR:
 205        rule->options.inject.error = qemu_opt_get_number(opts, "errno", EIO);
 206        rule->options.inject.once  = qemu_opt_get_bool(opts, "once", 0);
 207        rule->options.inject.immediately =
 208            qemu_opt_get_bool(opts, "immediately", 0);
 209        sector = qemu_opt_get_number(opts, "sector", -1);
 210        rule->options.inject.offset =
 211            sector == -1 ? -1 : sector * BDRV_SECTOR_SIZE;
 212
 213        iotype = qapi_enum_parse(&BlkdebugIOType_lookup,
 214                                 qemu_opt_get(opts, "iotype"),
 215                                 BLKDEBUG_IO_TYPE__MAX, &local_error);
 216        if (local_error) {
 217            error_propagate(errp, local_error);
 218            return -1;
 219        }
 220        if (iotype != BLKDEBUG_IO_TYPE__MAX) {
 221            rule->options.inject.iotype_mask = (1ull << iotype);
 222        } else {
 223            /* Apply the default */
 224            rule->options.inject.iotype_mask =
 225                (1ull << BLKDEBUG_IO_TYPE_READ)
 226                | (1ull << BLKDEBUG_IO_TYPE_WRITE)
 227                | (1ull << BLKDEBUG_IO_TYPE_WRITE_ZEROES)
 228                | (1ull << BLKDEBUG_IO_TYPE_DISCARD)
 229                | (1ull << BLKDEBUG_IO_TYPE_FLUSH);
 230        }
 231
 232        break;
 233
 234    case ACTION_SET_STATE:
 235        rule->options.set_state.new_state =
 236            qemu_opt_get_number(opts, "new_state", 0);
 237        break;
 238
 239    case ACTION_SUSPEND:
 240        rule->options.suspend.tag =
 241            g_strdup(qemu_opt_get(opts, "tag"));
 242        break;
 243    };
 244
 245    /* Add the rule */
 246    QLIST_INSERT_HEAD(&s->rules[event], rule, next);
 247
 248    return 0;
 249}
 250
 251static void remove_rule(BlkdebugRule *rule)
 252{
 253    switch (rule->action) {
 254    case ACTION_INJECT_ERROR:
 255    case ACTION_SET_STATE:
 256        break;
 257    case ACTION_SUSPEND:
 258        g_free(rule->options.suspend.tag);
 259        break;
 260    }
 261
 262    QLIST_REMOVE(rule, next);
 263    g_free(rule);
 264}
 265
 266static int read_config(BDRVBlkdebugState *s, const char *filename,
 267                       QDict *options, Error **errp)
 268{
 269    FILE *f = NULL;
 270    int ret;
 271    struct add_rule_data d;
 272    Error *local_err = NULL;
 273
 274    if (filename) {
 275        f = fopen(filename, "r");
 276        if (f == NULL) {
 277            error_setg_errno(errp, errno, "Could not read blkdebug config file");
 278            return -errno;
 279        }
 280
 281        ret = qemu_config_parse(f, config_groups, filename);
 282        if (ret < 0) {
 283            error_setg(errp, "Could not parse blkdebug config file");
 284            goto fail;
 285        }
 286    }
 287
 288    qemu_config_parse_qdict(options, config_groups, &local_err);
 289    if (local_err) {
 290        error_propagate(errp, local_err);
 291        ret = -EINVAL;
 292        goto fail;
 293    }
 294
 295    d.s = s;
 296    d.action = ACTION_INJECT_ERROR;
 297    qemu_opts_foreach(&inject_error_opts, add_rule, &d, &local_err);
 298    if (local_err) {
 299        error_propagate(errp, local_err);
 300        ret = -EINVAL;
 301        goto fail;
 302    }
 303
 304    d.action = ACTION_SET_STATE;
 305    qemu_opts_foreach(&set_state_opts, add_rule, &d, &local_err);
 306    if (local_err) {
 307        error_propagate(errp, local_err);
 308        ret = -EINVAL;
 309        goto fail;
 310    }
 311
 312    ret = 0;
 313fail:
 314    qemu_opts_reset(&inject_error_opts);
 315    qemu_opts_reset(&set_state_opts);
 316    if (f) {
 317        fclose(f);
 318    }
 319    return ret;
 320}
 321
 322/* Valid blkdebug filenames look like blkdebug:path/to/config:path/to/image */
 323static void blkdebug_parse_filename(const char *filename, QDict *options,
 324                                    Error **errp)
 325{
 326    const char *c;
 327
 328    /* Parse the blkdebug: prefix */
 329    if (!strstart(filename, "blkdebug:", &filename)) {
 330        /* There was no prefix; therefore, all options have to be already
 331           present in the QDict (except for the filename) */
 332        qdict_put_str(options, "x-image", filename);
 333        return;
 334    }
 335
 336    /* Parse config file path */
 337    c = strchr(filename, ':');
 338    if (c == NULL) {
 339        error_setg(errp, "blkdebug requires both config file and image path");
 340        return;
 341    }
 342
 343    if (c != filename) {
 344        QString *config_path;
 345        config_path = qstring_from_substr(filename, 0, c - filename);
 346        qdict_put(options, "config", config_path);
 347    }
 348
 349    /* TODO Allow multi-level nesting and set file.filename here */
 350    filename = c + 1;
 351    qdict_put_str(options, "x-image", filename);
 352}
 353
 354static int blkdebug_parse_perm_list(uint64_t *dest, QDict *options,
 355                                    const char *prefix, Error **errp)
 356{
 357    int ret = 0;
 358    QDict *subqdict = NULL;
 359    QObject *crumpled_subqdict = NULL;
 360    Visitor *v = NULL;
 361    BlockPermissionList *perm_list = NULL, *element;
 362    Error *local_err = NULL;
 363
 364    *dest = 0;
 365
 366    qdict_extract_subqdict(options, &subqdict, prefix);
 367    if (!qdict_size(subqdict)) {
 368        goto out;
 369    }
 370
 371    crumpled_subqdict = qdict_crumple(subqdict, errp);
 372    if (!crumpled_subqdict) {
 373        ret = -EINVAL;
 374        goto out;
 375    }
 376
 377    v = qobject_input_visitor_new(crumpled_subqdict);
 378    visit_type_BlockPermissionList(v, NULL, &perm_list, &local_err);
 379    if (local_err) {
 380        error_propagate(errp, local_err);
 381        ret = -EINVAL;
 382        goto out;
 383    }
 384
 385    for (element = perm_list; element; element = element->next) {
 386        *dest |= bdrv_qapi_perm_to_blk_perm(element->value);
 387    }
 388
 389out:
 390    qapi_free_BlockPermissionList(perm_list);
 391    visit_free(v);
 392    qobject_unref(subqdict);
 393    qobject_unref(crumpled_subqdict);
 394    return ret;
 395}
 396
 397static int blkdebug_parse_perms(BDRVBlkdebugState *s, QDict *options,
 398                                Error **errp)
 399{
 400    int ret;
 401
 402    ret = blkdebug_parse_perm_list(&s->take_child_perms, options,
 403                                   "take-child-perms.", errp);
 404    if (ret < 0) {
 405        return ret;
 406    }
 407
 408    ret = blkdebug_parse_perm_list(&s->unshare_child_perms, options,
 409                                   "unshare-child-perms.", errp);
 410    if (ret < 0) {
 411        return ret;
 412    }
 413
 414    return 0;
 415}
 416
 417static QemuOptsList runtime_opts = {
 418    .name = "blkdebug",
 419    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
 420    .desc = {
 421        {
 422            .name = "config",
 423            .type = QEMU_OPT_STRING,
 424            .help = "Path to the configuration file",
 425        },
 426        {
 427            .name = "x-image",
 428            .type = QEMU_OPT_STRING,
 429            .help = "[internal use only, will be removed]",
 430        },
 431        {
 432            .name = "align",
 433            .type = QEMU_OPT_SIZE,
 434            .help = "Required alignment in bytes",
 435        },
 436        {
 437            .name = "max-transfer",
 438            .type = QEMU_OPT_SIZE,
 439            .help = "Maximum transfer size in bytes",
 440        },
 441        {
 442            .name = "opt-write-zero",
 443            .type = QEMU_OPT_SIZE,
 444            .help = "Optimum write zero alignment in bytes",
 445        },
 446        {
 447            .name = "max-write-zero",
 448            .type = QEMU_OPT_SIZE,
 449            .help = "Maximum write zero size in bytes",
 450        },
 451        {
 452            .name = "opt-discard",
 453            .type = QEMU_OPT_SIZE,
 454            .help = "Optimum discard alignment in bytes",
 455        },
 456        {
 457            .name = "max-discard",
 458            .type = QEMU_OPT_SIZE,
 459            .help = "Maximum discard size in bytes",
 460        },
 461        { /* end of list */ }
 462    },
 463};
 464
 465static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
 466                         Error **errp)
 467{
 468    BDRVBlkdebugState *s = bs->opaque;
 469    QemuOpts *opts;
 470    Error *local_err = NULL;
 471    int ret;
 472    uint64_t align;
 473
 474    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
 475    qemu_opts_absorb_qdict(opts, options, &local_err);
 476    if (local_err) {
 477        error_propagate(errp, local_err);
 478        ret = -EINVAL;
 479        goto out;
 480    }
 481
 482    /* Read rules from config file or command line options */
 483    s->config_file = g_strdup(qemu_opt_get(opts, "config"));
 484    ret = read_config(s, s->config_file, options, errp);
 485    if (ret) {
 486        goto out;
 487    }
 488
 489    /* Set initial state */
 490    s->state = 1;
 491
 492    /* Parse permissions modifiers before opening the image file */
 493    ret = blkdebug_parse_perms(s, options, errp);
 494    if (ret < 0) {
 495        goto out;
 496    }
 497
 498    /* Open the image file */
 499    bs->file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options, "image",
 500                               bs, &child_file, false, &local_err);
 501    if (local_err) {
 502        ret = -EINVAL;
 503        error_propagate(errp, local_err);
 504        goto out;
 505    }
 506
 507    bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
 508        (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
 509    bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
 510        ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
 511            bs->file->bs->supported_zero_flags);
 512    ret = -EINVAL;
 513
 514    /* Set alignment overrides */
 515    s->align = qemu_opt_get_size(opts, "align", 0);
 516    if (s->align && (s->align >= INT_MAX || !is_power_of_2(s->align))) {
 517        error_setg(errp, "Cannot meet constraints with align %" PRIu64,
 518                   s->align);
 519        goto out;
 520    }
 521    align = MAX(s->align, bs->file->bs->bl.request_alignment);
 522
 523    s->max_transfer = qemu_opt_get_size(opts, "max-transfer", 0);
 524    if (s->max_transfer &&
 525        (s->max_transfer >= INT_MAX ||
 526         !QEMU_IS_ALIGNED(s->max_transfer, align))) {
 527        error_setg(errp, "Cannot meet constraints with max-transfer %" PRIu64,
 528                   s->max_transfer);
 529        goto out;
 530    }
 531
 532    s->opt_write_zero = qemu_opt_get_size(opts, "opt-write-zero", 0);
 533    if (s->opt_write_zero &&
 534        (s->opt_write_zero >= INT_MAX ||
 535         !QEMU_IS_ALIGNED(s->opt_write_zero, align))) {
 536        error_setg(errp, "Cannot meet constraints with opt-write-zero %" PRIu64,
 537                   s->opt_write_zero);
 538        goto out;
 539    }
 540
 541    s->max_write_zero = qemu_opt_get_size(opts, "max-write-zero", 0);
 542    if (s->max_write_zero &&
 543        (s->max_write_zero >= INT_MAX ||
 544         !QEMU_IS_ALIGNED(s->max_write_zero,
 545                          MAX(s->opt_write_zero, align)))) {
 546        error_setg(errp, "Cannot meet constraints with max-write-zero %" PRIu64,
 547                   s->max_write_zero);
 548        goto out;
 549    }
 550
 551    s->opt_discard = qemu_opt_get_size(opts, "opt-discard", 0);
 552    if (s->opt_discard &&
 553        (s->opt_discard >= INT_MAX ||
 554         !QEMU_IS_ALIGNED(s->opt_discard, align))) {
 555        error_setg(errp, "Cannot meet constraints with opt-discard %" PRIu64,
 556                   s->opt_discard);
 557        goto out;
 558    }
 559
 560    s->max_discard = qemu_opt_get_size(opts, "max-discard", 0);
 561    if (s->max_discard &&
 562        (s->max_discard >= INT_MAX ||
 563         !QEMU_IS_ALIGNED(s->max_discard,
 564                          MAX(s->opt_discard, align)))) {
 565        error_setg(errp, "Cannot meet constraints with max-discard %" PRIu64,
 566                   s->max_discard);
 567        goto out;
 568    }
 569
 570    bdrv_debug_event(bs, BLKDBG_NONE);
 571
 572    ret = 0;
 573out:
 574    if (ret < 0) {
 575        g_free(s->config_file);
 576    }
 577    qemu_opts_del(opts);
 578    return ret;
 579}
 580
 581static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
 582                      BlkdebugIOType iotype)
 583{
 584    BDRVBlkdebugState *s = bs->opaque;
 585    BlkdebugRule *rule = NULL;
 586    int error;
 587    bool immediately;
 588
 589    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
 590        uint64_t inject_offset = rule->options.inject.offset;
 591
 592        if ((inject_offset == -1 ||
 593             (bytes && inject_offset >= offset &&
 594              inject_offset < offset + bytes)) &&
 595            (rule->options.inject.iotype_mask & (1ull << iotype)))
 596        {
 597            break;
 598        }
 599    }
 600
 601    if (!rule || !rule->options.inject.error) {
 602        return 0;
 603    }
 604
 605    immediately = rule->options.inject.immediately;
 606    error = rule->options.inject.error;
 607
 608    if (rule->options.inject.once) {
 609        QSIMPLEQ_REMOVE(&s->active_rules, rule, BlkdebugRule, active_next);
 610        remove_rule(rule);
 611    }
 612
 613    if (!immediately) {
 614        aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
 615        qemu_coroutine_yield();
 616    }
 617
 618    return -error;
 619}
 620
 621static int coroutine_fn
 622blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
 623                   QEMUIOVector *qiov, int flags)
 624{
 625    int err;
 626
 627    /* Sanity check block layer guarantees */
 628    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
 629    assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
 630    if (bs->bl.max_transfer) {
 631        assert(bytes <= bs->bl.max_transfer);
 632    }
 633
 634    err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_READ);
 635    if (err) {
 636        return err;
 637    }
 638
 639    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
 640}
 641
 642static int coroutine_fn
 643blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
 644                    QEMUIOVector *qiov, int flags)
 645{
 646    int err;
 647
 648    /* Sanity check block layer guarantees */
 649    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
 650    assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
 651    if (bs->bl.max_transfer) {
 652        assert(bytes <= bs->bl.max_transfer);
 653    }
 654
 655    err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_WRITE);
 656    if (err) {
 657        return err;
 658    }
 659
 660    return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
 661}
 662
 663static int blkdebug_co_flush(BlockDriverState *bs)
 664{
 665    int err = rule_check(bs, 0, 0, BLKDEBUG_IO_TYPE_FLUSH);
 666
 667    if (err) {
 668        return err;
 669    }
 670
 671    return bdrv_co_flush(bs->file->bs);
 672}
 673
 674static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
 675                                                  int64_t offset, int bytes,
 676                                                  BdrvRequestFlags flags)
 677{
 678    uint32_t align = MAX(bs->bl.request_alignment,
 679                         bs->bl.pwrite_zeroes_alignment);
 680    int err;
 681
 682    /* Only pass through requests that are larger than requested
 683     * preferred alignment (so that we test the fallback to writes on
 684     * unaligned portions), and check that the block layer never hands
 685     * us anything unaligned that crosses an alignment boundary.  */
 686    if (bytes < align) {
 687        assert(QEMU_IS_ALIGNED(offset, align) ||
 688               QEMU_IS_ALIGNED(offset + bytes, align) ||
 689               DIV_ROUND_UP(offset, align) ==
 690               DIV_ROUND_UP(offset + bytes, align));
 691        return -ENOTSUP;
 692    }
 693    assert(QEMU_IS_ALIGNED(offset, align));
 694    assert(QEMU_IS_ALIGNED(bytes, align));
 695    if (bs->bl.max_pwrite_zeroes) {
 696        assert(bytes <= bs->bl.max_pwrite_zeroes);
 697    }
 698
 699    err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_WRITE_ZEROES);
 700    if (err) {
 701        return err;
 702    }
 703
 704    return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
 705}
 706
 707static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
 708                                             int64_t offset, int bytes)
 709{
 710    uint32_t align = bs->bl.pdiscard_alignment;
 711    int err;
 712
 713    /* Only pass through requests that are larger than requested
 714     * minimum alignment, and ensure that unaligned requests do not
 715     * cross optimum discard boundaries. */
 716    if (bytes < bs->bl.request_alignment) {
 717        assert(QEMU_IS_ALIGNED(offset, align) ||
 718               QEMU_IS_ALIGNED(offset + bytes, align) ||
 719               DIV_ROUND_UP(offset, align) ==
 720               DIV_ROUND_UP(offset + bytes, align));
 721        return -ENOTSUP;
 722    }
 723    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
 724    assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
 725    if (align && bytes >= align) {
 726        assert(QEMU_IS_ALIGNED(offset, align));
 727        assert(QEMU_IS_ALIGNED(bytes, align));
 728    }
 729    if (bs->bl.max_pdiscard) {
 730        assert(bytes <= bs->bl.max_pdiscard);
 731    }
 732
 733    err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_DISCARD);
 734    if (err) {
 735        return err;
 736    }
 737
 738    return bdrv_co_pdiscard(bs->file, offset, bytes);
 739}
 740
 741static int coroutine_fn blkdebug_co_block_status(BlockDriverState *bs,
 742                                                 bool want_zero,
 743                                                 int64_t offset,
 744                                                 int64_t bytes,
 745                                                 int64_t *pnum,
 746                                                 int64_t *map,
 747                                                 BlockDriverState **file)
 748{
 749    int err;
 750
 751    assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment));
 752
 753    err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_BLOCK_STATUS);
 754    if (err) {
 755        return err;
 756    }
 757
 758    return bdrv_co_block_status_from_file(bs, want_zero, offset, bytes,
 759                                          pnum, map, file);
 760}
 761
 762static void blkdebug_close(BlockDriverState *bs)
 763{
 764    BDRVBlkdebugState *s = bs->opaque;
 765    BlkdebugRule *rule, *next;
 766    int i;
 767
 768    for (i = 0; i < BLKDBG__MAX; i++) {
 769        QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
 770            remove_rule(rule);
 771        }
 772    }
 773
 774    g_free(s->config_file);
 775}
 776
 777static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule)
 778{
 779    BDRVBlkdebugState *s = bs->opaque;
 780    BlkdebugSuspendedReq r;
 781
 782    r = (BlkdebugSuspendedReq) {
 783        .co         = qemu_coroutine_self(),
 784        .tag        = g_strdup(rule->options.suspend.tag),
 785    };
 786
 787    remove_rule(rule);
 788    QLIST_INSERT_HEAD(&s->suspended_reqs, &r, next);
 789
 790    if (!qtest_enabled()) {
 791        printf("blkdebug: Suspended request '%s'\n", r.tag);
 792    }
 793    qemu_coroutine_yield();
 794    if (!qtest_enabled()) {
 795        printf("blkdebug: Resuming request '%s'\n", r.tag);
 796    }
 797
 798    QLIST_REMOVE(&r, next);
 799    g_free(r.tag);
 800}
 801
 802static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
 803    bool injected)
 804{
 805    BDRVBlkdebugState *s = bs->opaque;
 806
 807    /* Only process rules for the current state */
 808    if (rule->state && rule->state != s->state) {
 809        return injected;
 810    }
 811
 812    /* Take the action */
 813    switch (rule->action) {
 814    case ACTION_INJECT_ERROR:
 815        if (!injected) {
 816            QSIMPLEQ_INIT(&s->active_rules);
 817            injected = true;
 818        }
 819        QSIMPLEQ_INSERT_HEAD(&s->active_rules, rule, active_next);
 820        break;
 821
 822    case ACTION_SET_STATE:
 823        s->new_state = rule->options.set_state.new_state;
 824        break;
 825
 826    case ACTION_SUSPEND:
 827        suspend_request(bs, rule);
 828        break;
 829    }
 830    return injected;
 831}
 832
 833static void blkdebug_debug_event(BlockDriverState *bs, BlkdebugEvent event)
 834{
 835    BDRVBlkdebugState *s = bs->opaque;
 836    struct BlkdebugRule *rule, *next;
 837    bool injected;
 838
 839    assert((int)event >= 0 && event < BLKDBG__MAX);
 840
 841    injected = false;
 842    s->new_state = s->state;
 843    QLIST_FOREACH_SAFE(rule, &s->rules[event], next, next) {
 844        injected = process_rule(bs, rule, injected);
 845    }
 846    s->state = s->new_state;
 847}
 848
 849static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event,
 850                                     const char *tag)
 851{
 852    BDRVBlkdebugState *s = bs->opaque;
 853    struct BlkdebugRule *rule;
 854    int blkdebug_event;
 855
 856    blkdebug_event = qapi_enum_parse(&BlkdebugEvent_lookup, event, -1, NULL);
 857    if (blkdebug_event < 0) {
 858        return -ENOENT;
 859    }
 860
 861    rule = g_malloc(sizeof(*rule));
 862    *rule = (struct BlkdebugRule) {
 863        .event  = blkdebug_event,
 864        .action = ACTION_SUSPEND,
 865        .state  = 0,
 866        .options.suspend.tag = g_strdup(tag),
 867    };
 868
 869    QLIST_INSERT_HEAD(&s->rules[blkdebug_event], rule, next);
 870
 871    return 0;
 872}
 873
 874static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
 875{
 876    BDRVBlkdebugState *s = bs->opaque;
 877    BlkdebugSuspendedReq *r, *next;
 878
 879    QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, next) {
 880        if (!strcmp(r->tag, tag)) {
 881            qemu_coroutine_enter(r->co);
 882            return 0;
 883        }
 884    }
 885    return -ENOENT;
 886}
 887
 888static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs,
 889                                            const char *tag)
 890{
 891    BDRVBlkdebugState *s = bs->opaque;
 892    BlkdebugSuspendedReq *r, *r_next;
 893    BlkdebugRule *rule, *next;
 894    int i, ret = -ENOENT;
 895
 896    for (i = 0; i < BLKDBG__MAX; i++) {
 897        QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
 898            if (rule->action == ACTION_SUSPEND &&
 899                !strcmp(rule->options.suspend.tag, tag)) {
 900                remove_rule(rule);
 901                ret = 0;
 902            }
 903        }
 904    }
 905    QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, r_next) {
 906        if (!strcmp(r->tag, tag)) {
 907            qemu_coroutine_enter(r->co);
 908            ret = 0;
 909        }
 910    }
 911    return ret;
 912}
 913
 914static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag)
 915{
 916    BDRVBlkdebugState *s = bs->opaque;
 917    BlkdebugSuspendedReq *r;
 918
 919    QLIST_FOREACH(r, &s->suspended_reqs, next) {
 920        if (!strcmp(r->tag, tag)) {
 921            return true;
 922        }
 923    }
 924    return false;
 925}
 926
 927static int64_t blkdebug_getlength(BlockDriverState *bs)
 928{
 929    return bdrv_getlength(bs->file->bs);
 930}
 931
 932static void blkdebug_refresh_filename(BlockDriverState *bs)
 933{
 934    BDRVBlkdebugState *s = bs->opaque;
 935    const QDictEntry *e;
 936    int ret;
 937
 938    if (!bs->file->bs->exact_filename[0]) {
 939        return;
 940    }
 941
 942    for (e = qdict_first(bs->full_open_options); e;
 943         e = qdict_next(bs->full_open_options, e))
 944    {
 945        /* Real child options are under "image", but "x-image" may
 946         * contain a filename */
 947        if (strcmp(qdict_entry_key(e), "config") &&
 948            strcmp(qdict_entry_key(e), "image") &&
 949            strcmp(qdict_entry_key(e), "x-image") &&
 950            strcmp(qdict_entry_key(e), "driver"))
 951        {
 952            return;
 953        }
 954    }
 955
 956    ret = snprintf(bs->exact_filename, sizeof(bs->exact_filename),
 957                   "blkdebug:%s:%s",
 958                   s->config_file ?: "", bs->file->bs->exact_filename);
 959    if (ret >= sizeof(bs->exact_filename)) {
 960        /* An overflow makes the filename unusable, so do not report any */
 961        bs->exact_filename[0] = 0;
 962    }
 963}
 964
 965static void blkdebug_refresh_limits(BlockDriverState *bs, Error **errp)
 966{
 967    BDRVBlkdebugState *s = bs->opaque;
 968
 969    if (s->align) {
 970        bs->bl.request_alignment = s->align;
 971    }
 972    if (s->max_transfer) {
 973        bs->bl.max_transfer = s->max_transfer;
 974    }
 975    if (s->opt_write_zero) {
 976        bs->bl.pwrite_zeroes_alignment = s->opt_write_zero;
 977    }
 978    if (s->max_write_zero) {
 979        bs->bl.max_pwrite_zeroes = s->max_write_zero;
 980    }
 981    if (s->opt_discard) {
 982        bs->bl.pdiscard_alignment = s->opt_discard;
 983    }
 984    if (s->max_discard) {
 985        bs->bl.max_pdiscard = s->max_discard;
 986    }
 987}
 988
 989static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state,
 990                                   BlockReopenQueue *queue, Error **errp)
 991{
 992    return 0;
 993}
 994
 995static void blkdebug_child_perm(BlockDriverState *bs, BdrvChild *c,
 996                                const BdrvChildRole *role,
 997                                BlockReopenQueue *reopen_queue,
 998                                uint64_t perm, uint64_t shared,
 999                                uint64_t *nperm, uint64_t *nshared)
1000{
1001    BDRVBlkdebugState *s = bs->opaque;
1002
1003    bdrv_filter_default_perms(bs, c, role, reopen_queue, perm, shared,
1004                              nperm, nshared);
1005
1006    *nperm |= s->take_child_perms;
1007    *nshared &= ~s->unshare_child_perms;
1008}
1009
/*
 * NULL-terminated list handed to the block layer as .strong_runtime_opts.
 * Names ending in '.' are presumably prefixes covering a whole group of
 * options -- confirm against the block layer.
 */
static const char *const blkdebug_strong_runtime_opts[] = {
    /* Rule sources */
    "config",
    "inject-error.",
    "set-state.",

    /* Limit overrides */
    "align",
    "max-transfer",
    "opt-write-zero",
    "max-write-zero",
    "opt-discard",
    "max-discard",

    NULL
};
1023
1024static BlockDriver bdrv_blkdebug = {
1025    .format_name            = "blkdebug",
1026    .protocol_name          = "blkdebug",
1027    .instance_size          = sizeof(BDRVBlkdebugState),
1028    .is_filter              = true,
1029
1030    .bdrv_parse_filename    = blkdebug_parse_filename,
1031    .bdrv_file_open         = blkdebug_open,
1032    .bdrv_close             = blkdebug_close,
1033    .bdrv_reopen_prepare    = blkdebug_reopen_prepare,
1034    .bdrv_child_perm        = blkdebug_child_perm,
1035
1036    .bdrv_getlength         = blkdebug_getlength,
1037    .bdrv_refresh_filename  = blkdebug_refresh_filename,
1038    .bdrv_refresh_limits    = blkdebug_refresh_limits,
1039
1040    .bdrv_co_preadv         = blkdebug_co_preadv,
1041    .bdrv_co_pwritev        = blkdebug_co_pwritev,
1042    .bdrv_co_flush_to_disk  = blkdebug_co_flush,
1043    .bdrv_co_pwrite_zeroes  = blkdebug_co_pwrite_zeroes,
1044    .bdrv_co_pdiscard       = blkdebug_co_pdiscard,
1045    .bdrv_co_block_status   = blkdebug_co_block_status,
1046
1047    .bdrv_debug_event           = blkdebug_debug_event,
1048    .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint,
1049    .bdrv_debug_remove_breakpoint
1050                                = blkdebug_debug_remove_breakpoint,
1051    .bdrv_debug_resume          = blkdebug_debug_resume,
1052    .bdrv_debug_is_suspended    = blkdebug_debug_is_suspended,
1053
1054    .strong_runtime_opts        = blkdebug_strong_runtime_opts,
1055};
1056
1057static void bdrv_blkdebug_init(void)
1058{
1059    bdrv_register(&bdrv_blkdebug);
1060}
1061
1062block_init(bdrv_blkdebug_init);
1063