qemu/block/blkdebug.c
<<
>>
Prefs
   1/*
   2 * Block protocol for I/O error injection
   3 *
   4 * Copyright (C) 2016-2017 Red Hat, Inc.
   5 * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25
  26#include "qemu/osdep.h"
  27#include "qapi/error.h"
  28#include "qemu/cutils.h"
  29#include "qemu/config-file.h"
  30#include "block/block-io.h"
  31#include "block/block_int.h"
  32#include "block/qdict.h"
  33#include "qemu/module.h"
  34#include "qemu/option.h"
  35#include "qapi/qapi-visit-block-core.h"
  36#include "qapi/qmp/qdict.h"
  37#include "qapi/qmp/qlist.h"
  38#include "qapi/qmp/qstring.h"
  39#include "qapi/qobject-input-visitor.h"
  40#include "sysemu/qtest.h"
  41
  42/* All APIs are thread-safe */
  43
/* Driver-private state of one blkdebug node (bs->opaque). */
typedef struct BDRVBlkdebugState {
    /* IN: initialized in blkdebug_open() and never changed */
    uint64_t align;          /* "align" option, 0 if not given */
    uint64_t max_transfer;   /* "max-transfer" option, 0 if not given */
    uint64_t opt_write_zero; /* "opt-write-zero" option, 0 if not given */
    uint64_t max_write_zero; /* "max-write-zero" option, 0 if not given */
    uint64_t opt_discard;    /* "opt-discard" option, 0 if not given */
    uint64_t max_discard;    /* "max-discard" option, 0 if not given */
    char *config_file; /* For blkdebug_refresh_filename() */
    /* initialized in blkdebug_parse_perms() */
    uint64_t take_child_perms;
    uint64_t unshare_child_perms;

    /* State. Protected by lock */
    int state; /* compared against BlkdebugRule.state in process_rule() */
    /* All configured rules, one list per event (indexed by BlkdebugEvent) */
    QLIST_HEAD(, BlkdebugRule) rules[BLKDBG__MAX];
    /* Inject-error rules queued by process_rule(), read by rule_check() */
    QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
    /* Requests parked by suspend_request() until resumed by tag */
    QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs;
    QemuMutex lock;
} BDRVBlkdebugState;
  64
/*
 * AIOCB wrapper that carries a result code.
 * NOTE(review): no user of this type is visible in this part of the file;
 * confirm whether it is still needed.
 */
typedef struct BlkdebugAIOCB {
    BlockAIOCB common;
    int ret;
} BlkdebugAIOCB;
  69
/* A request whose coroutine is parked until it is resumed by tag. */
typedef struct BlkdebugSuspendedReq {
    /* IN: initialized in suspend_request() */
    Coroutine *co; /* coroutine to re-enter on resume */
    char *tag;     /* owned copy of the rule's tag; freed on resume */

    /* List entry protected by BDRVBlkdebugState's lock */
    QLIST_ENTRY(BlkdebugSuspendedReq) next;
} BlkdebugSuspendedReq;
  78
/* What a rule does when its event fires; stored in BlkdebugRule.action */
enum {
    ACTION_INJECT_ERROR, /* queue the rule so rule_check() can fail I/O */
    ACTION_SET_STATE,    /* switch BDRVBlkdebugState.state */
    ACTION_SUSPEND,      /* park the coroutine that raised the event */
    ACTION__MAX,         /* number of actions; sizes the per-action counters */
};
  85
/* One configured rule: "when @event fires in @state, perform @action". */
typedef struct BlkdebugRule {
    /* IN: initialized in add_rule() or blkdebug_debug_breakpoint() */
    BlkdebugEvent event;
    int action; /* one of the ACTION_* values */
    int state;  /* state the rule applies to; 0 applies in every state */
    union {
        /* ACTION_INJECT_ERROR */
        struct {
            uint64_t iotype_mask; /* one bit per BlkdebugIOType */
            int error;            /* positive errno; rule_check() negates it */
            int immediately;      /* if zero, rule_check() yields once first */
            int once;             /* drop the rule after it has fired */
            int64_t offset;       /* byte offset to hit, -1 for any offset */
        } inject;
        /* ACTION_SET_STATE */
        struct {
            int new_state;
        } set_state;
        /* ACTION_SUSPEND */
        struct {
            char *tag; /* owned; freed by remove_rule() */
        } suspend;
    } options;

    /* List entries protected by BDRVBlkdebugState's lock */
    QLIST_ENTRY(BlkdebugRule) next;
    QSIMPLEQ_ENTRY(BlkdebugRule) active_next;
} BlkdebugRule;
 111
/*
 * inject.iotype_mask stores (1ull << iotype), so every BlkdebugIOType value
 * must be a valid bit index of a 64-bit mask.
 */
QEMU_BUILD_BUG_MSG(BLKDEBUG_IO_TYPE__MAX > 64,
                   "BlkdebugIOType mask does not fit into an uint64_t");
 114
 115static QemuOptsList inject_error_opts = {
 116    .name = "inject-error",
 117    .head = QTAILQ_HEAD_INITIALIZER(inject_error_opts.head),
 118    .desc = {
 119        {
 120            .name = "event",
 121            .type = QEMU_OPT_STRING,
 122        },
 123        {
 124            .name = "state",
 125            .type = QEMU_OPT_NUMBER,
 126        },
 127        {
 128            .name = "iotype",
 129            .type = QEMU_OPT_STRING,
 130        },
 131        {
 132            .name = "errno",
 133            .type = QEMU_OPT_NUMBER,
 134        },
 135        {
 136            .name = "sector",
 137            .type = QEMU_OPT_NUMBER,
 138        },
 139        {
 140            .name = "once",
 141            .type = QEMU_OPT_BOOL,
 142        },
 143        {
 144            .name = "immediately",
 145            .type = QEMU_OPT_BOOL,
 146        },
 147        { /* end of list */ }
 148    },
 149};
 150
 151static QemuOptsList set_state_opts = {
 152    .name = "set-state",
 153    .head = QTAILQ_HEAD_INITIALIZER(set_state_opts.head),
 154    .desc = {
 155        {
 156            .name = "event",
 157            .type = QEMU_OPT_STRING,
 158        },
 159        {
 160            .name = "state",
 161            .type = QEMU_OPT_NUMBER,
 162        },
 163        {
 164            .name = "new_state",
 165            .type = QEMU_OPT_NUMBER,
 166        },
 167        { /* end of list */ }
 168    },
 169};
 170
 171static QemuOptsList *config_groups[] = {
 172    &inject_error_opts,
 173    &set_state_opts,
 174    NULL
 175};
 176
/* Opaque argument handed to add_rule() through qemu_opts_foreach() */
struct add_rule_data {
    BDRVBlkdebugState *s; /* driver state that receives the new rules */
    int action;           /* ACTION_* assigned to every rule being added */
};
 181
 182static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
 183{
 184    struct add_rule_data *d = opaque;
 185    BDRVBlkdebugState *s = d->s;
 186    const char *event_name;
 187    int event;
 188    struct BlkdebugRule *rule;
 189    int64_t sector;
 190    BlkdebugIOType iotype;
 191    Error *local_error = NULL;
 192
 193    /* Find the right event for the rule */
 194    event_name = qemu_opt_get(opts, "event");
 195    if (!event_name) {
 196        error_setg(errp, "Missing event name for rule");
 197        return -1;
 198    }
 199    event = qapi_enum_parse(&BlkdebugEvent_lookup, event_name, -1, errp);
 200    if (event < 0) {
 201        return -1;
 202    }
 203
 204    /* Set attributes common for all actions */
 205    rule = g_malloc0(sizeof(*rule));
 206    *rule = (struct BlkdebugRule) {
 207        .event  = event,
 208        .action = d->action,
 209        .state  = qemu_opt_get_number(opts, "state", 0),
 210    };
 211
 212    /* Parse action-specific options */
 213    switch (d->action) {
 214    case ACTION_INJECT_ERROR:
 215        rule->options.inject.error = qemu_opt_get_number(opts, "errno", EIO);
 216        rule->options.inject.once  = qemu_opt_get_bool(opts, "once", 0);
 217        rule->options.inject.immediately =
 218            qemu_opt_get_bool(opts, "immediately", 0);
 219        sector = qemu_opt_get_number(opts, "sector", -1);
 220        rule->options.inject.offset =
 221            sector == -1 ? -1 : sector * BDRV_SECTOR_SIZE;
 222
 223        iotype = qapi_enum_parse(&BlkdebugIOType_lookup,
 224                                 qemu_opt_get(opts, "iotype"),
 225                                 BLKDEBUG_IO_TYPE__MAX, &local_error);
 226        if (local_error) {
 227            error_propagate(errp, local_error);
 228            g_free(rule);
 229            return -1;
 230        }
 231        if (iotype != BLKDEBUG_IO_TYPE__MAX) {
 232            rule->options.inject.iotype_mask = (1ull << iotype);
 233        } else {
 234            /* Apply the default */
 235            rule->options.inject.iotype_mask =
 236                (1ull << BLKDEBUG_IO_TYPE_READ)
 237                | (1ull << BLKDEBUG_IO_TYPE_WRITE)
 238                | (1ull << BLKDEBUG_IO_TYPE_WRITE_ZEROES)
 239                | (1ull << BLKDEBUG_IO_TYPE_DISCARD)
 240                | (1ull << BLKDEBUG_IO_TYPE_FLUSH);
 241        }
 242
 243        break;
 244
 245    case ACTION_SET_STATE:
 246        rule->options.set_state.new_state =
 247            qemu_opt_get_number(opts, "new_state", 0);
 248        break;
 249
 250    case ACTION_SUSPEND:
 251        rule->options.suspend.tag =
 252            g_strdup(qemu_opt_get(opts, "tag"));
 253        break;
 254    };
 255
 256    /* Add the rule */
 257    qemu_mutex_lock(&s->lock);
 258    QLIST_INSERT_HEAD(&s->rules[event], rule, next);
 259    qemu_mutex_unlock(&s->lock);
 260
 261    return 0;
 262}
 263
 264/* Called with lock held or from .bdrv_close */
 265static void remove_rule(BlkdebugRule *rule)
 266{
 267    switch (rule->action) {
 268    case ACTION_INJECT_ERROR:
 269    case ACTION_SET_STATE:
 270        break;
 271    case ACTION_SUSPEND:
 272        g_free(rule->options.suspend.tag);
 273        break;
 274    }
 275
 276    QLIST_REMOVE(rule, next);
 277    g_free(rule);
 278}
 279
 280static int read_config(BDRVBlkdebugState *s, const char *filename,
 281                       QDict *options, Error **errp)
 282{
 283    FILE *f = NULL;
 284    int ret;
 285    struct add_rule_data d;
 286    Error *local_err = NULL;
 287
 288    if (filename) {
 289        f = fopen(filename, "r");
 290        if (f == NULL) {
 291            error_setg_errno(errp, errno, "Could not read blkdebug config file");
 292            return -errno;
 293        }
 294
 295        ret = qemu_config_parse(f, config_groups, filename, errp);
 296        if (ret < 0) {
 297            goto fail;
 298        }
 299    }
 300
 301    if (!qemu_config_parse_qdict(options, config_groups, errp)) {
 302        ret = -EINVAL;
 303        goto fail;
 304    }
 305
 306    d.s = s;
 307    d.action = ACTION_INJECT_ERROR;
 308    qemu_opts_foreach(&inject_error_opts, add_rule, &d, &local_err);
 309    if (local_err) {
 310        error_propagate(errp, local_err);
 311        ret = -EINVAL;
 312        goto fail;
 313    }
 314
 315    d.action = ACTION_SET_STATE;
 316    qemu_opts_foreach(&set_state_opts, add_rule, &d, &local_err);
 317    if (local_err) {
 318        error_propagate(errp, local_err);
 319        ret = -EINVAL;
 320        goto fail;
 321    }
 322
 323    ret = 0;
 324fail:
 325    qemu_opts_reset(&inject_error_opts);
 326    qemu_opts_reset(&set_state_opts);
 327    if (f) {
 328        fclose(f);
 329    }
 330    return ret;
 331}
 332
 333/* Valid blkdebug filenames look like blkdebug:path/to/config:path/to/image */
 334static void blkdebug_parse_filename(const char *filename, QDict *options,
 335                                    Error **errp)
 336{
 337    const char *c;
 338
 339    /* Parse the blkdebug: prefix */
 340    if (!strstart(filename, "blkdebug:", &filename)) {
 341        /* There was no prefix; therefore, all options have to be already
 342           present in the QDict (except for the filename) */
 343        qdict_put_str(options, "x-image", filename);
 344        return;
 345    }
 346
 347    /* Parse config file path */
 348    c = strchr(filename, ':');
 349    if (c == NULL) {
 350        error_setg(errp, "blkdebug requires both config file and image path");
 351        return;
 352    }
 353
 354    if (c != filename) {
 355        QString *config_path;
 356        config_path = qstring_from_substr(filename, 0, c - filename);
 357        qdict_put(options, "config", config_path);
 358    }
 359
 360    /* TODO Allow multi-level nesting and set file.filename here */
 361    filename = c + 1;
 362    qdict_put_str(options, "x-image", filename);
 363}
 364
 365static int blkdebug_parse_perm_list(uint64_t *dest, QDict *options,
 366                                    const char *prefix, Error **errp)
 367{
 368    int ret = 0;
 369    QDict *subqdict = NULL;
 370    QObject *crumpled_subqdict = NULL;
 371    Visitor *v = NULL;
 372    BlockPermissionList *perm_list = NULL, *element;
 373
 374    *dest = 0;
 375
 376    qdict_extract_subqdict(options, &subqdict, prefix);
 377    if (!qdict_size(subqdict)) {
 378        goto out;
 379    }
 380
 381    crumpled_subqdict = qdict_crumple(subqdict, errp);
 382    if (!crumpled_subqdict) {
 383        ret = -EINVAL;
 384        goto out;
 385    }
 386
 387    v = qobject_input_visitor_new(crumpled_subqdict);
 388    if (!visit_type_BlockPermissionList(v, NULL, &perm_list, errp)) {
 389        ret = -EINVAL;
 390        goto out;
 391    }
 392
 393    for (element = perm_list; element; element = element->next) {
 394        *dest |= bdrv_qapi_perm_to_blk_perm(element->value);
 395    }
 396
 397out:
 398    qapi_free_BlockPermissionList(perm_list);
 399    visit_free(v);
 400    qobject_unref(subqdict);
 401    qobject_unref(crumpled_subqdict);
 402    return ret;
 403}
 404
 405static int blkdebug_parse_perms(BDRVBlkdebugState *s, QDict *options,
 406                                Error **errp)
 407{
 408    int ret;
 409
 410    ret = blkdebug_parse_perm_list(&s->take_child_perms, options,
 411                                   "take-child-perms.", errp);
 412    if (ret < 0) {
 413        return ret;
 414    }
 415
 416    ret = blkdebug_parse_perm_list(&s->unshare_child_perms, options,
 417                                   "unshare-child-perms.", errp);
 418    if (ret < 0) {
 419        return ret;
 420    }
 421
 422    return 0;
 423}
 424
 425static QemuOptsList runtime_opts = {
 426    .name = "blkdebug",
 427    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
 428    .desc = {
 429        {
 430            .name = "config",
 431            .type = QEMU_OPT_STRING,
 432            .help = "Path to the configuration file",
 433        },
 434        {
 435            .name = "x-image",
 436            .type = QEMU_OPT_STRING,
 437            .help = "[internal use only, will be removed]",
 438        },
 439        {
 440            .name = "align",
 441            .type = QEMU_OPT_SIZE,
 442            .help = "Required alignment in bytes",
 443        },
 444        {
 445            .name = "max-transfer",
 446            .type = QEMU_OPT_SIZE,
 447            .help = "Maximum transfer size in bytes",
 448        },
 449        {
 450            .name = "opt-write-zero",
 451            .type = QEMU_OPT_SIZE,
 452            .help = "Optimum write zero alignment in bytes",
 453        },
 454        {
 455            .name = "max-write-zero",
 456            .type = QEMU_OPT_SIZE,
 457            .help = "Maximum write zero size in bytes",
 458        },
 459        {
 460            .name = "opt-discard",
 461            .type = QEMU_OPT_SIZE,
 462            .help = "Optimum discard alignment in bytes",
 463        },
 464        {
 465            .name = "max-discard",
 466            .type = QEMU_OPT_SIZE,
 467            .help = "Maximum discard size in bytes",
 468        },
 469        { /* end of list */ }
 470    },
 471};
 472
 473static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
 474                         Error **errp)
 475{
 476    BDRVBlkdebugState *s = bs->opaque;
 477    QemuOpts *opts;
 478    int ret;
 479    uint64_t align;
 480
 481    qemu_mutex_init(&s->lock);
 482    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
 483    if (!qemu_opts_absorb_qdict(opts, options, errp)) {
 484        ret = -EINVAL;
 485        goto out;
 486    }
 487
 488    /* Read rules from config file or command line options */
 489    s->config_file = g_strdup(qemu_opt_get(opts, "config"));
 490    ret = read_config(s, s->config_file, options, errp);
 491    if (ret) {
 492        goto out;
 493    }
 494
 495    /* Set initial state */
 496    s->state = 1;
 497
 498    /* Parse permissions modifiers before opening the image file */
 499    ret = blkdebug_parse_perms(s, options, errp);
 500    if (ret < 0) {
 501        goto out;
 502    }
 503
 504    /* Open the image file */
 505    ret = bdrv_open_file_child(qemu_opt_get(opts, "x-image"), options, "image",
 506                               bs, errp);
 507    if (ret < 0) {
 508        goto out;
 509    }
 510
 511    bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
 512        (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
 513    bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
 514        ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
 515            bs->file->bs->supported_zero_flags);
 516    ret = -EINVAL;
 517
 518    /* Set alignment overrides */
 519    s->align = qemu_opt_get_size(opts, "align", 0);
 520    if (s->align && (s->align >= INT_MAX || !is_power_of_2(s->align))) {
 521        error_setg(errp, "Cannot meet constraints with align %" PRIu64,
 522                   s->align);
 523        goto out;
 524    }
 525    align = MAX(s->align, bs->file->bs->bl.request_alignment);
 526
 527    s->max_transfer = qemu_opt_get_size(opts, "max-transfer", 0);
 528    if (s->max_transfer &&
 529        (s->max_transfer >= INT_MAX ||
 530         !QEMU_IS_ALIGNED(s->max_transfer, align))) {
 531        error_setg(errp, "Cannot meet constraints with max-transfer %" PRIu64,
 532                   s->max_transfer);
 533        goto out;
 534    }
 535
 536    s->opt_write_zero = qemu_opt_get_size(opts, "opt-write-zero", 0);
 537    if (s->opt_write_zero &&
 538        (s->opt_write_zero >= INT_MAX ||
 539         !QEMU_IS_ALIGNED(s->opt_write_zero, align))) {
 540        error_setg(errp, "Cannot meet constraints with opt-write-zero %" PRIu64,
 541                   s->opt_write_zero);
 542        goto out;
 543    }
 544
 545    s->max_write_zero = qemu_opt_get_size(opts, "max-write-zero", 0);
 546    if (s->max_write_zero &&
 547        (s->max_write_zero >= INT_MAX ||
 548         !QEMU_IS_ALIGNED(s->max_write_zero,
 549                          MAX(s->opt_write_zero, align)))) {
 550        error_setg(errp, "Cannot meet constraints with max-write-zero %" PRIu64,
 551                   s->max_write_zero);
 552        goto out;
 553    }
 554
 555    s->opt_discard = qemu_opt_get_size(opts, "opt-discard", 0);
 556    if (s->opt_discard &&
 557        (s->opt_discard >= INT_MAX ||
 558         !QEMU_IS_ALIGNED(s->opt_discard, align))) {
 559        error_setg(errp, "Cannot meet constraints with opt-discard %" PRIu64,
 560                   s->opt_discard);
 561        goto out;
 562    }
 563
 564    s->max_discard = qemu_opt_get_size(opts, "max-discard", 0);
 565    if (s->max_discard &&
 566        (s->max_discard >= INT_MAX ||
 567         !QEMU_IS_ALIGNED(s->max_discard,
 568                          MAX(s->opt_discard, align)))) {
 569        error_setg(errp, "Cannot meet constraints with max-discard %" PRIu64,
 570                   s->max_discard);
 571        goto out;
 572    }
 573
 574    bdrv_debug_event(bs, BLKDBG_NONE);
 575
 576    ret = 0;
 577out:
 578    if (ret < 0) {
 579        qemu_mutex_destroy(&s->lock);
 580        g_free(s->config_file);
 581    }
 582    qemu_opts_del(opts);
 583    return ret;
 584}
 585
 586static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
 587                      BlkdebugIOType iotype)
 588{
 589    BDRVBlkdebugState *s = bs->opaque;
 590    BlkdebugRule *rule = NULL;
 591    int error;
 592    bool immediately;
 593
 594    qemu_mutex_lock(&s->lock);
 595    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
 596        uint64_t inject_offset = rule->options.inject.offset;
 597
 598        if ((inject_offset == -1 ||
 599             (bytes && inject_offset >= offset &&
 600              inject_offset < offset + bytes)) &&
 601            (rule->options.inject.iotype_mask & (1ull << iotype)))
 602        {
 603            break;
 604        }
 605    }
 606
 607    if (!rule || !rule->options.inject.error) {
 608        qemu_mutex_unlock(&s->lock);
 609        return 0;
 610    }
 611
 612    immediately = rule->options.inject.immediately;
 613    error = rule->options.inject.error;
 614
 615    if (rule->options.inject.once) {
 616        QSIMPLEQ_REMOVE(&s->active_rules, rule, BlkdebugRule, active_next);
 617        remove_rule(rule);
 618    }
 619
 620    qemu_mutex_unlock(&s->lock);
 621    if (!immediately) {
 622        aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
 623        qemu_coroutine_yield();
 624    }
 625
 626    return -error;
 627}
 628
 629static int coroutine_fn GRAPH_RDLOCK
 630blkdebug_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
 631                   QEMUIOVector *qiov, BdrvRequestFlags flags)
 632{
 633    int err;
 634
 635    /* Sanity check block layer guarantees */
 636    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
 637    assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
 638    if (bs->bl.max_transfer) {
 639        assert(bytes <= bs->bl.max_transfer);
 640    }
 641
 642    err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_READ);
 643    if (err) {
 644        return err;
 645    }
 646
 647    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
 648}
 649
 650static int coroutine_fn GRAPH_RDLOCK
 651blkdebug_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
 652                    QEMUIOVector *qiov, BdrvRequestFlags flags)
 653{
 654    int err;
 655
 656    /* Sanity check block layer guarantees */
 657    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
 658    assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
 659    if (bs->bl.max_transfer) {
 660        assert(bytes <= bs->bl.max_transfer);
 661    }
 662
 663    err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_WRITE);
 664    if (err) {
 665        return err;
 666    }
 667
 668    return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
 669}
 670
 671static int GRAPH_RDLOCK coroutine_fn blkdebug_co_flush(BlockDriverState *bs)
 672{
 673    int err = rule_check(bs, 0, 0, BLKDEBUG_IO_TYPE_FLUSH);
 674
 675    if (err) {
 676        return err;
 677    }
 678
 679    return bdrv_co_flush(bs->file->bs);
 680}
 681
 682static int coroutine_fn GRAPH_RDLOCK
 683blkdebug_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
 684                          BdrvRequestFlags flags)
 685{
 686    uint32_t align = MAX(bs->bl.request_alignment,
 687                         bs->bl.pwrite_zeroes_alignment);
 688    int err;
 689
 690    /* Only pass through requests that are larger than requested
 691     * preferred alignment (so that we test the fallback to writes on
 692     * unaligned portions), and check that the block layer never hands
 693     * us anything unaligned that crosses an alignment boundary.  */
 694    if (bytes < align) {
 695        assert(QEMU_IS_ALIGNED(offset, align) ||
 696               QEMU_IS_ALIGNED(offset + bytes, align) ||
 697               DIV_ROUND_UP(offset, align) ==
 698               DIV_ROUND_UP(offset + bytes, align));
 699        return -ENOTSUP;
 700    }
 701    assert(QEMU_IS_ALIGNED(offset, align));
 702    assert(QEMU_IS_ALIGNED(bytes, align));
 703    if (bs->bl.max_pwrite_zeroes) {
 704        assert(bytes <= bs->bl.max_pwrite_zeroes);
 705    }
 706
 707    err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_WRITE_ZEROES);
 708    if (err) {
 709        return err;
 710    }
 711
 712    return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
 713}
 714
 715static int coroutine_fn GRAPH_RDLOCK
 716blkdebug_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
 717{
 718    uint32_t align = bs->bl.pdiscard_alignment;
 719    int err;
 720
 721    /* Only pass through requests that are larger than requested
 722     * minimum alignment, and ensure that unaligned requests do not
 723     * cross optimum discard boundaries. */
 724    if (bytes < bs->bl.request_alignment) {
 725        assert(QEMU_IS_ALIGNED(offset, align) ||
 726               QEMU_IS_ALIGNED(offset + bytes, align) ||
 727               DIV_ROUND_UP(offset, align) ==
 728               DIV_ROUND_UP(offset + bytes, align));
 729        return -ENOTSUP;
 730    }
 731    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
 732    assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
 733    if (align && bytes >= align) {
 734        assert(QEMU_IS_ALIGNED(offset, align));
 735        assert(QEMU_IS_ALIGNED(bytes, align));
 736    }
 737    if (bs->bl.max_pdiscard) {
 738        assert(bytes <= bs->bl.max_pdiscard);
 739    }
 740
 741    err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_DISCARD);
 742    if (err) {
 743        return err;
 744    }
 745
 746    return bdrv_co_pdiscard(bs->file, offset, bytes);
 747}
 748
 749static int coroutine_fn blkdebug_co_block_status(BlockDriverState *bs,
 750                                                 bool want_zero,
 751                                                 int64_t offset,
 752                                                 int64_t bytes,
 753                                                 int64_t *pnum,
 754                                                 int64_t *map,
 755                                                 BlockDriverState **file)
 756{
 757    int err;
 758
 759    assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment));
 760
 761    err = rule_check(bs, offset, bytes, BLKDEBUG_IO_TYPE_BLOCK_STATUS);
 762    if (err) {
 763        return err;
 764    }
 765
 766    assert(bs->file && bs->file->bs);
 767    *pnum = bytes;
 768    *map = offset;
 769    *file = bs->file->bs;
 770    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
 771}
 772
 773static void blkdebug_close(BlockDriverState *bs)
 774{
 775    BDRVBlkdebugState *s = bs->opaque;
 776    BlkdebugRule *rule, *next;
 777    int i;
 778
 779    for (i = 0; i < BLKDBG__MAX; i++) {
 780        QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
 781            remove_rule(rule);
 782        }
 783    }
 784
 785    g_free(s->config_file);
 786    qemu_mutex_destroy(&s->lock);
 787}
 788
 789/* Called with lock held.  */
 790static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule)
 791{
 792    BDRVBlkdebugState *s = bs->opaque;
 793    BlkdebugSuspendedReq *r;
 794
 795    r = g_new(BlkdebugSuspendedReq, 1);
 796
 797    r->co         = qemu_coroutine_self();
 798    r->tag        = g_strdup(rule->options.suspend.tag);
 799
 800    remove_rule(rule);
 801    QLIST_INSERT_HEAD(&s->suspended_reqs, r, next);
 802
 803    if (!qtest_enabled()) {
 804        printf("blkdebug: Suspended request '%s'\n", r->tag);
 805    }
 806}
 807
 808/* Called with lock held.  */
 809static void process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
 810                         int *action_count, int *new_state)
 811{
 812    BDRVBlkdebugState *s = bs->opaque;
 813
 814    /* Only process rules for the current state */
 815    if (rule->state && rule->state != s->state) {
 816        return;
 817    }
 818
 819    /* Take the action */
 820    action_count[rule->action]++;
 821    switch (rule->action) {
 822    case ACTION_INJECT_ERROR:
 823        if (action_count[ACTION_INJECT_ERROR] == 1) {
 824            QSIMPLEQ_INIT(&s->active_rules);
 825        }
 826        QSIMPLEQ_INSERT_HEAD(&s->active_rules, rule, active_next);
 827        break;
 828
 829    case ACTION_SET_STATE:
 830        *new_state = rule->options.set_state.new_state;
 831        break;
 832
 833    case ACTION_SUSPEND:
 834        suspend_request(bs, rule);
 835        break;
 836    }
 837}
 838
 839static void coroutine_fn
 840blkdebug_co_debug_event(BlockDriverState *bs, BlkdebugEvent event)
 841{
 842    BDRVBlkdebugState *s = bs->opaque;
 843    struct BlkdebugRule *rule, *next;
 844    int new_state;
 845    int actions_count[ACTION__MAX] = { 0 };
 846
 847    assert((int)event >= 0 && event < BLKDBG__MAX);
 848
 849    WITH_QEMU_LOCK_GUARD(&s->lock) {
 850        new_state = s->state;
 851        QLIST_FOREACH_SAFE(rule, &s->rules[event], next, next) {
 852            process_rule(bs, rule, actions_count, &new_state);
 853        }
 854        s->state = new_state;
 855    }
 856
 857    while (actions_count[ACTION_SUSPEND] > 0) {
 858        qemu_coroutine_yield();
 859        actions_count[ACTION_SUSPEND]--;
 860    }
 861}
 862
 863static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event,
 864                                     const char *tag)
 865{
 866    BDRVBlkdebugState *s = bs->opaque;
 867    struct BlkdebugRule *rule;
 868    int blkdebug_event;
 869
 870    blkdebug_event = qapi_enum_parse(&BlkdebugEvent_lookup, event, -1, NULL);
 871    if (blkdebug_event < 0) {
 872        return -ENOENT;
 873    }
 874
 875    rule = g_malloc(sizeof(*rule));
 876    *rule = (struct BlkdebugRule) {
 877        .event  = blkdebug_event,
 878        .action = ACTION_SUSPEND,
 879        .state  = 0,
 880        .options.suspend.tag = g_strdup(tag),
 881    };
 882
 883    qemu_mutex_lock(&s->lock);
 884    QLIST_INSERT_HEAD(&s->rules[blkdebug_event], rule, next);
 885    qemu_mutex_unlock(&s->lock);
 886
 887    return 0;
 888}
 889
 890/* Called with lock held. May temporarily release lock. */
 891static int resume_req_by_tag(BDRVBlkdebugState *s, const char *tag, bool all)
 892{
 893    BlkdebugSuspendedReq *r;
 894
 895retry:
 896    /*
 897     * No need for _SAFE, since a different coroutine can remove another node
 898     * (not the current one) in this list, and when the current one is removed
 899     * the iteration starts back from beginning anyways.
 900     */
 901    QLIST_FOREACH(r, &s->suspended_reqs, next) {
 902        if (!strcmp(r->tag, tag)) {
 903            Coroutine *co = r->co;
 904
 905            if (!qtest_enabled()) {
 906                printf("blkdebug: Resuming request '%s'\n", r->tag);
 907            }
 908
 909            QLIST_REMOVE(r, next);
 910            g_free(r->tag);
 911            g_free(r);
 912
 913            qemu_mutex_unlock(&s->lock);
 914            qemu_coroutine_enter(co);
 915            qemu_mutex_lock(&s->lock);
 916
 917            if (all) {
 918                goto retry;
 919            }
 920            return 0;
 921        }
 922    }
 923    return -ENOENT;
 924}
 925
 926static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
 927{
 928    BDRVBlkdebugState *s = bs->opaque;
 929    QEMU_LOCK_GUARD(&s->lock);
 930    return resume_req_by_tag(s, tag, false);
 931}
 932
 933static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs,
 934                                            const char *tag)
 935{
 936    BDRVBlkdebugState *s = bs->opaque;
 937    BlkdebugRule *rule, *next;
 938    int i, ret = -ENOENT;
 939
 940    QEMU_LOCK_GUARD(&s->lock);
 941    for (i = 0; i < BLKDBG__MAX; i++) {
 942        QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
 943            if (rule->action == ACTION_SUSPEND &&
 944                !strcmp(rule->options.suspend.tag, tag)) {
 945                remove_rule(rule);
 946                ret = 0;
 947            }
 948        }
 949    }
 950    if (resume_req_by_tag(s, tag, true) == 0) {
 951        ret = 0;
 952    }
 953    return ret;
 954}
 955
 956static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag)
 957{
 958    BDRVBlkdebugState *s = bs->opaque;
 959    BlkdebugSuspendedReq *r;
 960
 961    QEMU_LOCK_GUARD(&s->lock);
 962    QLIST_FOREACH(r, &s->suspended_reqs, next) {
 963        if (!strcmp(r->tag, tag)) {
 964            return true;
 965        }
 966    }
 967    return false;
 968}
 969
 970static int64_t coroutine_fn GRAPH_RDLOCK
 971blkdebug_co_getlength(BlockDriverState *bs)
 972{
 973    return bdrv_co_getlength(bs->file->bs);
 974}
 975
 976static void blkdebug_refresh_filename(BlockDriverState *bs)
 977{
 978    BDRVBlkdebugState *s = bs->opaque;
 979    const QDictEntry *e;
 980    int ret;
 981
 982    if (!bs->file->bs->exact_filename[0]) {
 983        return;
 984    }
 985
 986    for (e = qdict_first(bs->full_open_options); e;
 987         e = qdict_next(bs->full_open_options, e))
 988    {
 989        /* Real child options are under "image", but "x-image" may
 990         * contain a filename */
 991        if (strcmp(qdict_entry_key(e), "config") &&
 992            strcmp(qdict_entry_key(e), "image") &&
 993            strcmp(qdict_entry_key(e), "x-image") &&
 994            strcmp(qdict_entry_key(e), "driver"))
 995        {
 996            return;
 997        }
 998    }
 999
1000    ret = snprintf(bs->exact_filename, sizeof(bs->exact_filename),
1001                   "blkdebug:%s:%s",
1002                   s->config_file ?: "", bs->file->bs->exact_filename);
1003    if (ret >= sizeof(bs->exact_filename)) {
1004        /* An overflow makes the filename unusable, so do not report any */
1005        bs->exact_filename[0] = 0;
1006    }
1007}
1008
1009static void blkdebug_refresh_limits(BlockDriverState *bs, Error **errp)
1010{
1011    BDRVBlkdebugState *s = bs->opaque;
1012
1013    if (s->align) {
1014        bs->bl.request_alignment = s->align;
1015    }
1016    if (s->max_transfer) {
1017        bs->bl.max_transfer = s->max_transfer;
1018    }
1019    if (s->opt_write_zero) {
1020        bs->bl.pwrite_zeroes_alignment = s->opt_write_zero;
1021    }
1022    if (s->max_write_zero) {
1023        bs->bl.max_pwrite_zeroes = s->max_write_zero;
1024    }
1025    if (s->opt_discard) {
1026        bs->bl.pdiscard_alignment = s->opt_discard;
1027    }
1028    if (s->max_discard) {
1029        bs->bl.max_pdiscard = s->max_discard;
1030    }
1031}
1032
1033static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state,
1034                                   BlockReopenQueue *queue, Error **errp)
1035{
1036    return 0;
1037}
1038
1039static void blkdebug_child_perm(BlockDriverState *bs, BdrvChild *c,
1040                                BdrvChildRole role,
1041                                BlockReopenQueue *reopen_queue,
1042                                uint64_t perm, uint64_t shared,
1043                                uint64_t *nperm, uint64_t *nshared)
1044{
1045    BDRVBlkdebugState *s = bs->opaque;
1046
1047    bdrv_default_perms(bs, c, role, reopen_queue,
1048                       perm, shared, nperm, nshared);
1049
1050    *nperm |= s->take_child_perms;
1051    *nshared &= ~s->unshare_child_perms;
1052}
1053
/*
 * NULL-terminated list of option names, installed as the driver's
 * .strong_runtime_opts table.
 *
 * NOTE(review): the entries ending in '.' look like key prefixes (matching
 * e.g. "inject-error.0.event") rather than complete keys -- confirm against
 * the block layer code that consumes this table.
 */
static const char *const blkdebug_strong_runtime_opts[] = {
    /* Rule configuration */
    "config",
    "inject-error.",
    "set-state.",

    /* Limit overrides */
    "align",
    "max-transfer",
    "opt-write-zero",
    "max-write-zero",
    "opt-discard",
    "max-discard",

    /* End-of-list marker */
    NULL
};
1067
1068static BlockDriver bdrv_blkdebug = {
1069    .format_name            = "blkdebug",
1070    .protocol_name          = "blkdebug",
1071    .instance_size          = sizeof(BDRVBlkdebugState),
1072    .is_filter              = true,
1073
1074    .bdrv_parse_filename    = blkdebug_parse_filename,
1075    .bdrv_file_open         = blkdebug_open,
1076    .bdrv_close             = blkdebug_close,
1077    .bdrv_reopen_prepare    = blkdebug_reopen_prepare,
1078    .bdrv_child_perm        = blkdebug_child_perm,
1079
1080    .bdrv_co_getlength      = blkdebug_co_getlength,
1081    .bdrv_refresh_filename  = blkdebug_refresh_filename,
1082    .bdrv_refresh_limits    = blkdebug_refresh_limits,
1083
1084    .bdrv_co_preadv         = blkdebug_co_preadv,
1085    .bdrv_co_pwritev        = blkdebug_co_pwritev,
1086    .bdrv_co_flush_to_disk  = blkdebug_co_flush,
1087    .bdrv_co_pwrite_zeroes  = blkdebug_co_pwrite_zeroes,
1088    .bdrv_co_pdiscard       = blkdebug_co_pdiscard,
1089    .bdrv_co_block_status   = blkdebug_co_block_status,
1090
1091    .bdrv_co_debug_event        = blkdebug_co_debug_event,
1092    .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint,
1093    .bdrv_debug_remove_breakpoint
1094                                = blkdebug_debug_remove_breakpoint,
1095    .bdrv_debug_resume          = blkdebug_debug_resume,
1096    .bdrv_debug_is_suspended    = blkdebug_debug_is_suspended,
1097
1098    .strong_runtime_opts        = blkdebug_strong_runtime_opts,
1099};
1100
1101static void bdrv_blkdebug_init(void)
1102{
1103    bdrv_register(&bdrv_blkdebug);
1104}
1105
1106block_init(bdrv_blkdebug_init);
1107