qemu/blockjob.c
<<
>>
Prefs
   1/*
   2 * QEMU System Emulator block driver
   3 *
   4 * Copyright (c) 2011 IBM Corp.
   5 * Copyright (c) 2012 Red Hat, Inc.
   6 *
   7 * Permission is hereby granted, free of charge, to any person obtaining a copy
   8 * of this software and associated documentation files (the "Software"), to deal
   9 * in the Software without restriction, including without limitation the rights
  10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 * copies of the Software, and to permit persons to whom the Software is
  12 * furnished to do so, subject to the following conditions:
  13 *
  14 * The above copyright notice and this permission notice shall be included in
  15 * all copies or substantial portions of the Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23 * THE SOFTWARE.
  24 */
  25
  26#include "qemu/osdep.h"
  27#include "block/aio-wait.h"
  28#include "block/block.h"
  29#include "block/blockjob_int.h"
  30#include "block/block_int.h"
  31#include "block/trace.h"
  32#include "system/block-backend.h"
  33#include "qapi/error.h"
  34#include "qapi/qapi-events-block-core.h"
  35#include "qapi/qmp/qerror.h"
  36#include "qemu/main-loop.h"
  37#include "qemu/timer.h"
  38
  39static bool is_block_job(Job *job)
  40{
  41    return job_type(job) == JOB_TYPE_BACKUP ||
  42           job_type(job) == JOB_TYPE_COMMIT ||
  43           job_type(job) == JOB_TYPE_MIRROR ||
  44           job_type(job) == JOB_TYPE_STREAM;
  45}
  46
  47BlockJob *block_job_next_locked(BlockJob *bjob)
  48{
  49    Job *job = bjob ? &bjob->job : NULL;
  50    GLOBAL_STATE_CODE();
  51
  52    do {
  53        job = job_next_locked(job);
  54    } while (job && !is_block_job(job));
  55
  56    return job ? container_of(job, BlockJob, job) : NULL;
  57}
  58
  59BlockJob *block_job_get_locked(const char *id)
  60{
  61    Job *job = job_get_locked(id);
  62    GLOBAL_STATE_CODE();
  63
  64    if (job && is_block_job(job)) {
  65        return container_of(job, BlockJob, job);
  66    } else {
  67        return NULL;
  68    }
  69}
  70
  71BlockJob *block_job_get(const char *id)
  72{
  73    JOB_LOCK_GUARD();
  74    return block_job_get_locked(id);
  75}
  76
  77void block_job_free(Job *job)
  78{
  79    BlockJob *bjob = container_of(job, BlockJob, job);
  80    GLOBAL_STATE_CODE();
  81
  82    block_job_remove_all_bdrv(bjob);
  83    ratelimit_destroy(&bjob->limit);
  84    error_free(bjob->blocker);
  85}
  86
  87static char *child_job_get_parent_desc(BdrvChild *c)
  88{
  89    BlockJob *job = c->opaque;
  90    return g_strdup_printf("%s job '%s'", job_type_str(&job->job), job->job.id);
  91}
  92
  93static void child_job_drained_begin(BdrvChild *c)
  94{
  95    BlockJob *job = c->opaque;
  96    job_pause(&job->job);
  97}
  98
  99static bool child_job_drained_poll(BdrvChild *c)
 100{
 101    BlockJob *bjob = c->opaque;
 102    Job *job = &bjob->job;
 103    const BlockJobDriver *drv = block_job_driver(bjob);
 104
 105    /* An inactive or completed job doesn't have any pending requests. Jobs
 106     * with !job->busy are either already paused or have a pause point after
 107     * being reentered, so no job driver code will run before they pause. */
 108    WITH_JOB_LOCK_GUARD() {
 109        if (!job->busy || job_is_completed_locked(job)) {
 110            return false;
 111        }
 112    }
 113
 114    /* Otherwise, assume that it isn't fully stopped yet, but allow the job to
 115     * override this assumption. */
 116    if (drv->drained_poll) {
 117        return drv->drained_poll(bjob);
 118    } else {
 119        return true;
 120    }
 121}
 122
 123static void child_job_drained_end(BdrvChild *c)
 124{
 125    BlockJob *job = c->opaque;
 126    job_resume(&job->job);
 127}
 128
 129typedef struct BdrvStateChildJobContext {
 130    AioContext *new_ctx;
 131    BlockJob *job;
 132} BdrvStateChildJobContext;
 133
 134static void child_job_set_aio_ctx_commit(void *opaque)
 135{
 136    BdrvStateChildJobContext *s = opaque;
 137    BlockJob *job = s->job;
 138
 139    job_set_aio_context(&job->job, s->new_ctx);
 140}
 141
 142static TransactionActionDrv change_child_job_context = {
 143    .commit = child_job_set_aio_ctx_commit,
 144    .clean = g_free,
 145};
 146
 147static bool GRAPH_RDLOCK
 148child_job_change_aio_ctx(BdrvChild *c, AioContext *ctx, GHashTable *visited,
 149                         Transaction *tran, Error **errp)
 150{
 151    BlockJob *job = c->opaque;
 152    BdrvStateChildJobContext *s;
 153    GSList *l;
 154
 155    for (l = job->nodes; l; l = l->next) {
 156        BdrvChild *sibling = l->data;
 157        if (!bdrv_child_change_aio_context(sibling, ctx, visited,
 158                                           tran, errp)) {
 159            return false;
 160        }
 161    }
 162
 163    s = g_new(BdrvStateChildJobContext, 1);
 164    *s = (BdrvStateChildJobContext) {
 165        .new_ctx = ctx,
 166        .job = job,
 167    };
 168
 169    tran_add(tran, &change_child_job_context, s);
 170    return true;
 171}
 172
 173static AioContext *child_job_get_parent_aio_context(BdrvChild *c)
 174{
 175    BlockJob *job = c->opaque;
 176    IO_CODE();
 177    JOB_LOCK_GUARD();
 178
 179    return job->job.aio_context;
 180}
 181
 182static const BdrvChildClass child_job = {
 183    .get_parent_desc    = child_job_get_parent_desc,
 184    .drained_begin      = child_job_drained_begin,
 185    .drained_poll       = child_job_drained_poll,
 186    .drained_end        = child_job_drained_end,
 187    .change_aio_ctx     = child_job_change_aio_ctx,
 188    .stay_at_node       = true,
 189    .get_parent_aio_context = child_job_get_parent_aio_context,
 190};
 191
 192void block_job_remove_all_bdrv(BlockJob *job)
 193{
 194    GLOBAL_STATE_CODE();
 195    /*
 196     * bdrv_root_unref_child() may reach child_job_[can_]set_aio_ctx(),
 197     * which will also traverse job->nodes, so consume the list one by
 198     * one to make sure that such a concurrent access does not attempt
 199     * to process an already freed BdrvChild.
 200     */
 201    bdrv_graph_wrlock_drained();
 202    while (job->nodes) {
 203        GSList *l = job->nodes;
 204        BdrvChild *c = l->data;
 205
 206        job->nodes = l->next;
 207
 208        bdrv_op_unblock_all(c->bs, job->blocker);
 209        bdrv_root_unref_child(c);
 210
 211        g_slist_free_1(l);
 212    }
 213    bdrv_graph_wrunlock();
 214}
 215
 216bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs)
 217{
 218    GSList *el;
 219    GLOBAL_STATE_CODE();
 220
 221    for (el = job->nodes; el; el = el->next) {
 222        BdrvChild *c = el->data;
 223        if (c->bs == bs) {
 224            return true;
 225        }
 226    }
 227
 228    return false;
 229}
 230
 231int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
 232                       uint64_t perm, uint64_t shared_perm, Error **errp)
 233{
 234    BdrvChild *c;
 235    GLOBAL_STATE_CODE();
 236
 237    bdrv_ref(bs);
 238
 239    c = bdrv_root_attach_child(bs, name, &child_job, 0, perm, shared_perm, job,
 240                               errp);
 241    if (c == NULL) {
 242        return -EPERM;
 243    }
 244
 245    job->nodes = g_slist_prepend(job->nodes, c);
 246    bdrv_op_block_all(bs, job->blocker);
 247
 248    return 0;
 249}
 250
 251/* Called with job_mutex lock held. */
 252static void block_job_on_idle_locked(Notifier *n, void *opaque)
 253{
 254    aio_wait_kick();
 255}
 256
 257bool block_job_is_internal(BlockJob *job)
 258{
 259    return (job->job.id == NULL);
 260}
 261
 262const BlockJobDriver *block_job_driver(BlockJob *job)
 263{
 264    return container_of(job->job.driver, BlockJobDriver, job_driver);
 265}
 266
 267/* Assumes the job_mutex is held */
 268static bool job_timer_pending(Job *job)
 269{
 270    return timer_pending(&job->sleep_timer);
 271}
 272
 273bool block_job_set_speed_locked(BlockJob *job, int64_t speed, Error **errp)
 274{
 275    const BlockJobDriver *drv = block_job_driver(job);
 276    int64_t old_speed = job->speed;
 277
 278    GLOBAL_STATE_CODE();
 279
 280    if (job_apply_verb_locked(&job->job, JOB_VERB_SET_SPEED, errp) < 0) {
 281        return false;
 282    }
 283    if (speed < 0) {
 284        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "speed",
 285                   "a non-negative value");
 286        return false;
 287    }
 288
 289    ratelimit_set_speed(&job->limit, speed, BLOCK_JOB_SLICE_TIME);
 290
 291    job->speed = speed;
 292
 293    if (drv->set_speed) {
 294        job_unlock();
 295        drv->set_speed(job, speed);
 296        job_lock();
 297    }
 298
 299    if (speed && speed <= old_speed) {
 300        return true;
 301    }
 302
 303    /* kick only if a timer is pending */
 304    job_enter_cond_locked(&job->job, job_timer_pending);
 305
 306    return true;
 307}
 308
 309static bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
 310{
 311    JOB_LOCK_GUARD();
 312    return block_job_set_speed_locked(job, speed, errp);
 313}
 314
 315void block_job_change_locked(BlockJob *job, BlockJobChangeOptions *opts,
 316                             Error **errp)
 317{
 318    const BlockJobDriver *drv = block_job_driver(job);
 319
 320    GLOBAL_STATE_CODE();
 321
 322    if (job_apply_verb_locked(&job->job, JOB_VERB_CHANGE, errp)) {
 323        return;
 324    }
 325
 326    if (drv->change) {
 327        job_unlock();
 328        drv->change(job, opts, errp);
 329        job_lock();
 330    } else {
 331        error_setg(errp, "Job type does not support change");
 332    }
 333}
 334
 335void block_job_ratelimit_processed_bytes(BlockJob *job, uint64_t n)
 336{
 337    IO_CODE();
 338    ratelimit_calculate_delay(&job->limit, n);
 339}
 340
 341void block_job_ratelimit_sleep(BlockJob *job)
 342{
 343    uint64_t delay_ns;
 344
 345    /*
 346     * Sleep at least once. If the job is reentered early, keep waiting until
 347     * we've waited for the full time that is necessary to keep the job at the
 348     * right speed.
 349     *
 350     * Make sure to recalculate the delay after each (possibly interrupted)
 351     * sleep because the speed can change while the job has yielded.
 352     */
 353    do {
 354        delay_ns = ratelimit_calculate_delay(&job->limit, 0);
 355        job_sleep_ns(&job->job, delay_ns);
 356    } while (delay_ns && !job_is_cancelled(&job->job));
 357}
 358
 359BlockJobInfo *block_job_query_locked(BlockJob *job, Error **errp)
 360{
 361    BlockJobInfo *info;
 362    uint64_t progress_current, progress_total;
 363    const BlockJobDriver *drv = block_job_driver(job);
 364
 365    GLOBAL_STATE_CODE();
 366
 367    if (block_job_is_internal(job)) {
 368        error_setg(errp, "Cannot query QEMU internal jobs");
 369        return NULL;
 370    }
 371
 372    progress_get_snapshot(&job->job.progress, &progress_current,
 373                          &progress_total);
 374
 375    info = g_new0(BlockJobInfo, 1);
 376    info->type      = job_type(&job->job);
 377    info->device    = g_strdup(job->job.id);
 378    info->busy      = job->job.busy;
 379    info->paused    = job->job.pause_count > 0;
 380    info->offset    = progress_current;
 381    info->len       = progress_total;
 382    info->speed     = job->speed;
 383    info->io_status = job->iostatus;
 384    info->ready     = job_is_ready_locked(&job->job),
 385    info->status    = job->job.status;
 386    info->auto_finalize = job->job.auto_finalize;
 387    info->auto_dismiss  = job->job.auto_dismiss;
 388    if (job->job.ret) {
 389        info->error = job->job.err ?
 390                        g_strdup(error_get_pretty(job->job.err)) :
 391                        g_strdup(strerror(-job->job.ret));
 392    }
 393    if (drv->query) {
 394        job_unlock();
 395        drv->query(job, info);
 396        job_lock();
 397    }
 398    return info;
 399}
 400
 401/* Called with job lock held */
 402static void block_job_iostatus_set_err_locked(BlockJob *job, int error)
 403{
 404    if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
 405        job->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
 406                                          BLOCK_DEVICE_IO_STATUS_FAILED;
 407    }
 408}
 409
 410/* Called with job_mutex lock held. */
 411static void block_job_event_cancelled_locked(Notifier *n, void *opaque)
 412{
 413    BlockJob *job = opaque;
 414    uint64_t progress_current, progress_total;
 415
 416    if (block_job_is_internal(job)) {
 417        return;
 418    }
 419
 420    progress_get_snapshot(&job->job.progress, &progress_current,
 421                          &progress_total);
 422
 423    qapi_event_send_block_job_cancelled(job_type(&job->job),
 424                                        job->job.id,
 425                                        progress_total,
 426                                        progress_current,
 427                                        job->speed);
 428}
 429
 430/* Called with job_mutex lock held. */
 431static void block_job_event_completed_locked(Notifier *n, void *opaque)
 432{
 433    BlockJob *job = opaque;
 434    const char *msg = NULL;
 435    uint64_t progress_current, progress_total;
 436
 437    if (block_job_is_internal(job)) {
 438        return;
 439    }
 440
 441    if (job->job.ret < 0) {
 442        msg = error_get_pretty(job->job.err);
 443    }
 444
 445    progress_get_snapshot(&job->job.progress, &progress_current,
 446                          &progress_total);
 447
 448    qapi_event_send_block_job_completed(job_type(&job->job),
 449                                        job->job.id,
 450                                        progress_total,
 451                                        progress_current,
 452                                        job->speed,
 453                                        msg);
 454}
 455
 456/* Called with job_mutex lock held. */
 457static void block_job_event_pending_locked(Notifier *n, void *opaque)
 458{
 459    BlockJob *job = opaque;
 460
 461    if (block_job_is_internal(job)) {
 462        return;
 463    }
 464
 465    qapi_event_send_block_job_pending(job_type(&job->job),
 466                                      job->job.id);
 467}
 468
 469/* Called with job_mutex lock held. */
 470static void block_job_event_ready_locked(Notifier *n, void *opaque)
 471{
 472    BlockJob *job = opaque;
 473    uint64_t progress_current, progress_total;
 474
 475    if (block_job_is_internal(job)) {
 476        return;
 477    }
 478
 479    progress_get_snapshot(&job->job.progress, &progress_current,
 480                          &progress_total);
 481
 482    qapi_event_send_block_job_ready(job_type(&job->job),
 483                                    job->job.id,
 484                                    progress_total,
 485                                    progress_current,
 486                                    job->speed);
 487}
 488
 489
 490void *block_job_create(const char *job_id, const BlockJobDriver *driver,
 491                       JobTxn *txn, BlockDriverState *bs, uint64_t perm,
 492                       uint64_t shared_perm, int64_t speed, int flags,
 493                       BlockCompletionFunc *cb, void *opaque, Error **errp)
 494{
 495    BlockJob *job;
 496    int ret;
 497    GLOBAL_STATE_CODE();
 498
 499    bdrv_graph_wrlock_drained();
 500
 501    if (job_id == NULL && !(flags & JOB_INTERNAL)) {
 502        job_id = bdrv_get_device_name(bs);
 503    }
 504
 505    job = job_create(job_id, &driver->job_driver, txn, bdrv_get_aio_context(bs),
 506                     flags, cb, opaque, errp);
 507    if (job == NULL) {
 508        bdrv_graph_wrunlock();
 509        return NULL;
 510    }
 511
 512    assert(is_block_job(&job->job));
 513    assert(job->job.driver->free == &block_job_free);
 514    assert(job->job.driver->user_resume == &block_job_user_resume);
 515
 516    ratelimit_init(&job->limit);
 517
 518    job->finalize_cancelled_notifier.notify = block_job_event_cancelled_locked;
 519    job->finalize_completed_notifier.notify = block_job_event_completed_locked;
 520    job->pending_notifier.notify = block_job_event_pending_locked;
 521    job->ready_notifier.notify = block_job_event_ready_locked;
 522    job->idle_notifier.notify = block_job_on_idle_locked;
 523
 524    WITH_JOB_LOCK_GUARD() {
 525        notifier_list_add(&job->job.on_finalize_cancelled,
 526                          &job->finalize_cancelled_notifier);
 527        notifier_list_add(&job->job.on_finalize_completed,
 528                          &job->finalize_completed_notifier);
 529        notifier_list_add(&job->job.on_pending, &job->pending_notifier);
 530        notifier_list_add(&job->job.on_ready, &job->ready_notifier);
 531        notifier_list_add(&job->job.on_idle, &job->idle_notifier);
 532    }
 533
 534    error_setg(&job->blocker, "block device is in use by block job: %s",
 535               job_type_str(&job->job));
 536
 537    ret = block_job_add_bdrv(job, "main node", bs, perm, shared_perm, errp);
 538    if (ret < 0) {
 539        goto fail;
 540    }
 541
 542    if (!block_job_set_speed(job, speed, errp)) {
 543        goto fail;
 544    }
 545
 546    bdrv_graph_wrunlock();
 547    return job;
 548
 549fail:
 550    bdrv_graph_wrunlock();
 551    job_early_fail(&job->job);
 552    return NULL;
 553}
 554
 555void block_job_iostatus_reset_locked(BlockJob *job)
 556{
 557    GLOBAL_STATE_CODE();
 558    if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
 559        return;
 560    }
 561    assert(job->job.user_paused && job->job.pause_count > 0);
 562    job->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
 563}
 564
 565static void block_job_iostatus_reset(BlockJob *job)
 566{
 567    JOB_LOCK_GUARD();
 568    block_job_iostatus_reset_locked(job);
 569}
 570
 571void block_job_user_resume(Job *job)
 572{
 573    BlockJob *bjob = container_of(job, BlockJob, job);
 574    GLOBAL_STATE_CODE();
 575    block_job_iostatus_reset(bjob);
 576}
 577
 578BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
 579                                        int is_read, int error)
 580{
 581    BlockErrorAction action;
 582    IO_CODE();
 583
 584    switch (on_err) {
 585    case BLOCKDEV_ON_ERROR_ENOSPC:
 586    case BLOCKDEV_ON_ERROR_AUTO:
 587        action = (error == ENOSPC) ?
 588                 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
 589        break;
 590    case BLOCKDEV_ON_ERROR_STOP:
 591        action = BLOCK_ERROR_ACTION_STOP;
 592        break;
 593    case BLOCKDEV_ON_ERROR_REPORT:
 594        action = BLOCK_ERROR_ACTION_REPORT;
 595        break;
 596    case BLOCKDEV_ON_ERROR_IGNORE:
 597        action = BLOCK_ERROR_ACTION_IGNORE;
 598        break;
 599    default:
 600        abort();
 601    }
 602    if (!block_job_is_internal(job)) {
 603        qapi_event_send_block_job_error(job->job.id,
 604                                        is_read ? IO_OPERATION_TYPE_READ :
 605                                        IO_OPERATION_TYPE_WRITE,
 606                                        action);
 607    }
 608    if (action == BLOCK_ERROR_ACTION_STOP) {
 609        WITH_JOB_LOCK_GUARD() {
 610            if (!job->job.user_paused) {
 611                job_pause_locked(&job->job);
 612                /*
 613                 * make the pause user visible, which will be
 614                 * resumed from QMP.
 615                 */
 616                job->job.user_paused = true;
 617            }
 618            block_job_iostatus_set_err_locked(job, error);
 619        }
 620    }
 621    return action;
 622}
 623
 624AioContext *block_job_get_aio_context(BlockJob *job)
 625{
 626    GLOBAL_STATE_CODE();
 627    return job->job.aio_context;
 628}
 629