qemu/include/block/aio.h
/*
 * QEMU aio implementation
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#ifndef QEMU_AIO_H
#define QEMU_AIO_H

#include "qemu-common.h"
#include "qemu/queue.h"
#include "qemu/event_notifier.h"
#include "qemu/thread.h"
#include "qemu/rfifolock.h"
#include "qemu/timer.h"

typedef struct BlockAIOCB BlockAIOCB;
typedef void BlockCompletionFunc(void *opaque, int ret);

typedef struct AIOCBInfo {
    void (*cancel_async)(BlockAIOCB *acb);
    AioContext *(*get_aio_context)(BlockAIOCB *acb);
    size_t aiocb_size;
} AIOCBInfo;

struct BlockAIOCB {
    const AIOCBInfo *aiocb_info;
    BlockDriverState *bs;
    BlockCompletionFunc *cb;
    void *opaque;
    int refcnt;
};

void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
                   BlockCompletionFunc *cb, void *opaque);
void qemu_aio_unref(void *p);
void qemu_aio_ref(void *p);

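/* Illustrative sketch (not part of this header): the usual way a driver
 * defines its own AIOCB type is to embed BlockAIOCB as the first member,
 * describe it with an AIOCBInfo, and allocate instances with qemu_aio_get().
 * The names MyAIOCB, my_aiocb_info and my_submit below are hypothetical.
 *
 *     typedef struct MyAIOCB {
 *         BlockAIOCB common;       // must be the first member
 *         int my_private_state;
 *     } MyAIOCB;
 *
 *     static const AIOCBInfo my_aiocb_info = {
 *         .aiocb_size = sizeof(MyAIOCB),
 *     };
 *
 *     static BlockAIOCB *my_submit(BlockDriverState *bs,
 *                                  BlockCompletionFunc *cb, void *opaque)
 *     {
 *         MyAIOCB *acb = qemu_aio_get(&my_aiocb_info, bs, cb, opaque);
 *         // ... start the request; on completion call
 *         // acb->common.cb(acb->common.opaque, ret) and qemu_aio_unref(acb)
 *         return &acb->common;
 *     }
 */
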
typedef struct AioHandler AioHandler;
typedef void QEMUBHFunc(void *opaque);
typedef void IOHandler(void *opaque);

struct ThreadPool;
struct LinuxAioState;

struct AioContext {
    GSource source;

    /* Protects all fields from multi-threaded access */
    RFifoLock lock;

    /* The list of registered AIO handlers */
    QLIST_HEAD(, AioHandler) aio_handlers;

    /* This is a simple lock used to protect the aio_handlers list.
     * Specifically, it's used to ensure that no callbacks are removed while
     * we're walking and dispatching callbacks.
     */
    int walking_handlers;

    /* Used to avoid unnecessary event_notifier_set calls in aio_notify;
     * accessed with atomic primitives.  If this field is 0, everything
     * (file descriptors, bottom halves, timers) will be re-evaluated
     * before the next blocking poll(), thus the event_notifier_set call
     * can be skipped.  If it is non-zero, you may need to wake up a
     * concurrent aio_poll or the glib main event loop, making
     * event_notifier_set necessary.
     *
     * Bit 0 is reserved for GSource usage of the AioContext, and is 1
     * between a call to aio_ctx_prepare and the next call to aio_ctx_check.
     * Bits 1-31 simply count the number of active calls to aio_poll
     * that are in the prepare or poll phase.
     *
     * The GSource and aio_poll must use a different mechanism because
     * there is no certainty that a call to GSource's prepare callback
     * (via g_main_context_prepare) is indeed followed by check and
     * dispatch.  It's not clear whether this would be a bug, but let's
     * play safe and allow it---it will just cause extra calls to
     * event_notifier_set until the next call to dispatch.
     *
     * Instead, the aio_poll calls include both the prepare and the
     * dispatch phase, hence a simple counter is enough for them.
     */
    uint32_t notify_me;

    /* Lock that protects concurrent addition and deletion of bottom halves */
    QemuMutex bh_lock;

    /* Anchor of the list of Bottom Halves belonging to the context */
    struct QEMUBH *first_bh;

    /* A simple lock used to protect the first_bh list, and ensure that
     * no callbacks are removed while we're walking and dispatching callbacks.
     */
    int walking_bh;

    /* Used by aio_notify.
     *
     * "notified" is used to avoid expensive event_notifier_test_and_clear
     * calls.  When it is clear, the EventNotifier is clear, or one thread
     * is going to clear "notified" before processing more events.  False
     * positives are possible, i.e. "notified" could be set even though the
     * EventNotifier is clear.
     *
     * Note that event_notifier_set *cannot* be optimized the same way.  For
     * more information on the problem that would result, see "#ifdef BUG2"
     * in the docs/aio_notify_accept.promela formal model.
     */
    bool notified;
    EventNotifier notifier;

    /* Scheduling this BH forces the event loop to iterate */
    QEMUBH *notify_dummy_bh;

    /* Thread pool for performing work and receiving completion callbacks */
    struct ThreadPool *thread_pool;

#ifdef CONFIG_LINUX_AIO
    /* State for native Linux AIO.  Uses aio_context_acquire/release for
     * locking.
     */
    struct LinuxAioState *linux_aio;
#endif

    /* TimerLists for calling timers - one per clock type */
    QEMUTimerListGroup tlg;

    int external_disable_cnt;

    /* epoll(7) state used when built with CONFIG_EPOLL */
    int epollfd;
    bool epoll_enabled;
    bool epoll_available;
};

/**
 * aio_context_new: Allocate a new AioContext.
 *
 * An AioContext provides a mini event loop that can be waited on
 * synchronously.  It also provides bottom halves, a service to execute
 * a piece of code as soon as possible.
 */
AioContext *aio_context_new(Error **errp);

/**
 * aio_context_ref:
 * @ctx: The AioContext to operate on.
 *
 * Add a reference to an AioContext.
 */
void aio_context_ref(AioContext *ctx);

/**
 * aio_context_unref:
 * @ctx: The AioContext to operate on.
 *
 * Drop a reference to an AioContext.
 */
void aio_context_unref(AioContext *ctx);

/* Take ownership of the AioContext.  If the AioContext will be shared between
 * threads, and a thread does not want to be interrupted, it will have to
 * take ownership around calls to aio_poll().  Otherwise, aio_poll()
 * automatically takes care of calling aio_context_acquire and
 * aio_context_release.
 *
 * Access to timers and BHs from a thread that has not acquired AioContext
 * is possible.  Access to callbacks for now must be done while the AioContext
 * is owned by the thread (FIXME).
 */
void aio_context_acquire(AioContext *ctx);

/* Relinquish ownership of the AioContext. */
void aio_context_release(AioContext *ctx);

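/* Illustrative sketch (hypothetical caller code, not part of this header):
 * a thread that owns its own context typically brackets aio_poll() with
 * acquire/release.  &error_abort comes from "qapi/error.h"; "done" is a
 * placeholder termination condition.
 *
 *     AioContext *ctx = aio_context_new(&error_abort);
 *
 *     aio_context_acquire(ctx);
 *     while (!done) {
 *         aio_poll(ctx, true);           // block until some event is handled
 *     }
 *     aio_context_release(ctx);
 *
 *     aio_context_unref(ctx);            // drop the reference from _new()
 */
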
/**
 * aio_bh_new: Allocate a new bottom half structure.
 *
 * Bottom halves are lightweight callbacks whose invocation is guaranteed
 * to be wait-free, thread-safe and signal-safe.  The #QEMUBH structure
 * is opaque and must be allocated prior to its use.
 */
QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque);

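/* Illustrative sketch (hypothetical caller code): allocate a bottom half,
 * schedule it from any thread, and delete it once it is no longer needed.
 * my_bh_cb and my_state are placeholders.
 *
 *     static void my_bh_cb(void *opaque)
 *     {
 *         struct MyState *s = opaque;
 *         // runs in the context's event loop as soon as possible
 *     }
 *
 *     QEMUBH *bh = aio_bh_new(ctx, my_bh_cb, my_state);
 *     qemu_bh_schedule(bh);      // wait-free and thread-safe, see below
 *     ...
 *     qemu_bh_delete(bh);        // cancels and eventually frees the BH
 */
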
/**
 * aio_notify: Force processing of pending events.
 *
 * Similar to signaling a condition variable, aio_notify forces
 * aio_poll to exit, so that the next call will re-examine pending events.
 * The caller of aio_notify will usually call aio_poll again very soon,
 * or go through another iteration of the GLib main loop.  Hence, aio_notify
 * also has the side effect of recalculating the sets of file descriptors
 * that the main loop waits for.
 *
 * Calling aio_notify is rarely necessary, because for example scheduling
 * a bottom half calls it already.
 */
void aio_notify(AioContext *ctx);

/**
 * aio_notify_accept: Acknowledge receiving an aio_notify.
 *
 * aio_notify() uses an EventNotifier in order to wake up a sleeping
 * aio_poll() or g_main_context_iteration().  Calls to aio_notify() are
 * usually rare, but the AioContext has to clear the EventNotifier on
 * every aio_poll() or g_main_context_iteration() in order to avoid
 * busy waiting.  This event_notifier_test_and_clear() cannot be done
 * using the usual aio_context_set_event_notifier(), because it must
 * be done before processing all events (file descriptors, bottom halves,
 * timers).
 *
 * aio_notify_accept() is an optimized event_notifier_test_and_clear()
 * that is specific to an AioContext's notifier; it is used internally
 * to clear the EventNotifier only if aio_notify() had been called.
 */
void aio_notify_accept(AioContext *ctx);

/**
 * aio_bh_call: Executes callback function of the specified BH.
 */
void aio_bh_call(QEMUBH *bh);

/**
 * aio_bh_poll: Poll bottom halves for an AioContext.
 *
 * These are internal functions used by the QEMU main loop.
 * Note that aio_bh_poll() must not be called concurrently with itself
 * on the same AioContext.
 */
int aio_bh_poll(AioContext *ctx);

/**
 * qemu_bh_schedule: Schedule a bottom half.
 *
 * Scheduling a bottom half interrupts the main loop and causes the
 * execution of the callback that was passed to qemu_bh_new.
 *
 * Bottom halves that are scheduled from a bottom half handler are instantly
 * invoked.  This can create an infinite loop if a bottom half handler
 * schedules itself.
 *
 * @bh: The bottom half to be scheduled.
 */
void qemu_bh_schedule(QEMUBH *bh);

/**
 * qemu_bh_cancel: Cancel execution of a bottom half.
 *
 * Canceling execution of a bottom half undoes the effect of calls to
 * qemu_bh_schedule without freeing its resources yet.  While cancellation
 * itself is also wait-free and thread-safe, it can of course race with the
 * loop that executes bottom halves unless you are holding the iothread
 * mutex.  This makes it mostly useless if you are not holding the mutex.
 *
 * @bh: The bottom half to be canceled.
 */
void qemu_bh_cancel(QEMUBH *bh);

/**
 * qemu_bh_delete: Cancel execution of a bottom half and free its resources.
 *
 * Deleting a bottom half frees the memory that was allocated for it by
 * qemu_bh_new.  It also implies canceling the bottom half if it was
 * scheduled.
 * This function is asynchronous: the bottom half is only actually freed
 * once the event loop has finished with it.
 *
 * @bh: The bottom half to be deleted.
 */
void qemu_bh_delete(QEMUBH *bh);

/* Return whether there are any pending callbacks from the GSource
 * attached to the AioContext, before g_poll is invoked.
 *
 * This is used internally in the implementation of the GSource.
 */
bool aio_prepare(AioContext *ctx);

/* Return whether there are any pending callbacks from the GSource
 * attached to the AioContext, after g_poll is invoked.
 *
 * This is used internally in the implementation of the GSource.
 */
bool aio_pending(AioContext *ctx);

/* Dispatch any pending callbacks from the GSource attached to the AioContext.
 *
 * This is used internally in the implementation of the GSource.
 */
bool aio_dispatch(AioContext *ctx);

/* Make progress on pending AIO work.  This can issue new pending
 * aio as a result of executing I/O completion or bh callbacks.
 *
 * Return whether any progress was made by executing AIO or bottom half
 * handlers.  If @blocking == true, this should always be true except
 * if someone called aio_notify.
 *
 * If there are no pending bottom halves, but there are pending AIO
 * operations, it may not be possible to make any progress without
 * blocking.  If @blocking is true, this function will wait until one
 * or more AIO events have completed, to ensure something has moved
 * before returning.
 */
bool aio_poll(AioContext *ctx, bool blocking);

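/* Illustrative sketch (hypothetical caller code): a common pattern is to
 * spin the event loop until a completion flag set by a callback becomes
 * true.  "done" is a placeholder flag updated by an I/O completion callback.
 *
 *     while (!done) {
 *         aio_poll(ctx, true);   // block until at least one event is handled
 *     }
 *
 * With blocking == false, aio_poll() only dispatches what is already
 * pending and returns without waiting.
 */
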
/* Register a file descriptor and associated callbacks.  Behaves very similarly
 * to qemu_set_fd_handler.  Unlike qemu_set_fd_handler, these callbacks will
 * be invoked when using aio_poll().
 *
 * Code that invokes AIO completion functions should rely on this function
 * instead of qemu_set_fd_handler[2].
 */
void aio_set_fd_handler(AioContext *ctx,
                        int fd,
                        bool is_external,
                        IOHandler *io_read,
                        IOHandler *io_write,
                        void *opaque);

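/* Illustrative sketch (hypothetical caller code): register a read handler
 * for a socket on a context, and later unregister it by passing NULL
 * callbacks.  my_read_cb, my_state and sockfd are placeholders.
 *
 *     static void my_read_cb(void *opaque)
 *     {
 *         struct MyState *s = opaque;
 *         // the fd is readable; consume the data here
 *     }
 *
 *     aio_set_fd_handler(ctx, sockfd, true, my_read_cb, NULL, my_state);
 *     ...
 *     aio_set_fd_handler(ctx, sockfd, true, NULL, NULL, NULL);   // unregister
 */
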
/* Register an event notifier and associated callbacks.  Behaves very similarly
 * to event_notifier_set_handler.  Unlike event_notifier_set_handler, these callbacks
 * will be invoked when using aio_poll().
 *
 * Code that invokes AIO completion functions should rely on this function
 * instead of event_notifier_set_handler.
 */
void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *notifier,
                            bool is_external,
                            EventNotifierHandler *io_read);

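/* Illustrative sketch (hypothetical caller code): attach a handler to an
 * EventNotifier so that event_notifier_set() from another thread wakes up
 * this context.  my_notifier_cb is a placeholder; event_notifier_init()
 * comes from "qemu/event_notifier.h".
 *
 *     static void my_notifier_cb(EventNotifier *e)
 *     {
 *         event_notifier_test_and_clear(e);
 *         // react to the notification
 *     }
 *
 *     EventNotifier notifier;
 *     event_notifier_init(&notifier, 0);
 *     aio_set_event_notifier(ctx, &notifier, false, my_notifier_cb);
 *     ...
 *     aio_set_event_notifier(ctx, &notifier, false, NULL);   // unregister
 */
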
/* Return a GSource that lets the main loop poll the file descriptors attached
 * to this AioContext.
 */
GSource *aio_get_g_source(AioContext *ctx);

/* Return the ThreadPool bound to this AioContext */
struct ThreadPool *aio_get_thread_pool(AioContext *ctx);

/* Return the LinuxAioState bound to this AioContext */
struct LinuxAioState *aio_get_linux_aio(AioContext *ctx);

/**
 * aio_timer_new:
 * @ctx: the aio context
 * @type: the clock type
 * @scale: the scale
 * @cb: the callback to call on timer expiry
 * @opaque: the opaque pointer to pass to the callback
 *
 * Allocate a new timer attached to the context @ctx.
 * The function is responsible for memory allocation.
 *
 * The preferred interface is aio_timer_init. Use that
 * unless you really need dynamic memory allocation.
 *
 * Returns: a pointer to the new timer
 */
static inline QEMUTimer *aio_timer_new(AioContext *ctx, QEMUClockType type,
                                       int scale,
                                       QEMUTimerCB *cb, void *opaque)
{
    return timer_new_tl(ctx->tlg.tl[type], scale, cb, opaque);
}

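/* Illustrative sketch (hypothetical caller code): allocate a timer on a
 * context and arm it to fire 100ms from now.  my_timer_cb and my_state are
 * placeholders; timer_mod() and qemu_clock_get_ns() come from "qemu/timer.h".
 *
 *     static void my_timer_cb(void *opaque)
 *     {
 *         // called from the context's event loop when the timer expires
 *     }
 *
 *     QEMUTimer *t = aio_timer_new(ctx, QEMU_CLOCK_REALTIME, SCALE_NS,
 *                                  my_timer_cb, my_state);
 *     timer_mod(t, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + 100 * SCALE_MS);
 */
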
/**
 * aio_timer_init:
 * @ctx: the aio context
 * @ts: the timer
 * @type: the clock type
 * @scale: the scale
 * @cb: the callback to call on timer expiry
 * @opaque: the opaque pointer to pass to the callback
 *
 * Initialise a new timer attached to the context @ctx.
 * The caller is responsible for memory allocation.
 */
static inline void aio_timer_init(AioContext *ctx,
                                  QEMUTimer *ts, QEMUClockType type,
                                  int scale,
                                  QEMUTimerCB *cb, void *opaque)
{
    timer_init_tl(ts, ctx->tlg.tl[type], scale, cb, opaque);
}

/**
 * aio_compute_timeout:
 * @ctx: the aio context
 *
 * Compute the timeout that a blocking aio_poll should use.
 */
int64_t aio_compute_timeout(AioContext *ctx);

/**
 * aio_disable_external:
 * @ctx: the aio context
 *
 * Disable the further processing of external clients.
 */
static inline void aio_disable_external(AioContext *ctx)
{
    atomic_inc(&ctx->external_disable_cnt);
}

/**
 * aio_enable_external:
 * @ctx: the aio context
 *
 * Enable the processing of external clients.
 */
static inline void aio_enable_external(AioContext *ctx)
{
    assert(ctx->external_disable_cnt > 0);
    atomic_dec(&ctx->external_disable_cnt);
}

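/* Illustrative sketch (hypothetical caller code): disabling external clients
 * nests, so each aio_disable_external() must be paired with exactly one
 * aio_enable_external().  Handlers registered with is_external == true are
 * skipped while the counter is non-zero (see aio_node_check below).
 *
 *     aio_disable_external(ctx);
 *     // ... do work that must not race with new external requests ...
 *     aio_enable_external(ctx);
 */
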
/**
 * aio_external_disabled:
 * @ctx: the aio context
 *
 * Return true if the external clients are disabled.
 */
static inline bool aio_external_disabled(AioContext *ctx)
{
    return atomic_read(&ctx->external_disable_cnt);
}

/**
 * aio_node_check:
 * @ctx: the aio context
 * @is_external: Whether or not the checked node is an external event source.
 *
 * Check if the node's is_external flag is okay to be polled by the ctx at this
 * moment. True means green light.
 */
static inline bool aio_node_check(AioContext *ctx, bool is_external)
{
    return !is_external || !atomic_read(&ctx->external_disable_cnt);
}

/**
 * aio_context_setup:
 * @ctx: the aio context
 *
 * Initialize the aio context.
 */
void aio_context_setup(AioContext *ctx);

#endif