qemu/util/aio-posix.c
/*
 * QEMU aio implementation
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block.h"
#include "qemu/rcu_queue.h"
#include "qemu/sockets.h"
#include "qemu/cutils.h"
#include "trace.h"
#ifdef CONFIG_EPOLL_CREATE1
#include <sys/epoll.h>
#endif

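/* State for one file descriptor registered with aio_set_fd_handler().  Nodes
 * live on the ctx->aio_handlers list and are walked under RCU; the deleted
 * flag defers freeing while the list is in use.
 */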
struct AioHandler
{
    GPollFD pfd;
    IOHandler *io_read;
    IOHandler *io_write;
    AioPollFn *io_poll;
    IOHandler *io_poll_begin;
    IOHandler *io_poll_end;
    int deleted;
    void *opaque;
    bool is_external;
    QLIST_ENTRY(AioHandler) node;
};

#ifdef CONFIG_EPOLL_CREATE1

/* The fd number threshold to switch to epoll */
#define EPOLL_ENABLE_THRESHOLD 64

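/* Stop using epoll for this context: the epoll fd is closed and later
 * aio_poll() calls fall back to qemu_poll_ns().
 */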
static void aio_epoll_disable(AioContext *ctx)
{
    ctx->epoll_enabled = false;
    if (!ctx->epoll_available) {
        return;
    }
    ctx->epoll_available = false;
    close(ctx->epollfd);
}

static inline int epoll_events_from_pfd(int pfd_events)
{
    return (pfd_events & G_IO_IN ? EPOLLIN : 0) |
           (pfd_events & G_IO_OUT ? EPOLLOUT : 0) |
           (pfd_events & G_IO_HUP ? EPOLLHUP : 0) |
           (pfd_events & G_IO_ERR ? EPOLLERR : 0);
}

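/* Register all current handlers with the epoll instance and switch the
 * context to epoll mode.  Returns false if any epoll_ctl() call fails;
 * the caller then disables epoll for this context.
 */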
static bool aio_epoll_try_enable(AioContext *ctx)
{
    AioHandler *node;
    struct epoll_event event;

    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        int r;
        if (node->deleted || !node->pfd.events) {
            continue;
        }
        event.events = epoll_events_from_pfd(node->pfd.events);
        event.data.ptr = node;
        r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event);
        if (r) {
            return false;
        }
    }
    ctx->epoll_enabled = true;
    return true;
}

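/* Keep the epoll set in sync after a handler has been added, modified or
 * removed.  If epoll_ctl() fails, fall back to ppoll by disabling epoll for
 * this context.
 */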
static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
{
    struct epoll_event event;
    int r;
    int ctl;

    if (!ctx->epoll_enabled) {
        return;
    }
    if (!node->pfd.events) {
        ctl = EPOLL_CTL_DEL;
    } else {
        event.data.ptr = node;
        event.events = epoll_events_from_pfd(node->pfd.events);
        ctl = is_new ? EPOLL_CTL_ADD : EPOLL_CTL_MOD;
    }

    r = epoll_ctl(ctx->epollfd, ctl, node->pfd.fd, &event);
    if (r) {
        aio_epoll_disable(ctx);
    }
}

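/* Wait for events with epoll_wait() and translate them into GPollFD revents
 * on the corresponding AioHandlers.  The caller passes exactly one GPollFD,
 * for ctx->epollfd itself; a positive timeout is first consumed by
 * qemu_poll_ns() so that nanosecond resolution is preserved, and epoll_wait()
 * then only collects the ready events.
 */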
static int aio_epoll(AioContext *ctx, GPollFD *pfds,
                     unsigned npfd, int64_t timeout)
{
    AioHandler *node;
    int i, ret = 0;
    struct epoll_event events[128];

    assert(npfd == 1);
    assert(pfds[0].fd == ctx->epollfd);
    if (timeout > 0) {
        ret = qemu_poll_ns(pfds, npfd, timeout);
    }
    if (timeout <= 0 || ret > 0) {
        ret = epoll_wait(ctx->epollfd, events,
                         ARRAY_SIZE(events),
                         timeout);
        if (ret <= 0) {
            goto out;
        }
        for (i = 0; i < ret; i++) {
            int ev = events[i].events;
            node = events[i].data.ptr;
            node->pfd.revents = (ev & EPOLLIN ? G_IO_IN : 0) |
                (ev & EPOLLOUT ? G_IO_OUT : 0) |
                (ev & EPOLLHUP ? G_IO_HUP : 0) |
                (ev & EPOLLERR ? G_IO_ERR : 0);
        }
    }
out:
    return ret;
}

static bool aio_epoll_enabled(AioContext *ctx)
{
    /* Fall back to ppoll when external clients are disabled. */
    return !aio_external_disabled(ctx) && ctx->epoll_enabled;
}

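/* Decide whether this iteration should wait via aio_epoll().  Once the number
 * of fds to poll reaches EPOLL_ENABLE_THRESHOLD, try to register them all
 * with epoll; if that fails, epoll is disabled for this context.
 */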
static bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds,
                                 unsigned npfd, int64_t timeout)
{
    if (!ctx->epoll_available) {
        return false;
    }
    if (aio_epoll_enabled(ctx)) {
        return true;
    }
    if (npfd >= EPOLL_ENABLE_THRESHOLD) {
        if (aio_epoll_try_enable(ctx)) {
            return true;
        } else {
            aio_epoll_disable(ctx);
        }
    }
    return false;
}

#else

static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
{
}

static int aio_epoll(AioContext *ctx, GPollFD *pfds,
                     unsigned npfd, int64_t timeout)
{
    assert(false);
}

static bool aio_epoll_enabled(AioContext *ctx)
{
    return false;
}

static bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds,
                                 unsigned npfd, int64_t timeout)
{
    return false;
}

#endif

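/* Return the registered, not-yet-deleted handler for @fd, or NULL. */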
static AioHandler *find_aio_handler(AioContext *ctx, int fd)
{
    AioHandler *node;

    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->pfd.fd == fd && !node->deleted) {
            return node;
        }
    }

    return NULL;
}

static bool aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
{
    /* If the GSource is in the process of being destroyed then
     * g_source_remove_poll() causes an assertion failure.  Skip
     * removal in that case, because glib cleans up its state during
     * destruction anyway.
     */
    if (!g_source_is_destroyed(&ctx->source)) {
        g_source_remove_poll(&ctx->source, &node->pfd);
    }

    /* If a read is in progress, just mark the node as deleted */
    if (qemu_lockcnt_count(&ctx->list_lock)) {
        node->deleted = 1;
        node->pfd.revents = 0;
        return false;
    }
    /* Otherwise, delete it for real.  We can't just mark it as
     * deleted because deleted nodes are only cleaned up while
     * no one is walking the handlers list.
     */
    QLIST_REMOVE(node, node);
    return true;
}

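/* Register, update or remove the handlers for @fd.  Passing NULL for io_read,
 * io_write and io_poll removes the fd.  Updates never modify a node in place:
 * a new AioHandler is inserted and the old one is removed (or marked deleted
 * while the RCU list is in use).
 */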
void aio_set_fd_handler(AioContext *ctx,
                        int fd,
                        bool is_external,
                        IOHandler *io_read,
                        IOHandler *io_write,
                        AioPollFn *io_poll,
                        void *opaque)
{
    AioHandler *node;
    AioHandler *new_node = NULL;
    bool is_new = false;
    bool deleted = false;
    int poll_disable_change;

    qemu_lockcnt_lock(&ctx->list_lock);

    node = find_aio_handler(ctx, fd);

    /* Are we deleting the fd handler? */
    if (!io_read && !io_write && !io_poll) {
        if (node == NULL) {
            qemu_lockcnt_unlock(&ctx->list_lock);
            return;
        }
        /* Clean events in order to unregister fd from the ctx epoll. */
        node->pfd.events = 0;

        poll_disable_change = -!node->io_poll;
    } else {
        poll_disable_change = !io_poll - (node && !node->io_poll);
        if (node == NULL) {
            is_new = true;
        }
        /* Alloc and insert a new node; any existing node is replaced below */
        new_node = g_new0(AioHandler, 1);

        /* Update handler with latest information */
        new_node->io_read = io_read;
        new_node->io_write = io_write;
        new_node->io_poll = io_poll;
        new_node->opaque = opaque;
        new_node->is_external = is_external;

        if (is_new) {
            new_node->pfd.fd = fd;
        } else {
            new_node->pfd = node->pfd;
        }
        g_source_add_poll(&ctx->source, &new_node->pfd);

        new_node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
        new_node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);

        QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, new_node, node);
    }
    if (node) {
        deleted = aio_remove_fd_handler(ctx, node);
    }

    /* No need to order poll_disable_cnt writes against other updates;
     * the counter is only used to avoid wasting time and latency on
     * iterated polling when the system call will be ultimately necessary.
     * Changing handlers is a rare event, and a little wasted polling until
     * the aio_notify below is not an issue.
     */
    atomic_set(&ctx->poll_disable_cnt,
               atomic_read(&ctx->poll_disable_cnt) + poll_disable_change);

    if (new_node) {
        aio_epoll_update(ctx, new_node, is_new);
    } else if (node) {
        /* Unregister deleted fd_handler */
        aio_epoll_update(ctx, node, false);
    }
    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);

    if (deleted) {
        g_free(node);
    }
}

void aio_set_fd_poll(AioContext *ctx, int fd,
                     IOHandler *io_poll_begin,
                     IOHandler *io_poll_end)
{
    AioHandler *node = find_aio_handler(ctx, fd);

    if (!node) {
        return;
    }

    node->io_poll_begin = io_poll_begin;
    node->io_poll_end = io_poll_end;
}

void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *notifier,
                            bool is_external,
                            EventNotifierHandler *io_read,
                            AioPollFn *io_poll)
{
    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier), is_external,
                       (IOHandler *)io_read, NULL, io_poll, notifier);
}

void aio_set_event_notifier_poll(AioContext *ctx,
                                 EventNotifier *notifier,
                                 EventNotifierHandler *io_poll_begin,
                                 EventNotifierHandler *io_poll_end)
{
    aio_set_fd_poll(ctx, event_notifier_get_fd(notifier),
                    (IOHandler *)io_poll_begin,
                    (IOHandler *)io_poll_end);
}

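/* Tell the registered handlers that busy polling is starting or stopping by
 * calling their io_poll_begin or io_poll_end callback, if set.
 */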
static void poll_set_started(AioContext *ctx, bool started)
{
    AioHandler *node;

    if (started == ctx->poll_started) {
        return;
    }

    ctx->poll_started = started;

    qemu_lockcnt_inc(&ctx->list_lock);
    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        IOHandler *fn;

        if (node->deleted) {
            continue;
        }

        if (started) {
            fn = node->io_poll_begin;
        } else {
            fn = node->io_poll_end;
        }

        if (fn) {
            fn(node->opaque);
        }
    }
    qemu_lockcnt_dec(&ctx->list_lock);
}

bool aio_prepare(AioContext *ctx)
{
    /* Poll mode cannot be used with glib's event loop, disable it. */
    poll_set_started(ctx, false);

    return false;
}

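/* Return true if at least one handler has revents pending that would trigger
 * its io_read or io_write callback on the next dispatch.
 */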
bool aio_pending(AioContext *ctx)
{
    AioHandler *node;
    bool result = false;

    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    qemu_lockcnt_inc(&ctx->list_lock);

    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        int revents;

        revents = node->pfd.revents & node->pfd.events;
        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read &&
            aio_node_check(ctx, node->is_external)) {
            result = true;
            break;
        }
        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write &&
            aio_node_check(ctx, node->is_external)) {
            result = true;
            break;
        }
    }
    qemu_lockcnt_dec(&ctx->list_lock);

    return result;
}

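/* Call io_read/io_write for every handler whose revents match its interest
 * mask, and free nodes that were marked deleted once the list lock allows it.
 * Returns true if progress was made; the ctx->notifier read does not count.
 */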
static bool aio_dispatch_handlers(AioContext *ctx)
{
    AioHandler *node, *tmp;
    bool progress = false;

    QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
        int revents;

        revents = node->pfd.revents & node->pfd.events;
        node->pfd.revents = 0;

        if (!node->deleted &&
            (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
            aio_node_check(ctx, node->is_external) &&
            node->io_read) {
            node->io_read(node->opaque);

            /* aio_notify() does not count as progress */
            if (node->opaque != &ctx->notifier) {
                progress = true;
            }
        }
        if (!node->deleted &&
            (revents & (G_IO_OUT | G_IO_ERR)) &&
            aio_node_check(ctx, node->is_external) &&
            node->io_write) {
            node->io_write(node->opaque);
            progress = true;
        }

        if (node->deleted) {
            if (qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
                QLIST_REMOVE(node, node);
                g_free(node);
                qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
            }
        }
    }

    return progress;
}

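/* Run one non-blocking dispatch pass: bottom halves, fd handlers with
 * pending revents, then expired timers.
 */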
void aio_dispatch(AioContext *ctx)
{
    qemu_lockcnt_inc(&ctx->list_lock);
    aio_bh_poll(ctx);
    aio_dispatch_handlers(ctx);
    qemu_lockcnt_dec(&ctx->list_lock);

    timerlistgroup_run_timers(&ctx->tlg);
}

/* These thread-local variables are used only in a small part of aio_poll
 * around the call to the poll() system call.  In particular they are not
 * used while aio_poll is performing callbacks, which makes it much easier
 * to think about reentrancy!
 *
 * Stack-allocated arrays would be perfect but they have size limitations;
 * heap allocation is expensive enough that we want to reuse arrays across
 * calls to aio_poll().  And because poll() has to be called without holding
 * any lock, the arrays cannot be stored in AioContext.  Thread-local data
 * has none of the disadvantages of these three options.
 */
static __thread GPollFD *pollfds;
static __thread AioHandler **nodes;
static __thread unsigned npfd, nalloc;
static __thread Notifier pollfds_cleanup_notifier;

static void pollfds_cleanup(Notifier *n, void *unused)
{
    g_assert(npfd == 0);
    g_free(pollfds);
    g_free(nodes);
    nalloc = 0;
}

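/* Append @node to the thread-local pollfds[]/nodes[] arrays, growing them
 * (and registering the at-exit cleanup notifier) when they are full.
 */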
static void add_pollfd(AioHandler *node)
{
    if (npfd == nalloc) {
        if (nalloc == 0) {
            pollfds_cleanup_notifier.notify = pollfds_cleanup;
            qemu_thread_atexit_add(&pollfds_cleanup_notifier);
            nalloc = 8;
        } else {
            g_assert(nalloc <= INT_MAX);
            nalloc *= 2;
        }
        pollfds = g_renew(GPollFD, pollfds, nalloc);
        nodes = g_renew(AioHandler *, nodes, nalloc);
    }
    nodes[npfd] = node;
    pollfds[npfd] = (GPollFD) {
        .fd = node->pfd.fd,
        .events = node->pfd.events,
    };
    npfd++;
}

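/* Invoke every handler's io_poll() callback once.  Any successful poll clears
 * *timeout so the caller can skip the blocking wait; only polls outside
 * ctx->notifier count as progress.
 */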
static bool run_poll_handlers_once(AioContext *ctx, int64_t *timeout)
{
    bool progress = false;
    AioHandler *node;

    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        if (!node->deleted && node->io_poll &&
            aio_node_check(ctx, node->is_external) &&
            node->io_poll(node->opaque)) {
            *timeout = 0;
            if (node->opaque != &ctx->notifier) {
                progress = true;
            }
        }

        /* Caller handles freeing deleted nodes.  Don't do it here. */
    }

    return progress;
}

/* run_poll_handlers:
 * @ctx: the AioContext
 * @max_ns: maximum time to poll for, in nanoseconds
 *
 * Polls for a given time.
 *
 * Note that ctx->notify_me must be non-zero so this function can detect
 * aio_notify().
 *
 * Note that the caller must have incremented ctx->list_lock.
 *
 * Returns: true if progress was made, false otherwise
 */
static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout)
{
    bool progress;
    int64_t start_time, elapsed_time;

    assert(ctx->notify_me);
    assert(qemu_lockcnt_count(&ctx->list_lock) > 0);

    trace_run_poll_handlers_begin(ctx, max_ns, *timeout);

    start_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    do {
        progress = run_poll_handlers_once(ctx, timeout);
        elapsed_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start_time;
    } while (!progress && elapsed_time < max_ns
             && !atomic_read(&ctx->poll_disable_cnt));

    /* If time has passed with no successful polling, adjust *timeout to
     * keep the same ending time.
     */
    if (*timeout != -1) {
        *timeout -= MIN(*timeout, elapsed_time);
    }

    trace_run_poll_handlers_end(ctx, progress, *timeout);
    return progress;
}

/* try_poll_mode:
 * @ctx: the AioContext
 * @timeout: timeout for blocking wait, computed by the caller and updated if
 *    polling succeeds.
 *
 * ctx->notify_me must be non-zero so this function can detect aio_notify().
 *
 * Note that the caller must have incremented ctx->list_lock.
 *
 * Returns: true if progress was made, false otherwise
 */
static bool try_poll_mode(AioContext *ctx, int64_t *timeout)
{
    /* See qemu_soonest_timeout() uint64_t hack */
    int64_t max_ns = MIN((uint64_t)*timeout, (uint64_t)ctx->poll_ns);

    if (max_ns && !atomic_read(&ctx->poll_disable_cnt)) {
        poll_set_started(ctx, true);

        if (run_poll_handlers(ctx, max_ns, timeout)) {
            return true;
        }
    }

    poll_set_started(ctx, false);

    /* Even if we don't run busy polling, try polling once in case it can make
     * progress and the caller will be able to avoid ppoll(2)/epoll_wait(2).
     */
    return run_poll_handlers_once(ctx, timeout);
}

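/* aio_poll:
 * @ctx: the AioContext
 * @blocking: whether the call may block waiting for events
 *
 * Run one iteration of the event loop: try busy polling first, fall back to
 * ppoll(2)/epoll_wait(2) when a timeout remains, then dispatch bottom halves,
 * ready fd handlers and expired timers.  The busy-poll window ctx->poll_ns is
 * adjusted according to how long this iteration blocked.
 *
 * Returns: true if progress was made, false otherwise
 */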
bool aio_poll(AioContext *ctx, bool blocking)
{
    AioHandler *node;
    int i;
    int ret = 0;
    bool progress;
    int64_t timeout;
    int64_t start = 0;

    assert(in_aio_context_home_thread(ctx));

    /* aio_notify can avoid the expensive event_notifier_set if
     * everything (file descriptors, bottom halves, timers) will
     * be re-evaluated before the next blocking poll().  This is
     * already true when aio_poll is called with blocking == false;
     * if blocking == true, it is only true after poll() returns,
     * so disable the optimization now.
     */
    if (blocking) {
        atomic_add(&ctx->notify_me, 2);
    }

    qemu_lockcnt_inc(&ctx->list_lock);

    if (ctx->poll_max_ns) {
        start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }

    timeout = blocking ? aio_compute_timeout(ctx) : 0;
    progress = try_poll_mode(ctx, &timeout);
    assert(!(timeout && progress));

    /* If polling is allowed, non-blocking aio_poll does not need the
     * system call---a single round of run_poll_handlers_once suffices.
     */
    if (timeout || atomic_read(&ctx->poll_disable_cnt)) {
        assert(npfd == 0);

        /* fill pollfds */

        if (!aio_epoll_enabled(ctx)) {
            QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
                if (!node->deleted && node->pfd.events
                    && aio_node_check(ctx, node->is_external)) {
                    add_pollfd(node);
                }
            }
        }

        /* wait until next event */
        if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
            AioHandler epoll_handler;

            epoll_handler.pfd.fd = ctx->epollfd;
            epoll_handler.pfd.events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR;
            npfd = 0;
            add_pollfd(&epoll_handler);
            ret = aio_epoll(ctx, pollfds, npfd, timeout);
        } else {
            ret = qemu_poll_ns(pollfds, npfd, timeout);
        }
    }

    if (blocking) {
        atomic_sub(&ctx->notify_me, 2);
        aio_notify_accept(ctx);
    }

    /* Adjust polling time */
    if (ctx->poll_max_ns) {
        int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;

        if (block_ns <= ctx->poll_ns) {
            /* This is the sweet spot, no adjustment needed */
        } else if (block_ns > ctx->poll_max_ns) {
            /* We'd have to poll for too long, poll less */
            int64_t old = ctx->poll_ns;

            if (ctx->poll_shrink) {
                ctx->poll_ns /= ctx->poll_shrink;
            } else {
                ctx->poll_ns = 0;
            }

            trace_poll_shrink(ctx, old, ctx->poll_ns);
        } else if (ctx->poll_ns < ctx->poll_max_ns &&
                   block_ns < ctx->poll_max_ns) {
            /* There is room to grow, poll longer */
            int64_t old = ctx->poll_ns;
            int64_t grow = ctx->poll_grow;

            if (grow == 0) {
                grow = 2;
            }

            if (ctx->poll_ns) {
                ctx->poll_ns *= grow;
            } else {
                ctx->poll_ns = 4000; /* start polling at 4 microseconds */
            }

            if (ctx->poll_ns > ctx->poll_max_ns) {
                ctx->poll_ns = ctx->poll_max_ns;
            }

            trace_poll_grow(ctx, old, ctx->poll_ns);
        }
    }

    /* if we have any readable fds, dispatch event */
    if (ret > 0) {
        for (i = 0; i < npfd; i++) {
            nodes[i]->pfd.revents = pollfds[i].revents;
        }
    }

    npfd = 0;

    progress |= aio_bh_poll(ctx);

    if (ret > 0) {
        progress |= aio_dispatch_handlers(ctx);
    }

    qemu_lockcnt_dec(&ctx->list_lock);

    progress |= timerlistgroup_run_timers(&ctx->tlg);

    return progress;
}

void aio_context_setup(AioContext *ctx)
{
#ifdef CONFIG_EPOLL_CREATE1
    assert(!ctx->epollfd);
    ctx->epollfd = epoll_create1(EPOLL_CLOEXEC);
    if (ctx->epollfd == -1) {
        fprintf(stderr, "Failed to create epoll instance: %s\n",
                strerror(errno));
        ctx->epoll_available = false;
    } else {
        ctx->epoll_available = true;
    }
#endif
}

void aio_context_destroy(AioContext *ctx)
{
#ifdef CONFIG_EPOLL_CREATE1
    aio_epoll_disable(ctx);
#endif
}

void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
                                 int64_t grow, int64_t shrink, Error **errp)
{
    /* No thread synchronization here, it doesn't matter if an incorrect value
     * is used once.
     */
    ctx->poll_max_ns = max_ns;
    ctx->poll_ns = 0;
    ctx->poll_grow = grow;
    ctx->poll_shrink = shrink;

    aio_notify(ctx);
}