qemu/util/main-loop.c
<<
>>
Prefs
   1/*
   2 * QEMU System Emulator
   3 *
   4 * Copyright (c) 2003-2008 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24
  25#include "qemu/osdep.h"
  26#include "qapi/error.h"
  27#include "qemu/cutils.h"
  28#include "qemu/timer.h"
  29#include "sysemu/cpu-timers.h"
  30#include "sysemu/replay.h"
  31#include "qemu/main-loop.h"
  32#include "block/aio.h"
  33#include "qemu/error-report.h"
  34#include "qemu/queue.h"
  35#include "qemu/compiler.h"
  36
  37#ifndef _WIN32
  38#include <sys/wait.h>
  39#endif
  40
  41#ifndef _WIN32
  42
  43/* If we have signalfd, we mask out the signals we want to handle and then
  44 * use signalfd to listen for them.  We rely on whatever the current signal
  45 * handler is to dispatch the signals when we receive them.
  46 */
  47/*
  48 * Disable CFI checks.
  49 * We are going to call a signal hander directly. Such handler may or may not
  50 * have been defined in our binary, so there's no guarantee that the pointer
  51 * used to set the handler is a cfi-valid pointer. Since the handlers are
  52 * stored in kernel memory, changing the handler to an attacker-defined
  53 * function requires being able to call a sigaction() syscall,
  54 * which is not as easy as overwriting a pointer in memory.
  55 */
  56QEMU_DISABLE_CFI
  57static void sigfd_handler(void *opaque)
  58{
  59    int fd = (intptr_t)opaque;
  60    struct qemu_signalfd_siginfo info;
  61    struct sigaction action;
  62    ssize_t len;
  63
  64    while (1) {
  65        do {
  66            len = read(fd, &info, sizeof(info));
  67        } while (len == -1 && errno == EINTR);
  68
  69        if (len == -1 && errno == EAGAIN) {
  70            break;
  71        }
  72
  73        if (len != sizeof(info)) {
  74            error_report("read from sigfd returned %zd: %s", len,
  75                         g_strerror(errno));
  76            return;
  77        }
  78
  79        sigaction(info.ssi_signo, NULL, &action);
  80        if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) {
  81            sigaction_invoke(&action, &info);
  82        } else if (action.sa_handler) {
  83            action.sa_handler(info.ssi_signo);
  84        }
  85    }
  86}
  87
  88static int qemu_signal_init(Error **errp)
  89{
  90    int sigfd;
  91    sigset_t set;
  92
  93    /*
  94     * SIG_IPI must be blocked in the main thread and must not be caught
  95     * by sigwait() in the signal thread. Otherwise, the cpu thread will
  96     * not catch it reliably.
  97     */
  98    sigemptyset(&set);
  99    sigaddset(&set, SIG_IPI);
 100    sigaddset(&set, SIGIO);
 101    sigaddset(&set, SIGALRM);
 102    sigaddset(&set, SIGBUS);
 103    /* SIGINT cannot be handled via signalfd, so that ^C can be used
 104     * to interrupt QEMU when it is being run under gdb.  SIGHUP and
 105     * SIGTERM are also handled asynchronously, even though it is not
 106     * strictly necessary, because they use the same handler as SIGINT.
 107     */
 108    pthread_sigmask(SIG_BLOCK, &set, NULL);
 109
 110    sigdelset(&set, SIG_IPI);
 111    sigfd = qemu_signalfd(&set);
 112    if (sigfd == -1) {
 113        error_setg_errno(errp, errno, "failed to create signalfd");
 114        return -errno;
 115    }
 116
 117    fcntl_setfl(sigfd, O_NONBLOCK);
 118
 119    qemu_set_fd_handler(sigfd, sigfd_handler, NULL, (void *)(intptr_t)sigfd);
 120
 121    return 0;
 122}
 123
 124#else /* _WIN32 */
 125
 126static int qemu_signal_init(Error **errp)
 127{
 128    return 0;
 129}
 130#endif
 131
 132static AioContext *qemu_aio_context;
 133static QEMUBH *qemu_notify_bh;
 134
 /*
  * Callback of qemu_notify_bh.
  *
  * Intentionally empty: the bottom half exists only so that scheduling it
  * kicks the kernel out of ppoll/poll/WaitForMultipleObjects.
  */
 static void notify_event_cb(void *opaque)
 {
 }
 141
 142AioContext *qemu_get_aio_context(void)
 143{
 144    return qemu_aio_context;
 145}
 146
 147void qemu_notify_event(void)
 148{
 149    if (!qemu_aio_context) {
 150        return;
 151    }
 152    qemu_bh_schedule(qemu_notify_bh);
 153}
 154
 155static GArray *gpollfds;
 156
 157int qemu_init_main_loop(Error **errp)
 158{
 159    int ret;
 160    GSource *src;
 161
 162    init_clocks(qemu_timer_notify_cb);
 163
 164    ret = qemu_signal_init(errp);
 165    if (ret) {
 166        return ret;
 167    }
 168
 169    qemu_aio_context = aio_context_new(errp);
 170    if (!qemu_aio_context) {
 171        return -EMFILE;
 172    }
 173    qemu_set_current_aio_context(qemu_aio_context);
 174    qemu_notify_bh = qemu_bh_new(notify_event_cb, NULL);
 175    gpollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
 176    src = aio_get_g_source(qemu_aio_context);
 177    g_source_set_name(src, "aio-context");
 178    g_source_attach(src, NULL);
 179    g_source_unref(src);
 180    src = iohandler_get_g_source();
 181    g_source_set_name(src, "io-handler");
 182    g_source_attach(src, NULL);
 183    g_source_unref(src);
 184    return 0;
 185}
 186
 /*
  * Priority written by g_main_context_prepare() and handed back to
  * g_main_context_query() / g_main_context_check().
  */
 static int max_priority;
 188
 189#ifndef _WIN32
 /* Index in gpollfds at which the GLib-provided entries start. */
 static int glib_pollfds_idx;
 /* Number of GLib-provided entries stored from that index onwards. */
 static int glib_n_poll_fds;
 192
 /* Non-Windows build: nothing to do for @fd. */
 void qemu_fd_register(int fd)
 {
 }
 196
 197static void glib_pollfds_fill(int64_t *cur_timeout)
 198{
 199    GMainContext *context = g_main_context_default();
 200    int timeout = 0;
 201    int64_t timeout_ns;
 202    int n;
 203
 204    g_main_context_prepare(context, &max_priority);
 205
 206    glib_pollfds_idx = gpollfds->len;
 207    n = glib_n_poll_fds;
 208    do {
 209        GPollFD *pfds;
 210        glib_n_poll_fds = n;
 211        g_array_set_size(gpollfds, glib_pollfds_idx + glib_n_poll_fds);
 212        pfds = &g_array_index(gpollfds, GPollFD, glib_pollfds_idx);
 213        n = g_main_context_query(context, max_priority, &timeout, pfds,
 214                                 glib_n_poll_fds);
 215    } while (n != glib_n_poll_fds);
 216
 217    if (timeout < 0) {
 218        timeout_ns = -1;
 219    } else {
 220        timeout_ns = (int64_t)timeout * (int64_t)SCALE_MS;
 221    }
 222
 223    *cur_timeout = qemu_soonest_timeout(timeout_ns, *cur_timeout);
 224}
 225
 226static void glib_pollfds_poll(void)
 227{
 228    GMainContext *context = g_main_context_default();
 229    GPollFD *pfds = &g_array_index(gpollfds, GPollFD, glib_pollfds_idx);
 230
 231    if (g_main_context_check(context, max_priority, pfds, glib_n_poll_fds)) {
 232        g_main_context_dispatch(context);
 233    }
 234}
 235
 236#define MAX_MAIN_LOOP_SPIN (1000)
 237
 238static int os_host_main_loop_wait(int64_t timeout)
 239{
 240    GMainContext *context = g_main_context_default();
 241    int ret;
 242
 243    g_main_context_acquire(context);
 244
 245    glib_pollfds_fill(&timeout);
 246
 247    qemu_mutex_unlock_iothread();
 248    replay_mutex_unlock();
 249
 250    ret = qemu_poll_ns((GPollFD *)gpollfds->data, gpollfds->len, timeout);
 251
 252    replay_mutex_lock();
 253    qemu_mutex_lock_iothread();
 254
 255    glib_pollfds_poll();
 256
 257    g_main_context_release(context);
 258
 259    return ret;
 260}
 261#else
 262/***********************************************************/
 263/* Polling handling */
 264
 265typedef struct PollingEntry {
 266    PollingFunc *func;
 267    void *opaque;
 268    struct PollingEntry *next;
 269} PollingEntry;
 270
 271static PollingEntry *first_polling_entry;
 272
 273int qemu_add_polling_cb(PollingFunc *func, void *opaque)
 274{
 275    PollingEntry **ppe, *pe;
 276    pe = g_new0(PollingEntry, 1);
 277    pe->func = func;
 278    pe->opaque = opaque;
 279    for(ppe = &first_polling_entry; *ppe != NULL; ppe = &(*ppe)->next);
 280    *ppe = pe;
 281    return 0;
 282}
 283
 284void qemu_del_polling_cb(PollingFunc *func, void *opaque)
 285{
 286    PollingEntry **ppe, *pe;
 287    for(ppe = &first_polling_entry; *ppe != NULL; ppe = &(*ppe)->next) {
 288        pe = *ppe;
 289        if (pe->func == func && pe->opaque == opaque) {
 290            *ppe = pe->next;
 291            g_free(pe);
 292            break;
 293        }
 294    }
 295}
 296
 297/***********************************************************/
 298/* Wait objects support */
 299typedef struct WaitObjects {
 300    int num;
 301    int revents[MAXIMUM_WAIT_OBJECTS + 1];
 302    HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
 303    WaitObjectFunc *func[MAXIMUM_WAIT_OBJECTS + 1];
 304    void *opaque[MAXIMUM_WAIT_OBJECTS + 1];
 305} WaitObjects;
 306
 307static WaitObjects wait_objects = {0};
 308
 309int qemu_add_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
 310{
 311    WaitObjects *w = &wait_objects;
 312    if (w->num >= MAXIMUM_WAIT_OBJECTS) {
 313        return -1;
 314    }
 315    w->events[w->num] = handle;
 316    w->func[w->num] = func;
 317    w->opaque[w->num] = opaque;
 318    w->revents[w->num] = 0;
 319    w->num++;
 320    return 0;
 321}
 322
 323void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
 324{
 325    int i, found;
 326    WaitObjects *w = &wait_objects;
 327
 328    found = 0;
 329    for (i = 0; i < w->num; i++) {
 330        if (w->events[i] == handle) {
 331            found = 1;
 332        }
 333        if (found) {
 334            w->events[i] = w->events[i + 1];
 335            w->func[i] = w->func[i + 1];
 336            w->opaque[i] = w->opaque[i + 1];
 337            w->revents[i] = w->revents[i + 1];
 338        }
 339    }
 340    if (found) {
 341        w->num--;
 342    }
 343}
 344
 345void qemu_fd_register(int fd)
 346{
 347    WSAEventSelect(fd, event_notifier_get_handle(&qemu_aio_context->notifier),
 348                   FD_READ | FD_ACCEPT | FD_CLOSE |
 349                   FD_CONNECT | FD_WRITE | FD_OOB);
 350}
 351
 352static int pollfds_fill(GArray *pollfds, fd_set *rfds, fd_set *wfds,
 353                        fd_set *xfds)
 354{
 355    int nfds = -1;
 356    int i;
 357
 358    for (i = 0; i < pollfds->len; i++) {
 359        GPollFD *pfd = &g_array_index(pollfds, GPollFD, i);
 360        int fd = pfd->fd;
 361        int events = pfd->events;
 362        if (events & G_IO_IN) {
 363            FD_SET(fd, rfds);
 364            nfds = MAX(nfds, fd);
 365        }
 366        if (events & G_IO_OUT) {
 367            FD_SET(fd, wfds);
 368            nfds = MAX(nfds, fd);
 369        }
 370        if (events & G_IO_PRI) {
 371            FD_SET(fd, xfds);
 372            nfds = MAX(nfds, fd);
 373        }
 374    }
 375    return nfds;
 376}
 377
 378static void pollfds_poll(GArray *pollfds, int nfds, fd_set *rfds,
 379                         fd_set *wfds, fd_set *xfds)
 380{
 381    int i;
 382
 383    for (i = 0; i < pollfds->len; i++) {
 384        GPollFD *pfd = &g_array_index(pollfds, GPollFD, i);
 385        int fd = pfd->fd;
 386        int revents = 0;
 387
 388        if (FD_ISSET(fd, rfds)) {
 389            revents |= G_IO_IN;
 390        }
 391        if (FD_ISSET(fd, wfds)) {
 392            revents |= G_IO_OUT;
 393        }
 394        if (FD_ISSET(fd, xfds)) {
 395            revents |= G_IO_PRI;
 396        }
 397        pfd->revents = revents & pfd->events;
 398    }
 399}
 400
 401static int os_host_main_loop_wait(int64_t timeout)
 402{
 403    GMainContext *context = g_main_context_default();
 404    GPollFD poll_fds[1024 * 2]; /* this is probably overkill */
 405    int select_ret = 0;
 406    int g_poll_ret, ret, i, n_poll_fds;
 407    PollingEntry *pe;
 408    WaitObjects *w = &wait_objects;
 409    gint poll_timeout;
 410    int64_t poll_timeout_ns;
 411    static struct timeval tv0;
 412    fd_set rfds, wfds, xfds;
 413    int nfds;
 414
 415    g_main_context_acquire(context);
 416
 417    /* XXX: need to suppress polling by better using win32 events */
 418    ret = 0;
 419    for (pe = first_polling_entry; pe != NULL; pe = pe->next) {
 420        ret |= pe->func(pe->opaque);
 421    }
 422    if (ret != 0) {
 423        g_main_context_release(context);
 424        return ret;
 425    }
 426
 427    FD_ZERO(&rfds);
 428    FD_ZERO(&wfds);
 429    FD_ZERO(&xfds);
 430    nfds = pollfds_fill(gpollfds, &rfds, &wfds, &xfds);
 431    if (nfds >= 0) {
 432        select_ret = select(nfds + 1, &rfds, &wfds, &xfds, &tv0);
 433        if (select_ret != 0) {
 434            timeout = 0;
 435        }
 436        if (select_ret > 0) {
 437            pollfds_poll(gpollfds, nfds, &rfds, &wfds, &xfds);
 438        }
 439    }
 440
 441    g_main_context_prepare(context, &max_priority);
 442    n_poll_fds = g_main_context_query(context, max_priority, &poll_timeout,
 443                                      poll_fds, ARRAY_SIZE(poll_fds));
 444    g_assert(n_poll_fds + w->num <= ARRAY_SIZE(poll_fds));
 445
 446    for (i = 0; i < w->num; i++) {
 447        poll_fds[n_poll_fds + i].fd = (DWORD_PTR)w->events[i];
 448        poll_fds[n_poll_fds + i].events = G_IO_IN;
 449    }
 450
 451    if (poll_timeout < 0) {
 452        poll_timeout_ns = -1;
 453    } else {
 454        poll_timeout_ns = (int64_t)poll_timeout * (int64_t)SCALE_MS;
 455    }
 456
 457    poll_timeout_ns = qemu_soonest_timeout(poll_timeout_ns, timeout);
 458
 459    qemu_mutex_unlock_iothread();
 460
 461    replay_mutex_unlock();
 462
 463    g_poll_ret = qemu_poll_ns(poll_fds, n_poll_fds + w->num, poll_timeout_ns);
 464
 465    replay_mutex_lock();
 466
 467    qemu_mutex_lock_iothread();
 468    if (g_poll_ret > 0) {
 469        for (i = 0; i < w->num; i++) {
 470            w->revents[i] = poll_fds[n_poll_fds + i].revents;
 471        }
 472        for (i = 0; i < w->num; i++) {
 473            if (w->revents[i] && w->func[i]) {
 474                w->func[i](w->opaque[i]);
 475            }
 476        }
 477    }
 478
 479    if (g_main_context_check(context, max_priority, poll_fds, n_poll_fds)) {
 480        g_main_context_dispatch(context);
 481    }
 482
 483    g_main_context_release(context);
 484
 485    return select_ret || g_poll_ret;
 486}
 487#endif
 488
 489static NotifierList main_loop_poll_notifiers =
 490    NOTIFIER_LIST_INITIALIZER(main_loop_poll_notifiers);
 491
 492void main_loop_poll_add_notifier(Notifier *notify)
 493{
 494    notifier_list_add(&main_loop_poll_notifiers, notify);
 495}
 496
 497void main_loop_poll_remove_notifier(Notifier *notify)
 498{
 499    notifier_remove(notify);
 500}
 501
 502void main_loop_wait(int nonblocking)
 503{
 504    MainLoopPoll mlpoll = {
 505        .state = MAIN_LOOP_POLL_FILL,
 506        .timeout = UINT32_MAX,
 507        .pollfds = gpollfds,
 508    };
 509    int ret;
 510    int64_t timeout_ns;
 511
 512    if (nonblocking) {
 513        mlpoll.timeout = 0;
 514    }
 515
 516    /* poll any events */
 517    g_array_set_size(gpollfds, 0); /* reset for new iteration */
 518    /* XXX: separate device handlers from system ones */
 519    notifier_list_notify(&main_loop_poll_notifiers, &mlpoll);
 520
 521    if (mlpoll.timeout == UINT32_MAX) {
 522        timeout_ns = -1;
 523    } else {
 524        timeout_ns = (uint64_t)mlpoll.timeout * (int64_t)(SCALE_MS);
 525    }
 526
 527    timeout_ns = qemu_soonest_timeout(timeout_ns,
 528                                      timerlistgroup_deadline_ns(
 529                                          &main_loop_tlg));
 530
 531    ret = os_host_main_loop_wait(timeout_ns);
 532    mlpoll.state = ret < 0 ? MAIN_LOOP_POLL_ERR : MAIN_LOOP_POLL_OK;
 533    notifier_list_notify(&main_loop_poll_notifiers, &mlpoll);
 534
 535    if (icount_enabled()) {
 536        /*
 537         * CPU thread can infinitely wait for event after
 538         * missing the warp
 539         */
 540        icount_start_warp_timer();
 541    }
 542    qemu_clock_run_all_timers();
 543}
 544
 545/* Functions to operate on the main QEMU AioContext.  */
 546
 547QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name)
 548{
 549    return aio_bh_new_full(qemu_aio_context, cb, opaque, name);
 550}
 551
 552/*
 553 * Functions to operate on the I/O handler AioContext.
 554 * This context runs on top of main loop. We can't reuse qemu_aio_context
 555 * because iohandlers mustn't be polled by aio_poll(qemu_aio_context).
 556 */
 557static AioContext *iohandler_ctx;
 558
 559static void iohandler_init(void)
 560{
 561    if (!iohandler_ctx) {
 562        iohandler_ctx = aio_context_new(&error_abort);
 563    }
 564}
 565
 566AioContext *iohandler_get_aio_context(void)
 567{
 568    iohandler_init();
 569    return iohandler_ctx;
 570}
 571
 572GSource *iohandler_get_g_source(void)
 573{
 574    iohandler_init();
 575    return aio_get_g_source(iohandler_ctx);
 576}
 577
 578void qemu_set_fd_handler(int fd,
 579                         IOHandler *fd_read,
 580                         IOHandler *fd_write,
 581                         void *opaque)
 582{
 583    iohandler_init();
 584    aio_set_fd_handler(iohandler_ctx, fd, false,
 585                       fd_read, fd_write, NULL, NULL, opaque);
 586}
 587
 588void event_notifier_set_handler(EventNotifier *e,
 589                                EventNotifierHandler *handler)
 590{
 591    iohandler_init();
 592    aio_set_event_notifier(iohandler_ctx, e, false,
 593                           handler, NULL, NULL);
 594}
 595