qemu/util/oslib-posix.c
/*
 * oslib-posix.c
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2010 Red Hat, Inc.
 *
 * QEMU library functions on POSIX which are shared between QEMU and
 * the QEMU tools.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include <termios.h>

#include <glib/gprintf.h>

#include "sysemu/sysemu.h"
#include "trace.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/madvise.h"
#include "qemu/sockets.h"
#include "qemu/thread.h"
#include <libgen.h>
#include "qemu/cutils.h"
#include "qemu/compiler.h"
#include "qemu/units.h"
#include "qemu/thread-context.h"

#ifdef CONFIG_LINUX
#include <sys/syscall.h>
#endif

#ifdef __FreeBSD__
#include <sys/thr.h>
#include <sys/types.h>
#include <sys/user.h>
#include <libutil.h>
#endif

#ifdef __NetBSD__
#include <lwp.h>
#endif

#include "qemu/mmap-alloc.h"

#define MAX_MEM_PREALLOC_THREAD_COUNT 16

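/*
 * Machinery for multi-threaded memory preallocation: touch_all_pages()
 * splits the range across up to MAX_MEM_PREALLOC_THREAD_COUNT worker
 * threads, each described by a MemsetThread and tied together by a
 * MemsetContext that is shared with the SIGBUS handler below.
 */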
struct MemsetThread;

typedef struct MemsetContext {
    bool all_threads_created;
    bool any_thread_failed;
    struct MemsetThread *threads;
    int num_threads;
} MemsetContext;

struct MemsetThread {
    char *addr;
    size_t numpages;
    size_t hpagesize;
    QemuThread pgthread;
    sigjmp_buf env;
    MemsetContext *context;
};
typedef struct MemsetThread MemsetThread;

/* used by sigbus_handler() */
static MemsetContext *sigbus_memset_context;
static struct sigaction sigbus_oldact;
static QemuMutex sigbus_mutex;

static QemuMutex page_mutex;
static QemuCond page_cond;

int qemu_get_thread_id(void)
{
#if defined(__linux__)
    return syscall(SYS_gettid);
#elif defined(__FreeBSD__)
    /* thread id is up to INT_MAX */
    long tid;
    thr_self(&tid);
    return (int)tid;
#elif defined(__NetBSD__)
    return _lwp_self();
#elif defined(__OpenBSD__)
    return getthrid();
#else
    return getpid();
#endif
}

int qemu_daemon(int nochdir, int noclose)
{
    return daemon(nochdir, noclose);
}

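/*
 * Write our PID to the file at @path, holding an fcntl() write lock on it
 * for the lifetime of the process.  After locking, the path is stat()ed
 * again and its inode compared with the locked fd's: if the file was
 * unlinked or recreated in the meantime, we locked a stale file, so retry
 * from the top.
 */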
bool qemu_write_pidfile(const char *path, Error **errp)
{
    int fd;
    char pidstr[32];

    while (1) {
        struct stat a, b;
        struct flock lock = {
            .l_type = F_WRLCK,
            .l_whence = SEEK_SET,
            .l_len = 0,
        };

        fd = qemu_create(path, O_WRONLY, S_IRUSR | S_IWUSR, errp);
        if (fd == -1) {
            return false;
        }

        if (fstat(fd, &b) < 0) {
            error_setg_errno(errp, errno, "Cannot stat file");
            goto fail_close;
        }

        if (fcntl(fd, F_SETLK, &lock)) {
            error_setg_errno(errp, errno, "Cannot lock pid file");
            goto fail_close;
        }

        /*
         * Now make sure the path we locked is the same one that now
         * exists on the filesystem.
         */
        if (stat(path, &a) < 0) {
            /*
             * PID file disappeared, someone else must be racing with
             * us, so try again.
             */
            close(fd);
            continue;
        }

        if (a.st_ino == b.st_ino) {
            break;
        }

        /*
         * PID file was recreated, someone else must be racing with
         * us, so try again.
         */
        close(fd);
    }

    if (ftruncate(fd, 0) < 0) {
        error_setg_errno(errp, errno, "Failed to truncate pid file");
        goto fail_unlink;
    }

    snprintf(pidstr, sizeof(pidstr), FMT_pid "\n", getpid());
    if (qemu_write_full(fd, pidstr, strlen(pidstr)) != strlen(pidstr)) {
        error_setg(errp, "Failed to write pid file");
        goto fail_unlink;
    }

    return true;

fail_unlink:
    unlink(path);
fail_close:
    close(fd);
    return false;
}

/* Allocate anonymous RAM, optionally shared and/or without swap reservation */
void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared,
                          bool noreserve)
{
    const uint32_t qemu_map_flags = (shared ? QEMU_MAP_SHARED : 0) |
                                    (noreserve ? QEMU_MAP_NORESERVE : 0);
    size_t align = QEMU_VMALLOC_ALIGN;
    void *ptr = qemu_ram_mmap(-1, size, align, qemu_map_flags, 0);

    if (ptr == MAP_FAILED) {
        return NULL;
    }

    if (alignment) {
        *alignment = align;
    }

    trace_qemu_anon_ram_alloc(size, ptr);
    return ptr;
}

void qemu_anon_ram_free(void *ptr, size_t size)
{
    trace_qemu_anon_ram_free(ptr, size);
    qemu_ram_munmap(-1, ptr, size);
}

void qemu_socket_set_block(int fd)
{
    g_unix_set_fd_nonblocking(fd, false, NULL);
}

int qemu_socket_try_set_nonblock(int fd)
{
    return g_unix_set_fd_nonblocking(fd, true, NULL) ? 0 : -errno;
}

void qemu_socket_set_nonblock(int fd)
{
    int f;
    f = qemu_socket_try_set_nonblock(fd);
    assert(f == 0);
}

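/*
 * Allow rebinding to a local address still in TIME_WAIT from an earlier
 * socket.  On POSIX, SO_REUSEADDR provides exactly this; the win32
 * counterpart of this helper is a no-op, since fast reuse is the default
 * there.
 */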
int socket_set_fast_reuse(int fd)
{
    int val = 1, ret;

    ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
                     (const char *)&val, sizeof(val));

    assert(ret == 0);

    return ret;
}

void qemu_set_cloexec(int fd)
{
    int f;
    f = fcntl(fd, F_GETFD);
    assert(f != -1);
    f = fcntl(fd, F_SETFD, f | FD_CLOEXEC);
    assert(f != -1);
}

int qemu_socketpair(int domain, int type, int protocol, int sv[2])
{
    int ret;

#ifdef SOCK_CLOEXEC
    ret = socketpair(domain, type | SOCK_CLOEXEC, protocol, sv);
    if (ret != -1 || errno != EINVAL) {
        return ret;
    }
#endif
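    /*
     * Either SOCK_CLOEXEC is not available or the kernel rejected it with
     * EINVAL; create the pair without it and set FD_CLOEXEC afterwards.
     */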
    ret = socketpair(domain, type, protocol, sv);
    if (ret == 0) {
        qemu_set_cloexec(sv[0]);
        qemu_set_cloexec(sv[1]);
    }

    return ret;
}

char *
qemu_get_local_state_dir(void)
{
    return get_relocated_path(CONFIG_QEMU_LOCALSTATEDIR);
}

void qemu_set_tty_echo(int fd, bool echo)
{
    struct termios tty;

    tcgetattr(fd, &tty);

    if (echo) {
        tty.c_lflag |= ECHO | ECHONL | ICANON | IEXTEN;
    } else {
        tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN);
    }

    tcsetattr(fd, TCSANOW, &tty);
}

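/*
 * SIGBUS handler installed while preallocating memory without
 * MADV_POPULATE_WRITE: a fault in one of our memset threads is answered
 * with a siglongjmp() back into that thread; anything else is forwarded
 * to the previously installed handler where possible.
 */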
#ifdef CONFIG_LINUX
static void sigbus_handler(int signal, siginfo_t *siginfo, void *ctx)
#else /* CONFIG_LINUX */
static void sigbus_handler(int signal)
#endif /* CONFIG_LINUX */
{
    int i;

    if (sigbus_memset_context) {
        for (i = 0; i < sigbus_memset_context->num_threads; i++) {
            MemsetThread *thread = &sigbus_memset_context->threads[i];

            if (qemu_thread_is_self(&thread->pgthread)) {
                siglongjmp(thread->env, 1);
            }
        }
    }

#ifdef CONFIG_LINUX
    /*
     * We assume that the MCE SIGBUS handler could have been registered. We
     * should never receive BUS_MCEERR_AO on any of our threads, but only on
     * the main thread registered for PR_MCE_KILL_EARLY. Further, we should not
     * receive BUS_MCEERR_AR triggered by action of other threads on one of
     * our threads. So, no need to check for unrelated SIGBUS when seeing one
     * for our threads.
     *
     * We will forward to the MCE handler, which will either handle the SIGBUS
     * or reinstall the default SIGBUS handler and reraise the SIGBUS. The
     * default SIGBUS handler will crash the process, so we don't care.
     */
    if (sigbus_oldact.sa_flags & SA_SIGINFO) {
        sigbus_oldact.sa_sigaction(signal, siginfo, ctx);
        return;
    }
#endif /* CONFIG_LINUX */
    warn_report("qemu_prealloc_mem: unrelated SIGBUS detected and ignored");
}

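/*
 * Memset worker: touch every page of the thread's range once.  A SIGBUS
 * raised while touching (e.g. when a hugetlbfs file runs out of huge
 * pages) is turned into a clean -EFAULT return via the sigsetjmp() below
 * and the siglongjmp() in sigbus_handler().
 */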
static void *do_touch_pages(void *arg)
{
    MemsetThread *memset_args = (MemsetThread *)arg;
    sigset_t set, oldset;
    int ret = 0;

    /*
     * On Linux, the page faults from the loop below can cause mmap_sem
     * contention with allocation of the thread stacks.  Do not start
     * clearing until all threads have been created.
     */
    qemu_mutex_lock(&page_mutex);
    while (!memset_args->context->all_threads_created) {
        qemu_cond_wait(&page_cond, &page_mutex);
    }
    qemu_mutex_unlock(&page_mutex);

    /* unblock SIGBUS */
    sigemptyset(&set);
    sigaddset(&set, SIGBUS);
    pthread_sigmask(SIG_UNBLOCK, &set, &oldset);

    if (sigsetjmp(memset_args->env, 1)) {
        ret = -EFAULT;
    } else {
        char *addr = memset_args->addr;
        size_t numpages = memset_args->numpages;
        size_t hpagesize = memset_args->hpagesize;
        size_t i;
        for (i = 0; i < numpages; i++) {
            /*
             * Read & write back the same value, so we don't
             * corrupt existing user/app data that might be
             * stored.
             *
             * 'volatile' to stop the compiler from optimizing this
             * away to a no-op.
             */
            *(volatile char *)addr = *addr;
            addr += hpagesize;
        }
    }
    pthread_sigmask(SIG_SETMASK, &oldset, NULL);
    return (void *)(uintptr_t)ret;
}

static void *do_madv_populate_write_pages(void *arg)
{
    MemsetThread *memset_args = (MemsetThread *)arg;
    const size_t size = memset_args->numpages * memset_args->hpagesize;
    char * const addr = memset_args->addr;
    int ret = 0;

    /* See do_touch_pages(). */
    qemu_mutex_lock(&page_mutex);
    while (!memset_args->context->all_threads_created) {
        qemu_cond_wait(&page_cond, &page_mutex);
    }
    qemu_mutex_unlock(&page_mutex);

    if (size && qemu_madvise(addr, size, QEMU_MADV_POPULATE_WRITE)) {
        ret = -errno;
    }
    return (void *)(uintptr_t)ret;
}

static inline int get_memset_num_threads(size_t hpagesize, size_t numpages,
                                         int max_threads)
{
    long host_procs = sysconf(_SC_NPROCESSORS_ONLN);
    /* In case sysconf() fails, fall back to a single thread */
    int ret = 1;

    if (host_procs > 0) {
        ret = MIN(MIN(host_procs, MAX_MEM_PREALLOC_THREAD_COUNT), max_threads);
    }

    /* Especially with gigantic pages, don't create more threads than pages. */
    ret = MIN(ret, numpages);
    /* Don't start threads to prealloc comparatively little memory. */
    ret = MIN(ret, MAX(1, hpagesize * numpages / (64 * MiB)));

    return ret;
}

static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
                           int max_threads, ThreadContext *tc,
                           bool use_madv_populate_write)
{
    static gsize initialized = 0;
    MemsetContext context = {
        .num_threads = get_memset_num_threads(hpagesize, numpages, max_threads),
    };
    size_t numpages_per_thread, leftover;
    void *(*touch_fn)(void *);
    int ret = 0, i = 0;
    char *addr = area;

    if (g_once_init_enter(&initialized)) {
        qemu_mutex_init(&page_mutex);
        qemu_cond_init(&page_cond);
        g_once_init_leave(&initialized, 1);
    }

    if (use_madv_populate_write) {
        /* Avoid creating a single thread for MADV_POPULATE_WRITE */
        if (context.num_threads == 1) {
            if (qemu_madvise(area, hpagesize * numpages,
                             QEMU_MADV_POPULATE_WRITE)) {
                return -errno;
            }
            return 0;
        }
        touch_fn = do_madv_populate_write_pages;
    } else {
        touch_fn = do_touch_pages;
    }

    context.threads = g_new0(MemsetThread, context.num_threads);
    numpages_per_thread = numpages / context.num_threads;
    leftover = numpages % context.num_threads;
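    /*
     * Distribute the remainder evenly: the first 'leftover' threads each
     * touch one extra page.
     */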
    for (i = 0; i < context.num_threads; i++) {
        context.threads[i].addr = addr;
        context.threads[i].numpages = numpages_per_thread + (i < leftover);
        context.threads[i].hpagesize = hpagesize;
        context.threads[i].context = &context;
        if (tc) {
            thread_context_create_thread(tc, &context.threads[i].pgthread,
                                         "touch_pages",
                                         touch_fn, &context.threads[i],
                                         QEMU_THREAD_JOINABLE);
        } else {
            qemu_thread_create(&context.threads[i].pgthread, "touch_pages",
                               touch_fn, &context.threads[i],
                               QEMU_THREAD_JOINABLE);
        }
        addr += context.threads[i].numpages * hpagesize;
    }

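    /*
     * Publish the context so sigbus_handler() knows which threads it may
     * siglongjmp() back into.  This is only needed for the page-touching
     * fallback; MADV_POPULATE_WRITE reports failures via its return value
     * instead of raising SIGBUS.
     */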
    if (!use_madv_populate_write) {
        sigbus_memset_context = &context;
    }

    qemu_mutex_lock(&page_mutex);
    context.all_threads_created = true;
    qemu_cond_broadcast(&page_cond);
    qemu_mutex_unlock(&page_mutex);

    for (i = 0; i < context.num_threads; i++) {
        int tmp = (uintptr_t)qemu_thread_join(&context.threads[i].pgthread);

        if (tmp) {
            ret = tmp;
        }
    }

    if (!use_madv_populate_write) {
        sigbus_memset_context = NULL;
    }
    g_free(context.threads);

    return ret;
}

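/*
 * Probe whether MADV_POPULATE_WRITE works on this mapping.  EINVAL means
 * the kernel or the mapping doesn't support it; any other failure is
 * treated as "possible", so that the real error surfaces during the
 * actual preallocation run.
 */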
static bool madv_populate_write_possible(char *area, size_t pagesize)
{
    return !qemu_madvise(area, pagesize, QEMU_MADV_POPULATE_WRITE) ||
           errno != EINVAL;
}

void qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
                       ThreadContext *tc, Error **errp)
{
    static gsize initialized;
    int ret;
    size_t hpagesize = qemu_fd_getpagesize(fd);
    size_t numpages = DIV_ROUND_UP(sz, hpagesize);
    bool use_madv_populate_write;
    struct sigaction act;

    /*
     * Sense on every invocation, as MADV_POPULATE_WRITE cannot be used for
     * some special mappings, such as mapping /dev/mem.
     */
    use_madv_populate_write = madv_populate_write_possible(area, hpagesize);

    if (!use_madv_populate_write) {
        if (g_once_init_enter(&initialized)) {
            qemu_mutex_init(&sigbus_mutex);
            g_once_init_leave(&initialized, 1);
        }

        qemu_mutex_lock(&sigbus_mutex);
        memset(&act, 0, sizeof(act));
#ifdef CONFIG_LINUX
        act.sa_sigaction = &sigbus_handler;
        act.sa_flags = SA_SIGINFO;
#else /* CONFIG_LINUX */
        act.sa_handler = &sigbus_handler;
        act.sa_flags = 0;
#endif /* CONFIG_LINUX */

        ret = sigaction(SIGBUS, &act, &sigbus_oldact);
        if (ret) {
            qemu_mutex_unlock(&sigbus_mutex);
            error_setg_errno(errp, errno,
                "qemu_prealloc_mem: failed to install signal handler");
            return;
        }
    }

    /* touch pages simultaneously */
    ret = touch_all_pages(area, hpagesize, numpages, max_threads, tc,
                          use_madv_populate_write);
    if (ret) {
        error_setg_errno(errp, -ret,
                         "qemu_prealloc_mem: preallocating memory failed");
    }

    if (!use_madv_populate_write) {
        ret = sigaction(SIGBUS, &sigbus_oldact, NULL);
        if (ret) {
            /* Terminate QEMU since it can't recover from error */
            perror("qemu_prealloc_mem: failed to reinstall signal handler");
            exit(1);
        }
        qemu_mutex_unlock(&sigbus_mutex);
    }
}

char *qemu_get_pid_name(pid_t pid)
{
    char *name = NULL;

#if defined(__FreeBSD__)
    /* BSDs don't have /proc, but they provide a nice substitute */
    struct kinfo_proc *proc = kinfo_getproc(pid);

    if (proc) {
        name = g_strdup(proc->ki_comm);
        free(proc);
    }
#else
    /* Assume a system with reasonable procfs */
    char *pid_path;
    size_t len;

    pid_path = g_strdup_printf("/proc/%d/cmdline", pid);
    g_file_get_contents(pid_path, &name, &len, NULL);
    g_free(pid_path);
#endif

    return name;
}

pid_t qemu_fork(Error **errp)
{
    sigset_t oldmask, newmask;
    struct sigaction sig_action;
    int saved_errno;
    pid_t pid;

    /*
     * Need to block signals now, so that the child process can safely
     * kill off the caller's signal handlers without a race.
     */
    sigfillset(&newmask);
    if (pthread_sigmask(SIG_SETMASK, &newmask, &oldmask) != 0) {
        error_setg_errno(errp, errno,
                         "cannot block signals");
        return -1;
    }

    pid = fork();
    saved_errno = errno;

    if (pid < 0) {
        /* Attempt to restore the signal mask, but ignore failure, to
         * avoid obscuring the fork failure. */
        (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
        error_setg_errno(errp, saved_errno,
                         "cannot fork child process");
        errno = saved_errno;
        return -1;
    } else if (pid) {
        /* parent process */

        /* Restore our original signal mask now that the child is
         * safely running. The only documented failures are EFAULT (not
         * possible, since we are using a just-grabbed mask) or EINVAL
         * (not possible, since we are using correct arguments).  */
        (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
    } else {
        /* child process */
        size_t i;

        /* Clear out all signal handlers from the parent so nothing
         * unexpected can happen in our child once we unblock
         * signals. */
        sig_action.sa_handler = SIG_DFL;
        sig_action.sa_flags = 0;
        sigemptyset(&sig_action.sa_mask);

        for (i = 1; i < NSIG; i++) {
            /* The only possible errors are EFAULT or EINVAL. The former
             * won't happen, the latter we expect, so no need to check
             * the return value. */
            (void)sigaction(i, &sig_action, NULL);
        }

        /* Unmask all signals in the child, since we've no idea what the
         * caller's done with their signal mask and don't want to
         * propagate that to children. */
        sigemptyset(&newmask);
        if (pthread_sigmask(SIG_SETMASK, &newmask, NULL) != 0) {
            Error *local_err = NULL;
            error_setg_errno(&local_err, errno,
                             "cannot unblock signals");
            error_report_err(local_err);
            _exit(1);
        }
    }
    return pid;
}

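/*
 * Allocate a thread stack of at least *sz bytes, including one extra page
 * protected as a guard page at the growth end.  On return, *sz holds the
 * final size (page-aligned, guard page included); pass the same value to
 * qemu_free_stack() later.
 */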
void *qemu_alloc_stack(size_t *sz)
{
    void *ptr, *guardpage;
    int flags;
#ifdef CONFIG_DEBUG_STACK_USAGE
    void *ptr2;
#endif
    size_t pagesz = qemu_real_host_page_size();
#ifdef _SC_THREAD_STACK_MIN
    /* avoid stacks smaller than _SC_THREAD_STACK_MIN */
    long min_stack_sz = sysconf(_SC_THREAD_STACK_MIN);
    *sz = MAX(MAX(min_stack_sz, 0), *sz);
#endif
    /* adjust stack size to a multiple of the page size */
    *sz = ROUND_UP(*sz, pagesz);
    /* allocate one extra page for the guard page */
    *sz += pagesz;

    flags = MAP_PRIVATE | MAP_ANONYMOUS;
#if defined(MAP_STACK) && defined(__OpenBSD__)
    /* Only enable MAP_STACK on OpenBSD. Other OSes such as
     * Linux/FreeBSD/NetBSD have a flag with the same name
     * but differing functionality. OpenBSD will SEGV
     * if it spots execution with a stack pointer pointing
     * at memory that was not allocated with MAP_STACK.
     */
    flags |= MAP_STACK;
#endif

    ptr = mmap(NULL, *sz, PROT_READ | PROT_WRITE, flags, -1, 0);
    if (ptr == MAP_FAILED) {
        perror("failed to allocate memory for stack");
        abort();
    }

#if defined(HOST_IA64)
    /* separate register stack: place the guard page, aligned down to a
     * page boundary, in the middle of the mapping */
    guardpage = ptr + (((*sz - pagesz) / 2) & ~(pagesz - 1));
#elif defined(HOST_HPPA)
    /* stack grows up */
    guardpage = ptr + *sz - pagesz;
#else
    /* stack grows down */
    guardpage = ptr;
#endif
    if (mprotect(guardpage, pagesz, PROT_NONE) != 0) {
        perror("failed to set up stack guard page");
        abort();
    }

#ifdef CONFIG_DEBUG_STACK_USAGE
    /* Fill the usable stack with a known pattern so qemu_free_stack()
     * can measure how much of it was actually used. */
    for (ptr2 = ptr + pagesz; ptr2 < ptr + *sz; ptr2 += sizeof(uint32_t)) {
        *(uint32_t *)ptr2 = 0xdeadbeaf;
    }
#endif

    return ptr;
}

#ifdef CONFIG_DEBUG_STACK_USAGE
static __thread unsigned int max_stack_usage;
#endif

void qemu_free_stack(void *stack, size_t sz)
{
#ifdef CONFIG_DEBUG_STACK_USAGE
    unsigned int usage;
    void *ptr;

    /* Scan for the first word that no longer holds the fill pattern;
     * everything beyond it was touched by the thread. */
    for (ptr = stack + qemu_real_host_page_size(); ptr < stack + sz;
         ptr += sizeof(uint32_t)) {
        if (*(uint32_t *)ptr != 0xdeadbeaf) {
            break;
        }
    }
    usage = sz - (uintptr_t) (ptr - stack);
    if (usage > max_stack_usage) {
        error_report("thread %d max stack usage increased from %u to %u",
                     qemu_get_thread_id(), max_stack_usage, usage);
        max_stack_usage = usage;
    }
#endif

    munmap(stack, sz);
}

/*
 * Disable CFI checks.
 * We are going to call a signal handler directly. Such a handler may or may
 * not have been defined in our binary, so there's no guarantee that the
 * pointer used to set the handler is a cfi-valid pointer. Since the handlers
 * are stored in kernel memory, changing the handler to an attacker-defined
 * function requires being able to call a sigaction() syscall,
 * which is not as easy as overwriting a pointer in memory.
 */
QEMU_DISABLE_CFI
void sigaction_invoke(struct sigaction *action,
                      struct qemu_signalfd_siginfo *info)
{
    siginfo_t si = {};
    si.si_signo = info->ssi_signo;
    si.si_errno = info->ssi_errno;
    si.si_code = info->ssi_code;

    /* Convert the minimal set of fields defined by POSIX.
     * Positive si_code values are reserved for kernel-generated
     * signals, where the valid siginfo fields are determined by
     * the signal number.  But according to POSIX, it is unspecified
     * whether SI_USER and SI_QUEUE have values less than or equal to
     * zero.
     */
    if (info->ssi_code == SI_USER || info->ssi_code == SI_QUEUE ||
        info->ssi_code <= 0) {
        /* SIGTERM, etc.  */
        si.si_pid = info->ssi_pid;
        si.si_uid = info->ssi_uid;
    } else if (info->ssi_signo == SIGILL || info->ssi_signo == SIGFPE ||
               info->ssi_signo == SIGSEGV || info->ssi_signo == SIGBUS) {
        si.si_addr = (void *)(uintptr_t)info->ssi_addr;
    } else if (info->ssi_signo == SIGCHLD) {
        si.si_pid = info->ssi_pid;
        si.si_status = info->ssi_status;
        si.si_uid = info->ssi_uid;
    }
    action->sa_sigaction(info->ssi_signo, &si, NULL);
}

size_t qemu_get_host_physmem(void)
{
#ifdef _SC_PHYS_PAGES
    long pages = sysconf(_SC_PHYS_PAGES);
    if (pages > 0) {
        if (pages > SIZE_MAX / qemu_real_host_page_size()) {
            return SIZE_MAX;
        } else {
            return pages * qemu_real_host_page_size();
        }
    }
#endif
    return 0;
}

int qemu_msync(void *addr, size_t length, int fd)
{
    size_t align_mask = ~(qemu_real_host_page_size() - 1);

    /*
     * There are no strict requirements on the length of the mapping to
     * be synced, but it has to track the alignment adjustment made to
     * the address, and be rounded up to a multiple of the page size.
     */
    length += ((uintptr_t)addr & (qemu_real_host_page_size() - 1));
    length = (length + ~align_mask) & align_mask;

    addr = (void *)((uintptr_t)addr & align_mask);

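    /*
     * Illustrative example, assuming 4 KiB pages: addr = 0x1234 and
     * length = 0x100 become addr = 0x1000 and length = 0x1000, i.e. the
     * sync covers the whole page containing the original range.
     */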
    return msync(addr, length, MS_SYNC);
}