qemu/util/oslib-posix.c
<<
>>
Prefs
   1/*
   2 * os-posix-lib.c
   3 *
   4 * Copyright (c) 2003-2008 Fabrice Bellard
   5 * Copyright (c) 2010 Red Hat, Inc.
   6 *
   7 * QEMU library functions on POSIX which are shared between QEMU and
   8 * the QEMU tools.
   9 *
  10 * Permission is hereby granted, free of charge, to any person obtaining a copy
  11 * of this software and associated documentation files (the "Software"), to deal
  12 * in the Software without restriction, including without limitation the rights
  13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  14 * copies of the Software, and to permit persons to whom the Software is
  15 * furnished to do so, subject to the following conditions:
  16 *
  17 * The above copyright notice and this permission notice shall be included in
  18 * all copies or substantial portions of the Software.
  19 *
  20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  26 * THE SOFTWARE.
  27 */
  28
  29#include "qemu/osdep.h"
  30#include <termios.h>
  31
  32#include <glib/gprintf.h>
  33
  34#include "sysemu/sysemu.h"
  35#include "trace.h"
  36#include "qapi/error.h"
  37#include "qemu/sockets.h"
  38#include <libgen.h>
  39#include <sys/signal.h>
  40#include "qemu/cutils.h"
  41
  42#ifdef CONFIG_LINUX
  43#include <sys/syscall.h>
  44#endif
  45
  46#ifdef __FreeBSD__
  47#include <sys/sysctl.h>
  48#include <sys/user.h>
  49#include <libutil.h>
  50#endif
  51
  52#include "qemu/mmap-alloc.h"
  53
  54#ifdef CONFIG_DEBUG_STACK_USAGE
  55#include "qemu/error-report.h"
  56#endif
  57
  58int qemu_get_thread_id(void)
  59{
  60#if defined(__linux__)
  61    return syscall(SYS_gettid);
  62#else
  63    return getpid();
  64#endif
  65}
  66
  67int qemu_daemon(int nochdir, int noclose)
  68{
  69    return daemon(nochdir, noclose);
  70}
  71
  72void *qemu_oom_check(void *ptr)
  73{
  74    if (ptr == NULL) {
  75        fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno));
  76        abort();
  77    }
  78    return ptr;
  79}
  80
  81void *qemu_try_memalign(size_t alignment, size_t size)
  82{
  83    void *ptr;
  84
  85    if (alignment < sizeof(void*)) {
  86        alignment = sizeof(void*);
  87    }
  88
  89#if defined(_POSIX_C_SOURCE) && !defined(__sun__)
  90    int ret;
  91    ret = posix_memalign(&ptr, alignment, size);
  92    if (ret != 0) {
  93        errno = ret;
  94        ptr = NULL;
  95    }
  96#elif defined(CONFIG_BSD)
  97    ptr = valloc(size);
  98#else
  99    ptr = memalign(alignment, size);
 100#endif
 101    trace_qemu_memalign(alignment, size, ptr);
 102    return ptr;
 103}
 104
 105void *qemu_memalign(size_t alignment, size_t size)
 106{
 107    return qemu_oom_check(qemu_try_memalign(alignment, size));
 108}
 109
 110/* alloc shared memory pages */
 111void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment)
 112{
 113    size_t align = QEMU_VMALLOC_ALIGN;
 114    void *ptr = qemu_ram_mmap(-1, size, align, false);
 115
 116    if (ptr == MAP_FAILED) {
 117        return NULL;
 118    }
 119
 120    if (alignment) {
 121        *alignment = align;
 122    }
 123
 124    trace_qemu_anon_ram_alloc(size, ptr);
 125    return ptr;
 126}
 127
 128void qemu_vfree(void *ptr)
 129{
 130    trace_qemu_vfree(ptr);
 131    free(ptr);
 132}
 133
 134void qemu_anon_ram_free(void *ptr, size_t size)
 135{
 136    trace_qemu_anon_ram_free(ptr, size);
 137    qemu_ram_munmap(ptr, size);
 138}
 139
 140void qemu_set_block(int fd)
 141{
 142    int f;
 143    f = fcntl(fd, F_GETFL);
 144    fcntl(fd, F_SETFL, f & ~O_NONBLOCK);
 145}
 146
 147void qemu_set_nonblock(int fd)
 148{
 149    int f;
 150    f = fcntl(fd, F_GETFL);
 151    fcntl(fd, F_SETFL, f | O_NONBLOCK);
 152}
 153
 154int socket_set_fast_reuse(int fd)
 155{
 156    int val = 1, ret;
 157
 158    ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
 159                     (const char *)&val, sizeof(val));
 160
 161    assert(ret == 0);
 162
 163    return ret;
 164}
 165
 166void qemu_set_cloexec(int fd)
 167{
 168    int f;
 169    f = fcntl(fd, F_GETFD);
 170    fcntl(fd, F_SETFD, f | FD_CLOEXEC);
 171}
 172
 173/*
 174 * Creates a pipe with FD_CLOEXEC set on both file descriptors
 175 */
 176int qemu_pipe(int pipefd[2])
 177{
 178    int ret;
 179
 180#ifdef CONFIG_PIPE2
 181    ret = pipe2(pipefd, O_CLOEXEC);
 182    if (ret != -1 || errno != ENOSYS) {
 183        return ret;
 184    }
 185#endif
 186    ret = pipe(pipefd);
 187    if (ret == 0) {
 188        qemu_set_cloexec(pipefd[0]);
 189        qemu_set_cloexec(pipefd[1]);
 190    }
 191
 192    return ret;
 193}
 194
 195int qemu_utimens(const char *path, const struct timespec *times)
 196{
 197    struct timeval tv[2], tv_now;
 198    struct stat st;
 199    int i;
 200#ifdef CONFIG_UTIMENSAT
 201    int ret;
 202
 203    ret = utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW);
 204    if (ret != -1 || errno != ENOSYS) {
 205        return ret;
 206    }
 207#endif
 208    /* Fallback: use utimes() instead of utimensat() */
 209
 210    /* happy if special cases */
 211    if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) {
 212        return 0;
 213    }
 214    if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) {
 215        return utimes(path, NULL);
 216    }
 217
 218    /* prepare for hard cases */
 219    if (times[0].tv_nsec == UTIME_NOW || times[1].tv_nsec == UTIME_NOW) {
 220        gettimeofday(&tv_now, NULL);
 221    }
 222    if (times[0].tv_nsec == UTIME_OMIT || times[1].tv_nsec == UTIME_OMIT) {
 223        stat(path, &st);
 224    }
 225
 226    for (i = 0; i < 2; i++) {
 227        if (times[i].tv_nsec == UTIME_NOW) {
 228            tv[i].tv_sec = tv_now.tv_sec;
 229            tv[i].tv_usec = tv_now.tv_usec;
 230        } else if (times[i].tv_nsec == UTIME_OMIT) {
 231            tv[i].tv_sec = (i == 0) ? st.st_atime : st.st_mtime;
 232            tv[i].tv_usec = 0;
 233        } else {
 234            tv[i].tv_sec = times[i].tv_sec;
 235            tv[i].tv_usec = times[i].tv_nsec / 1000;
 236        }
 237    }
 238
 239    return utimes(path, &tv[0]);
 240}
 241
 242char *
 243qemu_get_local_state_pathname(const char *relative_pathname)
 244{
 245    return g_strdup_printf("%s/%s", CONFIG_QEMU_LOCALSTATEDIR,
 246                           relative_pathname);
 247}
 248
 249void qemu_set_tty_echo(int fd, bool echo)
 250{
 251    struct termios tty;
 252
 253    tcgetattr(fd, &tty);
 254
 255    if (echo) {
 256        tty.c_lflag |= ECHO | ECHONL | ICANON | IEXTEN;
 257    } else {
 258        tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN);
 259    }
 260
 261    tcsetattr(fd, TCSANOW, &tty);
 262}
 263
 264static char exec_dir[PATH_MAX];
 265
 266void qemu_init_exec_dir(const char *argv0)
 267{
 268    char *dir;
 269    char *p = NULL;
 270    char buf[PATH_MAX];
 271
 272    assert(!exec_dir[0]);
 273
 274#if defined(__linux__)
 275    {
 276        int len;
 277        len = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
 278        if (len > 0) {
 279            buf[len] = 0;
 280            p = buf;
 281        }
 282    }
 283#elif defined(__FreeBSD__)
 284    {
 285        static int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
 286        size_t len = sizeof(buf) - 1;
 287
 288        *buf = '\0';
 289        if (!sysctl(mib, ARRAY_SIZE(mib), buf, &len, NULL, 0) &&
 290            *buf) {
 291            buf[sizeof(buf) - 1] = '\0';
 292            p = buf;
 293        }
 294    }
 295#endif
 296    /* If we don't have any way of figuring out the actual executable
 297       location then try argv[0].  */
 298    if (!p) {
 299        if (!argv0) {
 300            return;
 301        }
 302        p = realpath(argv0, buf);
 303        if (!p) {
 304            return;
 305        }
 306    }
 307    dir = g_path_get_dirname(p);
 308
 309    pstrcpy(exec_dir, sizeof(exec_dir), dir);
 310
 311    g_free(dir);
 312}
 313
 314char *qemu_get_exec_dir(void)
 315{
 316    return g_strdup(exec_dir);
 317}
 318
 319static sigjmp_buf sigjump;
 320
 321static void sigbus_handler(int signal)
 322{
 323    siglongjmp(sigjump, 1);
 324}
 325
 326void os_mem_prealloc(int fd, char *area, size_t memory, Error **errp)
 327{
 328    int ret;
 329    struct sigaction act, oldact;
 330    sigset_t set, oldset;
 331
 332    memset(&act, 0, sizeof(act));
 333    act.sa_handler = &sigbus_handler;
 334    act.sa_flags = 0;
 335
 336    ret = sigaction(SIGBUS, &act, &oldact);
 337    if (ret) {
 338        error_setg_errno(errp, errno,
 339            "os_mem_prealloc: failed to install signal handler");
 340        return;
 341    }
 342
 343    /* unblock SIGBUS */
 344    sigemptyset(&set);
 345    sigaddset(&set, SIGBUS);
 346    pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
 347
 348    if (sigsetjmp(sigjump, 1)) {
 349        error_setg(errp, "os_mem_prealloc: Insufficient free host memory "
 350            "pages available to allocate guest RAM\n");
 351    } else {
 352        int i;
 353        size_t hpagesize = qemu_fd_getpagesize(fd);
 354        size_t numpages = DIV_ROUND_UP(memory, hpagesize);
 355
 356        /* MAP_POPULATE silently ignores failures */
 357        for (i = 0; i < numpages; i++) {
 358            memset(area + (hpagesize * i), 0, 1);
 359        }
 360    }
 361
 362    ret = sigaction(SIGBUS, &oldact, NULL);
 363    if (ret) {
 364        /* Terminate QEMU since it can't recover from error */
 365        perror("os_mem_prealloc: failed to reinstall signal handler");
 366        exit(1);
 367    }
 368    pthread_sigmask(SIG_SETMASK, &oldset, NULL);
 369}
 370
 371
 372static struct termios oldtty;
 373
 374static void term_exit(void)
 375{
 376    tcsetattr(0, TCSANOW, &oldtty);
 377}
 378
 379static void term_init(void)
 380{
 381    struct termios tty;
 382
 383    tcgetattr(0, &tty);
 384    oldtty = tty;
 385
 386    tty.c_iflag &= ~(IGNBRK|BRKINT|PARMRK|ISTRIP
 387                          |INLCR|IGNCR|ICRNL|IXON);
 388    tty.c_oflag |= OPOST;
 389    tty.c_lflag &= ~(ECHO|ECHONL|ICANON|IEXTEN);
 390    tty.c_cflag &= ~(CSIZE|PARENB);
 391    tty.c_cflag |= CS8;
 392    tty.c_cc[VMIN] = 1;
 393    tty.c_cc[VTIME] = 0;
 394
 395    tcsetattr(0, TCSANOW, &tty);
 396
 397    atexit(term_exit);
 398}
 399
 400int qemu_read_password(char *buf, int buf_size)
 401{
 402    uint8_t ch;
 403    int i, ret;
 404
 405    printf("password: ");
 406    fflush(stdout);
 407    term_init();
 408    i = 0;
 409    for (;;) {
 410        ret = read(0, &ch, 1);
 411        if (ret == -1) {
 412            if (errno == EAGAIN || errno == EINTR) {
 413                continue;
 414            } else {
 415                break;
 416            }
 417        } else if (ret == 0) {
 418            ret = -1;
 419            break;
 420        } else {
 421            if (ch == '\r' ||
 422                ch == '\n') {
 423                ret = 0;
 424                break;
 425            }
 426            if (i < (buf_size - 1)) {
 427                buf[i++] = ch;
 428            }
 429        }
 430    }
 431    term_exit();
 432    buf[i] = '\0';
 433    printf("\n");
 434    return ret;
 435}
 436
 437
 438char *qemu_get_pid_name(pid_t pid)
 439{
 440    char *name = NULL;
 441
 442#if defined(__FreeBSD__)
 443    /* BSDs don't have /proc, but they provide a nice substitute */
 444    struct kinfo_proc *proc = kinfo_getproc(pid);
 445
 446    if (proc) {
 447        name = g_strdup(proc->ki_comm);
 448        free(proc);
 449    }
 450#else
 451    /* Assume a system with reasonable procfs */
 452    char *pid_path;
 453    size_t len;
 454
 455    pid_path = g_strdup_printf("/proc/%d/cmdline", pid);
 456    g_file_get_contents(pid_path, &name, &len, NULL);
 457    g_free(pid_path);
 458#endif
 459
 460    return name;
 461}
 462
 463
 464pid_t qemu_fork(Error **errp)
 465{
 466    sigset_t oldmask, newmask;
 467    struct sigaction sig_action;
 468    int saved_errno;
 469    pid_t pid;
 470
 471    /*
 472     * Need to block signals now, so that child process can safely
 473     * kill off caller's signal handlers without a race.
 474     */
 475    sigfillset(&newmask);
 476    if (pthread_sigmask(SIG_SETMASK, &newmask, &oldmask) != 0) {
 477        error_setg_errno(errp, errno,
 478                         "cannot block signals");
 479        return -1;
 480    }
 481
 482    pid = fork();
 483    saved_errno = errno;
 484
 485    if (pid < 0) {
 486        /* attempt to restore signal mask, but ignore failure, to
 487         * avoid obscuring the fork failure */
 488        (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
 489        error_setg_errno(errp, saved_errno,
 490                         "cannot fork child process");
 491        errno = saved_errno;
 492        return -1;
 493    } else if (pid) {
 494        /* parent process */
 495
 496        /* Restore our original signal mask now that the child is
 497         * safely running. Only documented failures are EFAULT (not
 498         * possible, since we are using just-grabbed mask) or EINVAL
 499         * (not possible, since we are using correct arguments).  */
 500        (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
 501    } else {
 502        /* child process */
 503        size_t i;
 504
 505        /* Clear out all signal handlers from parent so nothing
 506         * unexpected can happen in our child once we unblock
 507         * signals */
 508        sig_action.sa_handler = SIG_DFL;
 509        sig_action.sa_flags = 0;
 510        sigemptyset(&sig_action.sa_mask);
 511
 512        for (i = 1; i < NSIG; i++) {
 513            /* Only possible errors are EFAULT or EINVAL The former
 514             * won't happen, the latter we expect, so no need to check
 515             * return value */
 516            (void)sigaction(i, &sig_action, NULL);
 517        }
 518
 519        /* Unmask all signals in child, since we've no idea what the
 520         * caller's done with their signal mask and don't want to
 521         * propagate that to children */
 522        sigemptyset(&newmask);
 523        if (pthread_sigmask(SIG_SETMASK, &newmask, NULL) != 0) {
 524            Error *local_err = NULL;
 525            error_setg_errno(&local_err, errno,
 526                             "cannot unblock signals");
 527            error_report_err(local_err);
 528            _exit(1);
 529        }
 530    }
 531    return pid;
 532}
 533
 534void *qemu_alloc_stack(size_t *sz)
 535{
 536    void *ptr, *guardpage;
 537#ifdef CONFIG_DEBUG_STACK_USAGE
 538    void *ptr2;
 539#endif
 540    size_t pagesz = getpagesize();
 541#ifdef _SC_THREAD_STACK_MIN
 542    /* avoid stacks smaller than _SC_THREAD_STACK_MIN */
 543    long min_stack_sz = sysconf(_SC_THREAD_STACK_MIN);
 544    *sz = MAX(MAX(min_stack_sz, 0), *sz);
 545#endif
 546    /* adjust stack size to a multiple of the page size */
 547    *sz = ROUND_UP(*sz, pagesz);
 548    /* allocate one extra page for the guard page */
 549    *sz += pagesz;
 550
 551    ptr = mmap(NULL, *sz, PROT_READ | PROT_WRITE,
 552               MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 553    if (ptr == MAP_FAILED) {
 554        abort();
 555    }
 556
 557#if defined(HOST_IA64)
 558    /* separate register stack */
 559    guardpage = ptr + (((*sz - pagesz) / 2) & ~pagesz);
 560#elif defined(HOST_HPPA)
 561    /* stack grows up */
 562    guardpage = ptr + *sz - pagesz;
 563#else
 564    /* stack grows down */
 565    guardpage = ptr;
 566#endif
 567    if (mprotect(guardpage, pagesz, PROT_NONE) != 0) {
 568        abort();
 569    }
 570
 571#ifdef CONFIG_DEBUG_STACK_USAGE
 572    for (ptr2 = ptr + pagesz; ptr2 < ptr + *sz; ptr2 += sizeof(uint32_t)) {
 573        *(uint32_t *)ptr2 = 0xdeadbeaf;
 574    }
 575#endif
 576
 577    return ptr;
 578}
 579
 580#ifdef CONFIG_DEBUG_STACK_USAGE
 581static __thread unsigned int max_stack_usage;
 582#endif
 583
 584void qemu_free_stack(void *stack, size_t sz)
 585{
 586#ifdef CONFIG_DEBUG_STACK_USAGE
 587    unsigned int usage;
 588    void *ptr;
 589
 590    for (ptr = stack + getpagesize(); ptr < stack + sz;
 591         ptr += sizeof(uint32_t)) {
 592        if (*(uint32_t *)ptr != 0xdeadbeaf) {
 593            break;
 594        }
 595    }
 596    usage = sz - (uintptr_t) (ptr - stack);
 597    if (usage > max_stack_usage) {
 598        error_report("thread %d max stack usage increased from %u to %u",
 599                     qemu_get_thread_id(), max_stack_usage, usage);
 600        max_stack_usage = usage;
 601    }
 602#endif
 603
 604    munmap(stack, sz);
 605}
 606