qemu/util/oslib-posix.c
<<
>>
Prefs
   1/*
   2 * os-posix-lib.c
   3 *
   4 * Copyright (c) 2003-2008 Fabrice Bellard
   5 * Copyright (c) 2010 Red Hat, Inc.
   6 *
   7 * QEMU library functions on POSIX which are shared between QEMU and
   8 * the QEMU tools.
   9 *
  10 * Permission is hereby granted, free of charge, to any person obtaining a copy
  11 * of this software and associated documentation files (the "Software"), to deal
  12 * in the Software without restriction, including without limitation the rights
  13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  14 * copies of the Software, and to permit persons to whom the Software is
  15 * furnished to do so, subject to the following conditions:
  16 *
  17 * The above copyright notice and this permission notice shall be included in
  18 * all copies or substantial portions of the Software.
  19 *
  20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  26 * THE SOFTWARE.
  27 */
  28
  29#if defined(__linux__) && \
  30    (defined(__x86_64__) || defined(__arm__) || defined(__aarch64__))
  31   /* Use 2 MiB alignment so transparent hugepages can be used by KVM.
  32      Valgrind does not support alignments larger than 1 MiB,
  33      therefore we need special code which handles running on Valgrind. */
  34#  define QEMU_VMALLOC_ALIGN (512 * 4096)
  35#elif defined(__linux__) && defined(__s390x__)
  36   /* Use 1 MiB (segment size) alignment so gmap can be used by KVM. */
  37#  define QEMU_VMALLOC_ALIGN (256 * 4096)
  38#else
  39#  define QEMU_VMALLOC_ALIGN getpagesize()
  40#endif
  41
  42#include "qemu/osdep.h"
  43#include <termios.h>
  44#include <termios.h>
  45
  46#include <glib/gprintf.h>
  47
  48#include "sysemu/sysemu.h"
  49#include "trace.h"
  50#include "qapi/error.h"
  51#include "qemu/sockets.h"
  52#include <sys/mman.h>
  53#include <libgen.h>
  54#include <sys/signal.h>
  55#include "qemu/cutils.h"
  56
  57#ifdef CONFIG_LINUX
  58#include <sys/syscall.h>
  59#endif
  60
  61#ifdef __FreeBSD__
  62#include <sys/sysctl.h>
  63#endif
  64
  65#include <qemu/mmap-alloc.h>
  66
  67int qemu_get_thread_id(void)
  68{
  69#if defined(__linux__)
  70    return syscall(SYS_gettid);
  71#else
  72    return getpid();
  73#endif
  74}
  75
  76int qemu_daemon(int nochdir, int noclose)
  77{
  78    return daemon(nochdir, noclose);
  79}
  80
  81void *qemu_oom_check(void *ptr)
  82{
  83    if (ptr == NULL) {
  84        fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno));
  85        abort();
  86    }
  87    return ptr;
  88}
  89
  90void *qemu_try_memalign(size_t alignment, size_t size)
  91{
  92    void *ptr;
  93
  94    if (alignment < sizeof(void*)) {
  95        alignment = sizeof(void*);
  96    }
  97
  98#if defined(_POSIX_C_SOURCE) && !defined(__sun__)
  99    int ret;
 100    ret = posix_memalign(&ptr, alignment, size);
 101    if (ret != 0) {
 102        errno = ret;
 103        ptr = NULL;
 104    }
 105#elif defined(CONFIG_BSD)
 106    ptr = valloc(size);
 107#else
 108    ptr = memalign(alignment, size);
 109#endif
 110    trace_qemu_memalign(alignment, size, ptr);
 111    return ptr;
 112}
 113
 114void *qemu_memalign(size_t alignment, size_t size)
 115{
 116    return qemu_oom_check(qemu_try_memalign(alignment, size));
 117}
 118
 119/* alloc shared memory pages */
 120void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment)
 121{
 122    size_t align = QEMU_VMALLOC_ALIGN;
 123    void *ptr = qemu_ram_mmap(-1, size, align, false);
 124
 125    if (ptr == MAP_FAILED) {
 126        return NULL;
 127    }
 128
 129    if (alignment) {
 130        *alignment = align;
 131    }
 132
 133    trace_qemu_anon_ram_alloc(size, ptr);
 134    return ptr;
 135}
 136
 137void qemu_vfree(void *ptr)
 138{
 139    trace_qemu_vfree(ptr);
 140    free(ptr);
 141}
 142
 143void qemu_anon_ram_free(void *ptr, size_t size)
 144{
 145    trace_qemu_anon_ram_free(ptr, size);
 146    qemu_ram_munmap(ptr, size);
 147}
 148
 149void qemu_set_block(int fd)
 150{
 151    int f;
 152    f = fcntl(fd, F_GETFL);
 153    fcntl(fd, F_SETFL, f & ~O_NONBLOCK);
 154}
 155
 156void qemu_set_nonblock(int fd)
 157{
 158    int f;
 159    f = fcntl(fd, F_GETFL);
 160    fcntl(fd, F_SETFL, f | O_NONBLOCK);
 161}
 162
 163int socket_set_fast_reuse(int fd)
 164{
 165    int val = 1, ret;
 166
 167    ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
 168                     (const char *)&val, sizeof(val));
 169
 170    assert(ret == 0);
 171
 172    return ret;
 173}
 174
 175void qemu_set_cloexec(int fd)
 176{
 177    int f;
 178    f = fcntl(fd, F_GETFD);
 179    fcntl(fd, F_SETFD, f | FD_CLOEXEC);
 180}
 181
 182/*
 183 * Creates a pipe with FD_CLOEXEC set on both file descriptors
 184 */
 185int qemu_pipe(int pipefd[2])
 186{
 187    int ret;
 188
 189#ifdef CONFIG_PIPE2
 190    ret = pipe2(pipefd, O_CLOEXEC);
 191    if (ret != -1 || errno != ENOSYS) {
 192        return ret;
 193    }
 194#endif
 195    ret = pipe(pipefd);
 196    if (ret == 0) {
 197        qemu_set_cloexec(pipefd[0]);
 198        qemu_set_cloexec(pipefd[1]);
 199    }
 200
 201    return ret;
 202}
 203
 204int qemu_utimens(const char *path, const struct timespec *times)
 205{
 206    struct timeval tv[2], tv_now;
 207    struct stat st;
 208    int i;
 209#ifdef CONFIG_UTIMENSAT
 210    int ret;
 211
 212    ret = utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW);
 213    if (ret != -1 || errno != ENOSYS) {
 214        return ret;
 215    }
 216#endif
 217    /* Fallback: use utimes() instead of utimensat() */
 218
 219    /* happy if special cases */
 220    if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) {
 221        return 0;
 222    }
 223    if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) {
 224        return utimes(path, NULL);
 225    }
 226
 227    /* prepare for hard cases */
 228    if (times[0].tv_nsec == UTIME_NOW || times[1].tv_nsec == UTIME_NOW) {
 229        gettimeofday(&tv_now, NULL);
 230    }
 231    if (times[0].tv_nsec == UTIME_OMIT || times[1].tv_nsec == UTIME_OMIT) {
 232        stat(path, &st);
 233    }
 234
 235    for (i = 0; i < 2; i++) {
 236        if (times[i].tv_nsec == UTIME_NOW) {
 237            tv[i].tv_sec = tv_now.tv_sec;
 238            tv[i].tv_usec = tv_now.tv_usec;
 239        } else if (times[i].tv_nsec == UTIME_OMIT) {
 240            tv[i].tv_sec = (i == 0) ? st.st_atime : st.st_mtime;
 241            tv[i].tv_usec = 0;
 242        } else {
 243            tv[i].tv_sec = times[i].tv_sec;
 244            tv[i].tv_usec = times[i].tv_nsec / 1000;
 245        }
 246    }
 247
 248    return utimes(path, &tv[0]);
 249}
 250
 251char *
 252qemu_get_local_state_pathname(const char *relative_pathname)
 253{
 254    return g_strdup_printf("%s/%s", CONFIG_QEMU_LOCALSTATEDIR,
 255                           relative_pathname);
 256}
 257
 258void qemu_set_tty_echo(int fd, bool echo)
 259{
 260    struct termios tty;
 261
 262    tcgetattr(fd, &tty);
 263
 264    if (echo) {
 265        tty.c_lflag |= ECHO | ECHONL | ICANON | IEXTEN;
 266    } else {
 267        tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN);
 268    }
 269
 270    tcsetattr(fd, TCSANOW, &tty);
 271}
 272
 273static char exec_dir[PATH_MAX];
 274
 275void qemu_init_exec_dir(const char *argv0)
 276{
 277    char *dir;
 278    char *p = NULL;
 279    char buf[PATH_MAX];
 280
 281    assert(!exec_dir[0]);
 282
 283#if defined(__linux__)
 284    {
 285        int len;
 286        len = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
 287        if (len > 0) {
 288            buf[len] = 0;
 289            p = buf;
 290        }
 291    }
 292#elif defined(__FreeBSD__)
 293    {
 294        static int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
 295        size_t len = sizeof(buf) - 1;
 296
 297        *buf = '\0';
 298        if (!sysctl(mib, ARRAY_SIZE(mib), buf, &len, NULL, 0) &&
 299            *buf) {
 300            buf[sizeof(buf) - 1] = '\0';
 301            p = buf;
 302        }
 303    }
 304#endif
 305    /* If we don't have any way of figuring out the actual executable
 306       location then try argv[0].  */
 307    if (!p) {
 308        if (!argv0) {
 309            return;
 310        }
 311        p = realpath(argv0, buf);
 312        if (!p) {
 313            return;
 314        }
 315    }
 316    dir = dirname(p);
 317
 318    pstrcpy(exec_dir, sizeof(exec_dir), dir);
 319}
 320
 321char *qemu_get_exec_dir(void)
 322{
 323    return g_strdup(exec_dir);
 324}
 325
 326static sigjmp_buf sigjump;
 327
 328static void sigbus_handler(int signal)
 329{
 330    siglongjmp(sigjump, 1);
 331}
 332
 333void os_mem_prealloc(int fd, char *area, size_t memory)
 334{
 335    int ret;
 336    struct sigaction act, oldact;
 337    sigset_t set, oldset;
 338
 339    memset(&act, 0, sizeof(act));
 340    act.sa_handler = &sigbus_handler;
 341    act.sa_flags = 0;
 342
 343    ret = sigaction(SIGBUS, &act, &oldact);
 344    if (ret) {
 345        perror("os_mem_prealloc: failed to install signal handler");
 346        exit(1);
 347    }
 348
 349    /* unblock SIGBUS */
 350    sigemptyset(&set);
 351    sigaddset(&set, SIGBUS);
 352    pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
 353
 354    if (sigsetjmp(sigjump, 1)) {
 355        fprintf(stderr, "os_mem_prealloc: Insufficient free host memory "
 356                        "pages available to allocate guest RAM\n");
 357        exit(1);
 358    } else {
 359        int i;
 360        size_t hpagesize = qemu_fd_getpagesize(fd);
 361        size_t numpages = DIV_ROUND_UP(memory, hpagesize);
 362
 363        /* MAP_POPULATE silently ignores failures */
 364        for (i = 0; i < numpages; i++) {
 365            memset(area + (hpagesize * i), 0, 1);
 366        }
 367
 368        ret = sigaction(SIGBUS, &oldact, NULL);
 369        if (ret) {
 370            perror("os_mem_prealloc: failed to reinstall signal handler");
 371            exit(1);
 372        }
 373
 374        pthread_sigmask(SIG_SETMASK, &oldset, NULL);
 375    }
 376}
 377
 378
 379static struct termios oldtty;
 380
 381static void term_exit(void)
 382{
 383    tcsetattr(0, TCSANOW, &oldtty);
 384}
 385
 386static void term_init(void)
 387{
 388    struct termios tty;
 389
 390    tcgetattr(0, &tty);
 391    oldtty = tty;
 392
 393    tty.c_iflag &= ~(IGNBRK|BRKINT|PARMRK|ISTRIP
 394                          |INLCR|IGNCR|ICRNL|IXON);
 395    tty.c_oflag |= OPOST;
 396    tty.c_lflag &= ~(ECHO|ECHONL|ICANON|IEXTEN);
 397    tty.c_cflag &= ~(CSIZE|PARENB);
 398    tty.c_cflag |= CS8;
 399    tty.c_cc[VMIN] = 1;
 400    tty.c_cc[VTIME] = 0;
 401
 402    tcsetattr(0, TCSANOW, &tty);
 403
 404    atexit(term_exit);
 405}
 406
 407int qemu_read_password(char *buf, int buf_size)
 408{
 409    uint8_t ch;
 410    int i, ret;
 411
 412    printf("password: ");
 413    fflush(stdout);
 414    term_init();
 415    i = 0;
 416    for (;;) {
 417        ret = read(0, &ch, 1);
 418        if (ret == -1) {
 419            if (errno == EAGAIN || errno == EINTR) {
 420                continue;
 421            } else {
 422                break;
 423            }
 424        } else if (ret == 0) {
 425            ret = -1;
 426            break;
 427        } else {
 428            if (ch == '\r' ||
 429                ch == '\n') {
 430                ret = 0;
 431                break;
 432            }
 433            if (i < (buf_size - 1)) {
 434                buf[i++] = ch;
 435            }
 436        }
 437    }
 438    term_exit();
 439    buf[i] = '\0';
 440    printf("\n");
 441    return ret;
 442}
 443
 444
 445pid_t qemu_fork(Error **errp)
 446{
 447    sigset_t oldmask, newmask;
 448    struct sigaction sig_action;
 449    int saved_errno;
 450    pid_t pid;
 451
 452    /*
 453     * Need to block signals now, so that child process can safely
 454     * kill off caller's signal handlers without a race.
 455     */
 456    sigfillset(&newmask);
 457    if (pthread_sigmask(SIG_SETMASK, &newmask, &oldmask) != 0) {
 458        error_setg_errno(errp, errno,
 459                         "cannot block signals");
 460        return -1;
 461    }
 462
 463    pid = fork();
 464    saved_errno = errno;
 465
 466    if (pid < 0) {
 467        /* attempt to restore signal mask, but ignore failure, to
 468         * avoid obscuring the fork failure */
 469        (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
 470        error_setg_errno(errp, saved_errno,
 471                         "cannot fork child process");
 472        errno = saved_errno;
 473        return -1;
 474    } else if (pid) {
 475        /* parent process */
 476
 477        /* Restore our original signal mask now that the child is
 478         * safely running. Only documented failures are EFAULT (not
 479         * possible, since we are using just-grabbed mask) or EINVAL
 480         * (not possible, since we are using correct arguments).  */
 481        (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
 482    } else {
 483        /* child process */
 484        size_t i;
 485
 486        /* Clear out all signal handlers from parent so nothing
 487         * unexpected can happen in our child once we unblock
 488         * signals */
 489        sig_action.sa_handler = SIG_DFL;
 490        sig_action.sa_flags = 0;
 491        sigemptyset(&sig_action.sa_mask);
 492
 493        for (i = 1; i < NSIG; i++) {
 494            /* Only possible errors are EFAULT or EINVAL The former
 495             * won't happen, the latter we expect, so no need to check
 496             * return value */
 497            (void)sigaction(i, &sig_action, NULL);
 498        }
 499
 500        /* Unmask all signals in child, since we've no idea what the
 501         * caller's done with their signal mask and don't want to
 502         * propagate that to children */
 503        sigemptyset(&newmask);
 504        if (pthread_sigmask(SIG_SETMASK, &newmask, NULL) != 0) {
 505            Error *local_err = NULL;
 506            error_setg_errno(&local_err, errno,
 507                             "cannot unblock signals");
 508            error_report_err(local_err);
 509            _exit(1);
 510        }
 511    }
 512    return pid;
 513}
 514