qemu/util/oslib-posix.c
<<
>>
Prefs
   1/*
   2 * os-posix-lib.c
   3 *
   4 * Copyright (c) 2003-2008 Fabrice Bellard
   5 * Copyright (c) 2010 Red Hat, Inc.
   6 *
   7 * QEMU library functions on POSIX which are shared between QEMU and
   8 * the QEMU tools.
   9 *
  10 * Permission is hereby granted, free of charge, to any person obtaining a copy
  11 * of this software and associated documentation files (the "Software"), to deal
  12 * in the Software without restriction, including without limitation the rights
  13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  14 * copies of the Software, and to permit persons to whom the Software is
  15 * furnished to do so, subject to the following conditions:
  16 *
  17 * The above copyright notice and this permission notice shall be included in
  18 * all copies or substantial portions of the Software.
  19 *
  20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  26 * THE SOFTWARE.
  27 */
  28
  29/* The following block of code temporarily renames the daemon() function so the
  30   compiler does not see the warning associated with it in stdlib.h on OSX */
  31#ifdef __APPLE__
  32#define daemon qemu_fake_daemon_function
  33#include <stdlib.h>
  34#undef daemon
  35extern int daemon(int, int);
  36#endif
  37
  38#if defined(__linux__) && (defined(__x86_64__) || defined(__arm__))
  39   /* Use 2 MiB alignment so transparent hugepages can be used by KVM.
  40      Valgrind does not support alignments larger than 1 MiB,
  41      therefore we need special code which handles running on Valgrind. */
  42#  define QEMU_VMALLOC_ALIGN (512 * 4096)
  43#elif defined(__linux__) && defined(__s390x__)
  44   /* Use 1 MiB (segment size) alignment so gmap can be used by KVM. */
  45#  define QEMU_VMALLOC_ALIGN (256 * 4096)
  46#else
  47#  define QEMU_VMALLOC_ALIGN getpagesize()
  48#endif
  49
  50#include <termios.h>
  51#include <unistd.h>
  52#include <termios.h>
  53
  54#include <glib/gprintf.h>
  55
  56#include "config-host.h"
  57#include "sysemu/sysemu.h"
  58#include "trace.h"
  59#include "qemu/sockets.h"
  60#include <sys/mman.h>
  61#include <libgen.h>
  62#include <setjmp.h>
  63#include <sys/signal.h>
  64
  65#ifdef CONFIG_LINUX
  66#include <sys/syscall.h>
  67#endif
  68
  69#ifdef __FreeBSD__
  70#include <sys/sysctl.h>
  71#endif
  72
  73#include <qemu/mmap-alloc.h>
  74
  75int qemu_get_thread_id(void)
  76{
  77#if defined(__linux__)
  78    return syscall(SYS_gettid);
  79#else
  80    return getpid();
  81#endif
  82}
  83
  84int qemu_daemon(int nochdir, int noclose)
  85{
  86    return daemon(nochdir, noclose);
  87}
  88
  89void *qemu_oom_check(void *ptr)
  90{
  91    if (ptr == NULL) {
  92        fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno));
  93        abort();
  94    }
  95    return ptr;
  96}
  97
  98void *qemu_try_memalign(size_t alignment, size_t size)
  99{
 100    void *ptr;
 101
 102    if (alignment < sizeof(void*)) {
 103        alignment = sizeof(void*);
 104    }
 105
 106#if defined(_POSIX_C_SOURCE) && !defined(__sun__)
 107    int ret;
 108    ret = posix_memalign(&ptr, alignment, size);
 109    if (ret != 0) {
 110        errno = ret;
 111        ptr = NULL;
 112    }
 113#elif defined(CONFIG_BSD)
 114    ptr = valloc(size);
 115#else
 116    ptr = memalign(alignment, size);
 117#endif
 118    trace_qemu_memalign(alignment, size, ptr);
 119    return ptr;
 120}
 121
 122void *qemu_memalign(size_t alignment, size_t size)
 123{
 124    return qemu_oom_check(qemu_try_memalign(alignment, size));
 125}
 126
 127/* alloc shared memory pages */
 128void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment)
 129{
 130    size_t align = QEMU_VMALLOC_ALIGN;
 131    void *ptr = qemu_ram_mmap(-1, size, align, false);
 132
 133    if (ptr == MAP_FAILED) {
 134        return NULL;
 135    }
 136
 137    if (alignment) {
 138        *alignment = align;
 139    }
 140
 141    trace_qemu_anon_ram_alloc(size, ptr);
 142    return ptr;
 143}
 144
 145void qemu_vfree(void *ptr)
 146{
 147    trace_qemu_vfree(ptr);
 148    free(ptr);
 149}
 150
 151void qemu_anon_ram_free(void *ptr, size_t size)
 152{
 153    trace_qemu_anon_ram_free(ptr, size);
 154    qemu_ram_munmap(ptr, size);
 155}
 156
 157void qemu_set_block(int fd)
 158{
 159    int f;
 160    f = fcntl(fd, F_GETFL);
 161    fcntl(fd, F_SETFL, f & ~O_NONBLOCK);
 162}
 163
 164void qemu_set_nonblock(int fd)
 165{
 166    int f;
 167    f = fcntl(fd, F_GETFL);
 168    fcntl(fd, F_SETFL, f | O_NONBLOCK);
 169}
 170
 171int socket_set_fast_reuse(int fd)
 172{
 173    int val = 1, ret;
 174
 175    ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
 176                     (const char *)&val, sizeof(val));
 177
 178    assert(ret == 0);
 179
 180    return ret;
 181}
 182
 183void qemu_set_cloexec(int fd)
 184{
 185    int f;
 186    f = fcntl(fd, F_GETFD);
 187    fcntl(fd, F_SETFD, f | FD_CLOEXEC);
 188}
 189
 190/*
 191 * Creates a pipe with FD_CLOEXEC set on both file descriptors
 192 */
 193int qemu_pipe(int pipefd[2])
 194{
 195    int ret;
 196
 197#ifdef CONFIG_PIPE2
 198    ret = pipe2(pipefd, O_CLOEXEC);
 199    if (ret != -1 || errno != ENOSYS) {
 200        return ret;
 201    }
 202#endif
 203    ret = pipe(pipefd);
 204    if (ret == 0) {
 205        qemu_set_cloexec(pipefd[0]);
 206        qemu_set_cloexec(pipefd[1]);
 207    }
 208
 209    return ret;
 210}
 211
 212int qemu_utimens(const char *path, const struct timespec *times)
 213{
 214    struct timeval tv[2], tv_now;
 215    struct stat st;
 216    int i;
 217#ifdef CONFIG_UTIMENSAT
 218    int ret;
 219
 220    ret = utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW);
 221    if (ret != -1 || errno != ENOSYS) {
 222        return ret;
 223    }
 224#endif
 225    /* Fallback: use utimes() instead of utimensat() */
 226
 227    /* happy if special cases */
 228    if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) {
 229        return 0;
 230    }
 231    if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) {
 232        return utimes(path, NULL);
 233    }
 234
 235    /* prepare for hard cases */
 236    if (times[0].tv_nsec == UTIME_NOW || times[1].tv_nsec == UTIME_NOW) {
 237        gettimeofday(&tv_now, NULL);
 238    }
 239    if (times[0].tv_nsec == UTIME_OMIT || times[1].tv_nsec == UTIME_OMIT) {
 240        stat(path, &st);
 241    }
 242
 243    for (i = 0; i < 2; i++) {
 244        if (times[i].tv_nsec == UTIME_NOW) {
 245            tv[i].tv_sec = tv_now.tv_sec;
 246            tv[i].tv_usec = tv_now.tv_usec;
 247        } else if (times[i].tv_nsec == UTIME_OMIT) {
 248            tv[i].tv_sec = (i == 0) ? st.st_atime : st.st_mtime;
 249            tv[i].tv_usec = 0;
 250        } else {
 251            tv[i].tv_sec = times[i].tv_sec;
 252            tv[i].tv_usec = times[i].tv_nsec / 1000;
 253        }
 254    }
 255
 256    return utimes(path, &tv[0]);
 257}
 258
 259char *
 260qemu_get_local_state_pathname(const char *relative_pathname)
 261{
 262    return g_strdup_printf("%s/%s", CONFIG_QEMU_LOCALSTATEDIR,
 263                           relative_pathname);
 264}
 265
 266void qemu_set_tty_echo(int fd, bool echo)
 267{
 268    struct termios tty;
 269
 270    tcgetattr(fd, &tty);
 271
 272    if (echo) {
 273        tty.c_lflag |= ECHO | ECHONL | ICANON | IEXTEN;
 274    } else {
 275        tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN);
 276    }
 277
 278    tcsetattr(fd, TCSANOW, &tty);
 279}
 280
 281static char exec_dir[PATH_MAX];
 282
 283void qemu_init_exec_dir(const char *argv0)
 284{
 285    char *dir;
 286    char *p = NULL;
 287    char buf[PATH_MAX];
 288
 289    assert(!exec_dir[0]);
 290
 291#if defined(__linux__)
 292    {
 293        int len;
 294        len = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
 295        if (len > 0) {
 296            buf[len] = 0;
 297            p = buf;
 298        }
 299    }
 300#elif defined(__FreeBSD__)
 301    {
 302        static int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
 303        size_t len = sizeof(buf) - 1;
 304
 305        *buf = '\0';
 306        if (!sysctl(mib, ARRAY_SIZE(mib), buf, &len, NULL, 0) &&
 307            *buf) {
 308            buf[sizeof(buf) - 1] = '\0';
 309            p = buf;
 310        }
 311    }
 312#endif
 313    /* If we don't have any way of figuring out the actual executable
 314       location then try argv[0].  */
 315    if (!p) {
 316        if (!argv0) {
 317            return;
 318        }
 319        p = realpath(argv0, buf);
 320        if (!p) {
 321            return;
 322        }
 323    }
 324    dir = dirname(p);
 325
 326    pstrcpy(exec_dir, sizeof(exec_dir), dir);
 327}
 328
 329char *qemu_get_exec_dir(void)
 330{
 331    return g_strdup(exec_dir);
 332}
 333
 334static sigjmp_buf sigjump;
 335
 336static void sigbus_handler(int signal)
 337{
 338    siglongjmp(sigjump, 1);
 339}
 340
 341void os_mem_prealloc(int fd, char *area, size_t memory)
 342{
 343    int ret;
 344    struct sigaction act, oldact;
 345    sigset_t set, oldset;
 346
 347    memset(&act, 0, sizeof(act));
 348    act.sa_handler = &sigbus_handler;
 349    act.sa_flags = 0;
 350
 351    ret = sigaction(SIGBUS, &act, &oldact);
 352    if (ret) {
 353        perror("os_mem_prealloc: failed to install signal handler");
 354        exit(1);
 355    }
 356
 357    /* unblock SIGBUS */
 358    sigemptyset(&set);
 359    sigaddset(&set, SIGBUS);
 360    pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
 361
 362    if (sigsetjmp(sigjump, 1)) {
 363        fprintf(stderr, "os_mem_prealloc: Insufficient free host memory "
 364                        "pages available to allocate guest RAM\n");
 365        exit(1);
 366    } else {
 367        int i;
 368        size_t hpagesize = qemu_fd_getpagesize(fd);
 369        size_t numpages = DIV_ROUND_UP(memory, hpagesize);
 370
 371        /* MAP_POPULATE silently ignores failures */
 372        for (i = 0; i < numpages; i++) {
 373            memset(area + (hpagesize * i), 0, 1);
 374        }
 375
 376        ret = sigaction(SIGBUS, &oldact, NULL);
 377        if (ret) {
 378            perror("os_mem_prealloc: failed to reinstall signal handler");
 379            exit(1);
 380        }
 381
 382        pthread_sigmask(SIG_SETMASK, &oldset, NULL);
 383    }
 384}
 385
 386
 387static struct termios oldtty;
 388
 389static void term_exit(void)
 390{
 391    tcsetattr(0, TCSANOW, &oldtty);
 392}
 393
 394static void term_init(void)
 395{
 396    struct termios tty;
 397
 398    tcgetattr(0, &tty);
 399    oldtty = tty;
 400
 401    tty.c_iflag &= ~(IGNBRK|BRKINT|PARMRK|ISTRIP
 402                          |INLCR|IGNCR|ICRNL|IXON);
 403    tty.c_oflag |= OPOST;
 404    tty.c_lflag &= ~(ECHO|ECHONL|ICANON|IEXTEN);
 405    tty.c_cflag &= ~(CSIZE|PARENB);
 406    tty.c_cflag |= CS8;
 407    tty.c_cc[VMIN] = 1;
 408    tty.c_cc[VTIME] = 0;
 409
 410    tcsetattr(0, TCSANOW, &tty);
 411
 412    atexit(term_exit);
 413}
 414
 415int qemu_read_password(char *buf, int buf_size)
 416{
 417    uint8_t ch;
 418    int i, ret;
 419
 420    printf("password: ");
 421    fflush(stdout);
 422    term_init();
 423    i = 0;
 424    for (;;) {
 425        ret = read(0, &ch, 1);
 426        if (ret == -1) {
 427            if (errno == EAGAIN || errno == EINTR) {
 428                continue;
 429            } else {
 430                break;
 431            }
 432        } else if (ret == 0) {
 433            ret = -1;
 434            break;
 435        } else {
 436            if (ch == '\r' ||
 437                ch == '\n') {
 438                ret = 0;
 439                break;
 440            }
 441            if (i < (buf_size - 1)) {
 442                buf[i++] = ch;
 443            }
 444        }
 445    }
 446    term_exit();
 447    buf[i] = '\0';
 448    printf("\n");
 449    return ret;
 450}
 451
 452
 453pid_t qemu_fork(Error **errp)
 454{
 455    sigset_t oldmask, newmask;
 456    struct sigaction sig_action;
 457    int saved_errno;
 458    pid_t pid;
 459
 460    /*
 461     * Need to block signals now, so that child process can safely
 462     * kill off caller's signal handlers without a race.
 463     */
 464    sigfillset(&newmask);
 465    if (pthread_sigmask(SIG_SETMASK, &newmask, &oldmask) != 0) {
 466        error_setg_errno(errp, errno,
 467                         "cannot block signals");
 468        return -1;
 469    }
 470
 471    pid = fork();
 472    saved_errno = errno;
 473
 474    if (pid < 0) {
 475        /* attempt to restore signal mask, but ignore failure, to
 476         * avoid obscuring the fork failure */
 477        (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
 478        error_setg_errno(errp, saved_errno,
 479                         "cannot fork child process");
 480        errno = saved_errno;
 481        return -1;
 482    } else if (pid) {
 483        /* parent process */
 484
 485        /* Restore our original signal mask now that the child is
 486         * safely running. Only documented failures are EFAULT (not
 487         * possible, since we are using just-grabbed mask) or EINVAL
 488         * (not possible, since we are using correct arguments).  */
 489        (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
 490    } else {
 491        /* child process */
 492        size_t i;
 493
 494        /* Clear out all signal handlers from parent so nothing
 495         * unexpected can happen in our child once we unblock
 496         * signals */
 497        sig_action.sa_handler = SIG_DFL;
 498        sig_action.sa_flags = 0;
 499        sigemptyset(&sig_action.sa_mask);
 500
 501        for (i = 1; i < NSIG; i++) {
 502            /* Only possible errors are EFAULT or EINVAL The former
 503             * won't happen, the latter we expect, so no need to check
 504             * return value */
 505            (void)sigaction(i, &sig_action, NULL);
 506        }
 507
 508        /* Unmask all signals in child, since we've no idea what the
 509         * caller's done with their signal mask and don't want to
 510         * propagate that to children */
 511        sigemptyset(&newmask);
 512        if (pthread_sigmask(SIG_SETMASK, &newmask, NULL) != 0) {
 513            Error *local_err = NULL;
 514            error_setg_errno(&local_err, errno,
 515                             "cannot unblock signals");
 516            error_report_err(local_err);
 517            _exit(1);
 518        }
 519    }
 520    return pid;
 521}
 522