qemu/util/oslib-posix.c
<<
>>
Prefs
   1/*
   2 * os-posix-lib.c
   3 *
   4 * Copyright (c) 2003-2008 Fabrice Bellard
   5 * Copyright (c) 2010 Red Hat, Inc.
   6 *
   7 * QEMU library functions on POSIX which are shared between QEMU and
   8 * the QEMU tools.
   9 *
  10 * Permission is hereby granted, free of charge, to any person obtaining a copy
  11 * of this software and associated documentation files (the "Software"), to deal
  12 * in the Software without restriction, including without limitation the rights
  13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  14 * copies of the Software, and to permit persons to whom the Software is
  15 * furnished to do so, subject to the following conditions:
  16 *
  17 * The above copyright notice and this permission notice shall be included in
  18 * all copies or substantial portions of the Software.
  19 *
  20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  26 * THE SOFTWARE.
  27 */
  28
  29/* The following block of code temporarily renames the daemon() function so the
  30   compiler does not see the warning associated with it in stdlib.h on OSX */
  31#ifdef __APPLE__
  32#define daemon qemu_fake_daemon_function
  33#include <stdlib.h>
  34#undef daemon
  35extern int daemon(int, int);
  36#endif
  37
  38#if defined(__linux__) && (defined(__x86_64__) || defined(__arm__))
  39   /* Use 2 MiB alignment so transparent hugepages can be used by KVM.
  40      Valgrind does not support alignments larger than 1 MiB,
  41      therefore we need special code which handles running on Valgrind. */
  42#  define QEMU_VMALLOC_ALIGN (512 * 4096)
  43#elif defined(__linux__) && defined(__s390x__)
  44   /* Use 1 MiB (segment size) alignment so gmap can be used by KVM. */
  45#  define QEMU_VMALLOC_ALIGN (256 * 4096)
  46#else
  47#  define QEMU_VMALLOC_ALIGN getpagesize()
  48#endif
  49#define HUGETLBFS_MAGIC       0x958458f6
  50
  51#include <termios.h>
  52#include <unistd.h>
  53
  54#include <glib/gprintf.h>
  55
  56#include "config-host.h"
  57#include "sysemu/sysemu.h"
  58#include "trace.h"
  59#include "qemu/sockets.h"
  60#include <sys/mman.h>
  61#include <libgen.h>
  62#include <setjmp.h>
  63#include <sys/signal.h>
  64
  65#ifdef CONFIG_LINUX
  66#include <sys/syscall.h>
  67#include <sys/vfs.h>
  68#endif
  69
  70#ifdef __FreeBSD__
  71#include <sys/sysctl.h>
  72#endif
  73
  74int qemu_get_thread_id(void)
  75{
  76#if defined(__linux__)
  77    return syscall(SYS_gettid);
  78#else
  79    return getpid();
  80#endif
  81}
  82
  83int qemu_daemon(int nochdir, int noclose)
  84{
  85    return daemon(nochdir, noclose);
  86}
  87
  88void *qemu_oom_check(void *ptr)
  89{
  90    if (ptr == NULL) {
  91        fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno));
  92        abort();
  93    }
  94    return ptr;
  95}
  96
  97void *qemu_try_memalign(size_t alignment, size_t size)
  98{
  99    void *ptr;
 100
 101    if (alignment < sizeof(void*)) {
 102        alignment = sizeof(void*);
 103    }
 104
 105#if defined(_POSIX_C_SOURCE) && !defined(__sun__)
 106    int ret;
 107    ret = posix_memalign(&ptr, alignment, size);
 108    if (ret != 0) {
 109        errno = ret;
 110        ptr = NULL;
 111    }
 112#elif defined(CONFIG_BSD)
 113    ptr = valloc(size);
 114#else
 115    ptr = memalign(alignment, size);
 116#endif
 117    trace_qemu_memalign(alignment, size, ptr);
 118    return ptr;
 119}
 120
 121void *qemu_memalign(size_t alignment, size_t size)
 122{
 123    return qemu_oom_check(qemu_try_memalign(alignment, size));
 124}
 125
 126/* alloc shared memory pages */
 127void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment)
 128{
 129    size_t align = QEMU_VMALLOC_ALIGN;
 130    size_t total = size + align - getpagesize();
 131    void *ptr = mmap(0, total, PROT_READ | PROT_WRITE,
 132                     MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
 133    size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
 134
 135    if (ptr == MAP_FAILED) {
 136        return NULL;
 137    }
 138
 139    if (alignment) {
 140        *alignment = align;
 141    }
 142    ptr += offset;
 143    total -= offset;
 144
 145    if (offset > 0) {
 146        munmap(ptr - offset, offset);
 147    }
 148    if (total > size) {
 149        munmap(ptr + size, total - size);
 150    }
 151
 152    trace_qemu_anon_ram_alloc(size, ptr);
 153    return ptr;
 154}
 155
 156void qemu_vfree(void *ptr)
 157{
 158    trace_qemu_vfree(ptr);
 159    free(ptr);
 160}
 161
 162void qemu_anon_ram_free(void *ptr, size_t size)
 163{
 164    trace_qemu_anon_ram_free(ptr, size);
 165    if (ptr) {
 166        munmap(ptr, size);
 167    }
 168}
 169
 170void qemu_set_block(int fd)
 171{
 172    int f;
 173    f = fcntl(fd, F_GETFL);
 174    fcntl(fd, F_SETFL, f & ~O_NONBLOCK);
 175}
 176
 177void qemu_set_nonblock(int fd)
 178{
 179    int f;
 180    f = fcntl(fd, F_GETFL);
 181    fcntl(fd, F_SETFL, f | O_NONBLOCK);
 182}
 183
 184int socket_set_fast_reuse(int fd)
 185{
 186    int val = 1, ret;
 187
 188    ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
 189                     (const char *)&val, sizeof(val));
 190
 191    assert(ret == 0);
 192
 193    return ret;
 194}
 195
 196void qemu_set_cloexec(int fd)
 197{
 198    int f;
 199    f = fcntl(fd, F_GETFD);
 200    fcntl(fd, F_SETFD, f | FD_CLOEXEC);
 201}
 202
 203/*
 204 * Creates a pipe with FD_CLOEXEC set on both file descriptors
 205 */
 206int qemu_pipe(int pipefd[2])
 207{
 208    int ret;
 209
 210#ifdef CONFIG_PIPE2
 211    ret = pipe2(pipefd, O_CLOEXEC);
 212    if (ret != -1 || errno != ENOSYS) {
 213        return ret;
 214    }
 215#endif
 216    ret = pipe(pipefd);
 217    if (ret == 0) {
 218        qemu_set_cloexec(pipefd[0]);
 219        qemu_set_cloexec(pipefd[1]);
 220    }
 221
 222    return ret;
 223}
 224
 225int qemu_utimens(const char *path, const struct timespec *times)
 226{
 227    struct timeval tv[2], tv_now;
 228    struct stat st;
 229    int i;
 230#ifdef CONFIG_UTIMENSAT
 231    int ret;
 232
 233    ret = utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW);
 234    if (ret != -1 || errno != ENOSYS) {
 235        return ret;
 236    }
 237#endif
 238    /* Fallback: use utimes() instead of utimensat() */
 239
 240    /* happy if special cases */
 241    if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) {
 242        return 0;
 243    }
 244    if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) {
 245        return utimes(path, NULL);
 246    }
 247
 248    /* prepare for hard cases */
 249    if (times[0].tv_nsec == UTIME_NOW || times[1].tv_nsec == UTIME_NOW) {
 250        gettimeofday(&tv_now, NULL);
 251    }
 252    if (times[0].tv_nsec == UTIME_OMIT || times[1].tv_nsec == UTIME_OMIT) {
 253        stat(path, &st);
 254    }
 255
 256    for (i = 0; i < 2; i++) {
 257        if (times[i].tv_nsec == UTIME_NOW) {
 258            tv[i].tv_sec = tv_now.tv_sec;
 259            tv[i].tv_usec = tv_now.tv_usec;
 260        } else if (times[i].tv_nsec == UTIME_OMIT) {
 261            tv[i].tv_sec = (i == 0) ? st.st_atime : st.st_mtime;
 262            tv[i].tv_usec = 0;
 263        } else {
 264            tv[i].tv_sec = times[i].tv_sec;
 265            tv[i].tv_usec = times[i].tv_nsec / 1000;
 266        }
 267    }
 268
 269    return utimes(path, &tv[0]);
 270}
 271
 272char *
 273qemu_get_local_state_pathname(const char *relative_pathname)
 274{
 275    return g_strdup_printf("%s/%s", CONFIG_QEMU_LOCALSTATEDIR,
 276                           relative_pathname);
 277}
 278
 279void qemu_set_tty_echo(int fd, bool echo)
 280{
 281    struct termios tty;
 282
 283    tcgetattr(fd, &tty);
 284
 285    if (echo) {
 286        tty.c_lflag |= ECHO | ECHONL | ICANON | IEXTEN;
 287    } else {
 288        tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN);
 289    }
 290
 291    tcsetattr(fd, TCSANOW, &tty);
 292}
 293
 294static char exec_dir[PATH_MAX];
 295
 296void qemu_init_exec_dir(const char *argv0)
 297{
 298    char *dir;
 299    char *p = NULL;
 300    char buf[PATH_MAX];
 301
 302    assert(!exec_dir[0]);
 303
 304#if defined(__linux__)
 305    {
 306        int len;
 307        len = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
 308        if (len > 0) {
 309            buf[len] = 0;
 310            p = buf;
 311        }
 312    }
 313#elif defined(__FreeBSD__)
 314    {
 315        static int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
 316        size_t len = sizeof(buf) - 1;
 317
 318        *buf = '\0';
 319        if (!sysctl(mib, ARRAY_SIZE(mib), buf, &len, NULL, 0) &&
 320            *buf) {
 321            buf[sizeof(buf) - 1] = '\0';
 322            p = buf;
 323        }
 324    }
 325#endif
 326    /* If we don't have any way of figuring out the actual executable
 327       location then try argv[0].  */
 328    if (!p) {
 329        if (!argv0) {
 330            return;
 331        }
 332        p = realpath(argv0, buf);
 333        if (!p) {
 334            return;
 335        }
 336    }
 337    dir = dirname(p);
 338
 339    pstrcpy(exec_dir, sizeof(exec_dir), dir);
 340}
 341
 342char *qemu_get_exec_dir(void)
 343{
 344    return g_strdup(exec_dir);
 345}
 346
 347static sigjmp_buf sigjump;
 348
 349static void sigbus_handler(int signal)
 350{
 351    siglongjmp(sigjump, 1);
 352}
 353
 354static size_t fd_getpagesize(int fd)
 355{
 356#ifdef CONFIG_LINUX
 357    struct statfs fs;
 358    int ret;
 359
 360    if (fd != -1) {
 361        do {
 362            ret = fstatfs(fd, &fs);
 363        } while (ret != 0 && errno == EINTR);
 364
 365        if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) {
 366            return fs.f_bsize;
 367        }
 368    }
 369#endif
 370
 371    return getpagesize();
 372}
 373
 374void os_mem_prealloc(int fd, char *area, size_t memory)
 375{
 376    int ret;
 377    struct sigaction act, oldact;
 378    sigset_t set, oldset;
 379
 380    memset(&act, 0, sizeof(act));
 381    act.sa_handler = &sigbus_handler;
 382    act.sa_flags = 0;
 383
 384    ret = sigaction(SIGBUS, &act, &oldact);
 385    if (ret) {
 386        perror("os_mem_prealloc: failed to install signal handler");
 387        exit(1);
 388    }
 389
 390    /* unblock SIGBUS */
 391    sigemptyset(&set);
 392    sigaddset(&set, SIGBUS);
 393    pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
 394
 395    if (sigsetjmp(sigjump, 1)) {
 396        fprintf(stderr, "os_mem_prealloc: Insufficient free host memory "
 397                        "pages available to allocate guest RAM\n");
 398        exit(1);
 399    } else {
 400        int i;
 401        size_t hpagesize = fd_getpagesize(fd);
 402
 403        /* MAP_POPULATE silently ignores failures */
 404        memory = (memory + hpagesize - 1) & -hpagesize;
 405        for (i = 0; i < (memory / hpagesize); i++) {
 406            memset(area + (hpagesize * i), 0, 1);
 407        }
 408
 409        ret = sigaction(SIGBUS, &oldact, NULL);
 410        if (ret) {
 411            perror("os_mem_prealloc: failed to reinstall signal handler");
 412            exit(1);
 413        }
 414
 415        pthread_sigmask(SIG_SETMASK, &oldset, NULL);
 416    }
 417}
 418