qemu/util/osdep.c
<<
>>
Prefs
   1/*
   2 * QEMU low level functions
   3 *
   4 * Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24#include "qemu/osdep.h"
  25#include "qapi/error.h"
  26
  27/* Needed early for CONFIG_BSD etc. */
  28
  29#ifdef CONFIG_SOLARIS
  30#include <sys/statvfs.h>
  31/* See MySQL bug #7156 (http://bugs.mysql.com/bug.php?id=7156) for
  32   discussion about Solaris header problems */
  33extern int madvise(char *, size_t, int);
  34#endif
  35
  36#include "qemu-common.h"
  37#include "qemu/cutils.h"
  38#include "qemu/sockets.h"
  39#include "qemu/error-report.h"
  40#include "monitor/monitor.h"
  41
  42static bool fips_enabled = false;
  43
  44static const char *hw_version = QEMU_HW_VERSION;
  45
  46int socket_set_cork(int fd, int v)
  47{
  48#if defined(SOL_TCP) && defined(TCP_CORK)
  49    return qemu_setsockopt(fd, SOL_TCP, TCP_CORK, &v, sizeof(v));
  50#else
  51    return 0;
  52#endif
  53}
  54
  55int socket_set_nodelay(int fd)
  56{
  57    int v = 1;
  58    return qemu_setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &v, sizeof(v));
  59}
  60
  61int qemu_madvise(void *addr, size_t len, int advice)
  62{
  63    if (advice == QEMU_MADV_INVALID) {
  64        errno = EINVAL;
  65        return -1;
  66    }
  67#if defined(CONFIG_MADVISE)
  68    return madvise(addr, len, advice);
  69#elif defined(CONFIG_POSIX_MADVISE)
  70    return posix_madvise(addr, len, advice);
  71#else
  72    errno = EINVAL;
  73    return -1;
  74#endif
  75}
  76
  77static int qemu_mprotect__osdep(void *addr, size_t size, int prot)
  78{
  79    g_assert(!((uintptr_t)addr & ~qemu_real_host_page_mask));
  80    g_assert(!(size & ~qemu_real_host_page_mask));
  81
  82#ifdef _WIN32
  83    DWORD old_protect;
  84
  85    if (!VirtualProtect(addr, size, prot, &old_protect)) {
  86        g_autofree gchar *emsg = g_win32_error_message(GetLastError());
  87        error_report("%s: VirtualProtect failed: %s", __func__, emsg);
  88        return -1;
  89    }
  90    return 0;
  91#else
  92    if (mprotect(addr, size, prot)) {
  93        error_report("%s: mprotect failed: %s", __func__, strerror(errno));
  94        return -1;
  95    }
  96    return 0;
  97#endif
  98}
  99
 100int qemu_mprotect_rw(void *addr, size_t size)
 101{
 102#ifdef _WIN32
 103    return qemu_mprotect__osdep(addr, size, PAGE_READWRITE);
 104#else
 105    return qemu_mprotect__osdep(addr, size, PROT_READ | PROT_WRITE);
 106#endif
 107}
 108
 109int qemu_mprotect_rwx(void *addr, size_t size)
 110{
 111#ifdef _WIN32
 112    return qemu_mprotect__osdep(addr, size, PAGE_EXECUTE_READWRITE);
 113#else
 114    return qemu_mprotect__osdep(addr, size, PROT_READ | PROT_WRITE | PROT_EXEC);
 115#endif
 116}
 117
 118int qemu_mprotect_none(void *addr, size_t size)
 119{
 120#ifdef _WIN32
 121    return qemu_mprotect__osdep(addr, size, PAGE_NOACCESS);
 122#else
 123    return qemu_mprotect__osdep(addr, size, PROT_NONE);
 124#endif
 125}
 126
 127#ifndef _WIN32
 128
 129static int fcntl_op_setlk = -1;
 130static int fcntl_op_getlk = -1;
 131
 132/*
 133 * Dups an fd and sets the flags
 134 */
 135int qemu_dup_flags(int fd, int flags)
 136{
 137    int ret;
 138    int serrno;
 139    int dup_flags;
 140
 141    ret = qemu_dup(fd);
 142    if (ret == -1) {
 143        goto fail;
 144    }
 145
 146    dup_flags = fcntl(ret, F_GETFL);
 147    if (dup_flags == -1) {
 148        goto fail;
 149    }
 150
 151    if ((flags & O_SYNC) != (dup_flags & O_SYNC)) {
 152        errno = EINVAL;
 153        goto fail;
 154    }
 155
 156    /* Set/unset flags that we can with fcntl */
 157    if (fcntl(ret, F_SETFL, flags) == -1) {
 158        goto fail;
 159    }
 160
 161    /* Truncate the file in the cases that open() would truncate it */
 162    if (flags & O_TRUNC ||
 163            ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))) {
 164        if (ftruncate(ret, 0) == -1) {
 165            goto fail;
 166        }
 167    }
 168
 169    return ret;
 170
 171fail:
 172    serrno = errno;
 173    if (ret != -1) {
 174        close(ret);
 175    }
 176    errno = serrno;
 177    return -1;
 178}
 179
 180int qemu_dup(int fd)
 181{
 182    int ret;
 183#ifdef F_DUPFD_CLOEXEC
 184    ret = fcntl(fd, F_DUPFD_CLOEXEC, 0);
 185#else
 186    ret = dup(fd);
 187    if (ret != -1) {
 188        qemu_set_cloexec(ret);
 189    }
 190#endif
 191    return ret;
 192}
 193
 194static int qemu_parse_fdset(const char *param)
 195{
 196    return qemu_parse_fd(param);
 197}
 198
 199static void qemu_probe_lock_ops(void)
 200{
 201    if (fcntl_op_setlk == -1) {
 202#ifdef F_OFD_SETLK
 203        int fd;
 204        int ret;
 205        struct flock fl = {
 206            .l_whence = SEEK_SET,
 207            .l_start  = 0,
 208            .l_len    = 0,
 209            .l_type   = F_WRLCK,
 210        };
 211
 212        fd = open("/dev/null", O_RDWR);
 213        if (fd < 0) {
 214            fprintf(stderr,
 215                    "Failed to open /dev/null for OFD lock probing: %s\n",
 216                    strerror(errno));
 217            fcntl_op_setlk = F_SETLK;
 218            fcntl_op_getlk = F_GETLK;
 219            return;
 220        }
 221        ret = fcntl(fd, F_OFD_GETLK, &fl);
 222        close(fd);
 223        if (!ret) {
 224            fcntl_op_setlk = F_OFD_SETLK;
 225            fcntl_op_getlk = F_OFD_GETLK;
 226        } else {
 227            fcntl_op_setlk = F_SETLK;
 228            fcntl_op_getlk = F_GETLK;
 229        }
 230#else
 231        fcntl_op_setlk = F_SETLK;
 232        fcntl_op_getlk = F_GETLK;
 233#endif
 234    }
 235}
 236
 237bool qemu_has_ofd_lock(void)
 238{
 239    qemu_probe_lock_ops();
 240#ifdef F_OFD_SETLK
 241    return fcntl_op_setlk == F_OFD_SETLK;
 242#else
 243    return false;
 244#endif
 245}
 246
 247static int qemu_lock_fcntl(int fd, int64_t start, int64_t len, int fl_type)
 248{
 249    int ret;
 250    struct flock fl = {
 251        .l_whence = SEEK_SET,
 252        .l_start  = start,
 253        .l_len    = len,
 254        .l_type   = fl_type,
 255    };
 256    qemu_probe_lock_ops();
 257    do {
 258        ret = fcntl(fd, fcntl_op_setlk, &fl);
 259    } while (ret == -1 && errno == EINTR);
 260    return ret == -1 ? -errno : 0;
 261}
 262
 263int qemu_lock_fd(int fd, int64_t start, int64_t len, bool exclusive)
 264{
 265    return qemu_lock_fcntl(fd, start, len, exclusive ? F_WRLCK : F_RDLCK);
 266}
 267
 268int qemu_unlock_fd(int fd, int64_t start, int64_t len)
 269{
 270    return qemu_lock_fcntl(fd, start, len, F_UNLCK);
 271}
 272
 273int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive)
 274{
 275    int ret;
 276    struct flock fl = {
 277        .l_whence = SEEK_SET,
 278        .l_start  = start,
 279        .l_len    = len,
 280        .l_type   = exclusive ? F_WRLCK : F_RDLCK,
 281    };
 282    qemu_probe_lock_ops();
 283    ret = fcntl(fd, fcntl_op_getlk, &fl);
 284    if (ret == -1) {
 285        return -errno;
 286    } else {
 287        return fl.l_type == F_UNLCK ? 0 : -EAGAIN;
 288    }
 289}
 290#endif
 291
 292static int qemu_open_cloexec(const char *name, int flags, mode_t mode)
 293{
 294    int ret;
 295#ifdef O_CLOEXEC
 296    ret = open(name, flags | O_CLOEXEC, mode);
 297#else
 298    ret = open(name, flags, mode);
 299    if (ret >= 0) {
 300        qemu_set_cloexec(ret);
 301    }
 302#endif
 303    return ret;
 304}
 305
 306/*
 307 * Opens a file with FD_CLOEXEC set
 308 */
 309static int
 310qemu_open_internal(const char *name, int flags, mode_t mode, Error **errp)
 311{
 312    int ret;
 313
 314#ifndef _WIN32
 315    const char *fdset_id_str;
 316
 317    /* Attempt dup of fd from fd set */
 318    if (strstart(name, "/dev/fdset/", &fdset_id_str)) {
 319        int64_t fdset_id;
 320        int dupfd;
 321
 322        fdset_id = qemu_parse_fdset(fdset_id_str);
 323        if (fdset_id == -1) {
 324            error_setg(errp, "Could not parse fdset %s", name);
 325            errno = EINVAL;
 326            return -1;
 327        }
 328
 329        dupfd = monitor_fdset_dup_fd_add(fdset_id, flags);
 330        if (dupfd == -1) {
 331            error_setg_errno(errp, errno, "Could not dup FD for %s flags %x",
 332                             name, flags);
 333            return -1;
 334        }
 335
 336        return dupfd;
 337    }
 338#endif
 339
 340    ret = qemu_open_cloexec(name, flags, mode);
 341
 342    if (ret == -1) {
 343        const char *action = flags & O_CREAT ? "create" : "open";
 344#ifdef O_DIRECT
 345        /* Give more helpful error message for O_DIRECT */
 346        if (errno == EINVAL && (flags & O_DIRECT)) {
 347            ret = open(name, flags & ~O_DIRECT, mode);
 348            if (ret != -1) {
 349                close(ret);
 350                error_setg(errp, "Could not %s '%s': "
 351                           "filesystem does not support O_DIRECT",
 352                           action, name);
 353                errno = EINVAL; /* restore first open()'s errno */
 354                return -1;
 355            }
 356        }
 357#endif /* O_DIRECT */
 358        error_setg_errno(errp, errno, "Could not %s '%s'",
 359                         action, name);
 360    }
 361
 362    return ret;
 363}
 364
 365
 366int qemu_open(const char *name, int flags, Error **errp)
 367{
 368    assert(!(flags & O_CREAT));
 369
 370    return qemu_open_internal(name, flags, 0, errp);
 371}
 372
 373
 374int qemu_create(const char *name, int flags, mode_t mode, Error **errp)
 375{
 376    assert(!(flags & O_CREAT));
 377
 378    return qemu_open_internal(name, flags | O_CREAT, mode, errp);
 379}
 380
 381
 382int qemu_open_old(const char *name, int flags, ...)
 383{
 384    va_list ap;
 385    mode_t mode = 0;
 386    int ret;
 387
 388    va_start(ap, flags);
 389    if (flags & O_CREAT) {
 390        mode = va_arg(ap, int);
 391    }
 392    va_end(ap);
 393
 394    ret = qemu_open_internal(name, flags, mode, NULL);
 395
 396#ifdef O_DIRECT
 397    if (ret == -1 && errno == EINVAL && (flags & O_DIRECT)) {
 398        error_report("file system may not support O_DIRECT");
 399        errno = EINVAL; /* in case it was clobbered */
 400    }
 401#endif /* O_DIRECT */
 402
 403    return ret;
 404}
 405
 406int qemu_close(int fd)
 407{
 408    int64_t fdset_id;
 409
 410    /* Close fd that was dup'd from an fdset */
 411    fdset_id = monitor_fdset_dup_fd_find(fd);
 412    if (fdset_id != -1) {
 413        int ret;
 414
 415        ret = close(fd);
 416        if (ret == 0) {
 417            monitor_fdset_dup_fd_remove(fd);
 418        }
 419
 420        return ret;
 421    }
 422
 423    return close(fd);
 424}
 425
 426/*
 427 * Delete a file from the filesystem, unless the filename is /dev/fdset/...
 428 *
 429 * Returns: On success, zero is returned.  On error, -1 is returned,
 430 * and errno is set appropriately.
 431 */
 432int qemu_unlink(const char *name)
 433{
 434    if (g_str_has_prefix(name, "/dev/fdset/")) {
 435        return 0;
 436    }
 437
 438    return unlink(name);
 439}
 440
 441/*
 442 * A variant of write(2) which handles partial write.
 443 *
 444 * Return the number of bytes transferred.
 445 * Set errno if fewer than `count' bytes are written.
 446 *
 447 * This function don't work with non-blocking fd's.
 448 * Any of the possibilities with non-blocking fd's is bad:
 449 *   - return a short write (then name is wrong)
 450 *   - busy wait adding (errno == EAGAIN) to the loop
 451 */
 452ssize_t qemu_write_full(int fd, const void *buf, size_t count)
 453{
 454    ssize_t ret = 0;
 455    ssize_t total = 0;
 456
 457    while (count) {
 458        ret = write(fd, buf, count);
 459        if (ret < 0) {
 460            if (errno == EINTR)
 461                continue;
 462            break;
 463        }
 464
 465        count -= ret;
 466        buf += ret;
 467        total += ret;
 468    }
 469
 470    return total;
 471}
 472
 473/*
 474 * Opens a socket with FD_CLOEXEC set
 475 */
 476int qemu_socket(int domain, int type, int protocol)
 477{
 478    int ret;
 479
 480#ifdef SOCK_CLOEXEC
 481    ret = socket(domain, type | SOCK_CLOEXEC, protocol);
 482    if (ret != -1 || errno != EINVAL) {
 483        return ret;
 484    }
 485#endif
 486    ret = socket(domain, type, protocol);
 487    if (ret >= 0) {
 488        qemu_set_cloexec(ret);
 489    }
 490
 491    return ret;
 492}
 493
 494/*
 495 * Accept a connection and set FD_CLOEXEC
 496 */
 497int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen)
 498{
 499    int ret;
 500
 501#ifdef CONFIG_ACCEPT4
 502    ret = accept4(s, addr, addrlen, SOCK_CLOEXEC);
 503    if (ret != -1 || errno != ENOSYS) {
 504        return ret;
 505    }
 506#endif
 507    ret = accept(s, addr, addrlen);
 508    if (ret >= 0) {
 509        qemu_set_cloexec(ret);
 510    }
 511
 512    return ret;
 513}
 514
 515void qemu_set_hw_version(const char *version)
 516{
 517    hw_version = version;
 518}
 519
 520const char *qemu_hw_version(void)
 521{
 522    return hw_version;
 523}
 524
 525void fips_set_state(bool requested)
 526{
 527#ifdef __linux__
 528    if (requested) {
 529        FILE *fds = fopen("/proc/sys/crypto/fips_enabled", "r");
 530        if (fds != NULL) {
 531            fips_enabled = (fgetc(fds) == '1');
 532            fclose(fds);
 533        }
 534    }
 535#else
 536    fips_enabled = false;
 537#endif /* __linux__ */
 538
 539#ifdef _FIPS_DEBUG
 540    fprintf(stderr, "FIPS mode %s (requested %s)\n",
 541            (fips_enabled ? "enabled" : "disabled"),
 542            (requested ? "enabled" : "disabled"));
 543#endif
 544}
 545
 546bool fips_get_state(void)
 547{
 548    return fips_enabled;
 549}
 550
 551#ifdef _WIN32
 552static void socket_cleanup(void)
 553{
 554    WSACleanup();
 555}
 556#endif
 557
 558int socket_init(void)
 559{
 560#ifdef _WIN32
 561    WSADATA Data;
 562    int ret, err;
 563
 564    ret = WSAStartup(MAKEWORD(2, 2), &Data);
 565    if (ret != 0) {
 566        err = WSAGetLastError();
 567        fprintf(stderr, "WSAStartup: %d\n", err);
 568        return -1;
 569    }
 570    atexit(socket_cleanup);
 571#endif
 572    return 0;
 573}
 574
 575
 576#ifndef CONFIG_IOVEC
 577/* helper function for iov_send_recv() */
 578static ssize_t
 579readv_writev(int fd, const struct iovec *iov, int iov_cnt, bool do_write)
 580{
 581    unsigned i = 0;
 582    ssize_t ret = 0;
 583    while (i < iov_cnt) {
 584        ssize_t r = do_write
 585            ? write(fd, iov[i].iov_base, iov[i].iov_len)
 586            : read(fd, iov[i].iov_base, iov[i].iov_len);
 587        if (r > 0) {
 588            ret += r;
 589        } else if (!r) {
 590            break;
 591        } else if (errno == EINTR) {
 592            continue;
 593        } else {
 594            /* else it is some "other" error,
 595             * only return if there was no data processed. */
 596            if (ret == 0) {
 597                ret = -1;
 598            }
 599            break;
 600        }
 601        i++;
 602    }
 603    return ret;
 604}
 605
 606ssize_t
 607readv(int fd, const struct iovec *iov, int iov_cnt)
 608{
 609    return readv_writev(fd, iov, iov_cnt, false);
 610}
 611
 612ssize_t
 613writev(int fd, const struct iovec *iov, int iov_cnt)
 614{
 615    return readv_writev(fd, iov, iov_cnt, true);
 616}
 617#endif
 618