qemu/util/osdep.c
<<
>>
Prefs
   1/*
   2 * QEMU low level functions
   3 *
   4 * Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24#include "qemu/osdep.h"
  25#include "qapi/error.h"
  26#include "qemu/cutils.h"
  27#include "qemu/sockets.h"
  28#include "qemu/error-report.h"
  29#include "qemu/madvise.h"
  30#include "qemu/mprotect.h"
  31#include "qemu/hw-version.h"
  32#include "monitor/monitor.h"
  33
/*
 * Hardware version string handed out by qemu_hw_version().  It starts out
 * as QEMU_HW_VERSION and is replaced by qemu_set_hw_version().
 */
static const char *hw_version = QEMU_HW_VERSION;
  35
  36int socket_set_cork(int fd, int v)
  37{
  38#if defined(SOL_TCP) && defined(TCP_CORK)
  39    return setsockopt(fd, SOL_TCP, TCP_CORK, &v, sizeof(v));
  40#else
  41    return 0;
  42#endif
  43}
  44
  45int socket_set_nodelay(int fd)
  46{
  47    int v = 1;
  48    return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &v, sizeof(v));
  49}
  50
  51int qemu_madvise(void *addr, size_t len, int advice)
  52{
  53    if (advice == QEMU_MADV_INVALID) {
  54        errno = EINVAL;
  55        return -1;
  56    }
  57#if defined(CONFIG_MADVISE)
  58    return madvise(addr, len, advice);
  59#elif defined(CONFIG_POSIX_MADVISE)
  60    return posix_madvise(addr, len, advice);
  61#else
  62    errno = EINVAL;
  63    return -1;
  64#endif
  65}
  66
  67static int qemu_mprotect__osdep(void *addr, size_t size, int prot)
  68{
  69    g_assert(!((uintptr_t)addr & ~qemu_real_host_page_mask()));
  70    g_assert(!(size & ~qemu_real_host_page_mask()));
  71
  72#ifdef _WIN32
  73    DWORD old_protect;
  74
  75    if (!VirtualProtect(addr, size, prot, &old_protect)) {
  76        g_autofree gchar *emsg = g_win32_error_message(GetLastError());
  77        error_report("%s: VirtualProtect failed: %s", __func__, emsg);
  78        return -1;
  79    }
  80    return 0;
  81#else
  82    if (mprotect(addr, size, prot)) {
  83        error_report("%s: mprotect failed: %s", __func__, strerror(errno));
  84        return -1;
  85    }
  86    return 0;
  87#endif
  88}
  89
  90int qemu_mprotect_rw(void *addr, size_t size)
  91{
  92#ifdef _WIN32
  93    return qemu_mprotect__osdep(addr, size, PAGE_READWRITE);
  94#else
  95    return qemu_mprotect__osdep(addr, size, PROT_READ | PROT_WRITE);
  96#endif
  97}
  98
  99int qemu_mprotect_rwx(void *addr, size_t size)
 100{
 101#ifdef _WIN32
 102    return qemu_mprotect__osdep(addr, size, PAGE_EXECUTE_READWRITE);
 103#else
 104    return qemu_mprotect__osdep(addr, size, PROT_READ | PROT_WRITE | PROT_EXEC);
 105#endif
 106}
 107
 108int qemu_mprotect_none(void *addr, size_t size)
 109{
 110#ifdef _WIN32
 111    return qemu_mprotect__osdep(addr, size, PAGE_NOACCESS);
 112#else
 113    return qemu_mprotect__osdep(addr, size, PROT_NONE);
 114#endif
 115}
 116
 117#ifndef _WIN32
 118
/*
 * fcntl() commands used to take/release locks and to test for locks.
 * -1 means "not chosen yet"; qemu_probe_lock_ops() fills both in on its
 * first call.
 */
static int fcntl_op_setlk = -1;
static int fcntl_op_getlk = -1;
 121
 122/*
 123 * Dups an fd and sets the flags
 124 */
 125int qemu_dup_flags(int fd, int flags)
 126{
 127    int ret;
 128    int serrno;
 129    int dup_flags;
 130
 131    ret = qemu_dup(fd);
 132    if (ret == -1) {
 133        goto fail;
 134    }
 135
 136    dup_flags = fcntl(ret, F_GETFL);
 137    if (dup_flags == -1) {
 138        goto fail;
 139    }
 140
 141    if ((flags & O_SYNC) != (dup_flags & O_SYNC)) {
 142        errno = EINVAL;
 143        goto fail;
 144    }
 145
 146    /* Set/unset flags that we can with fcntl */
 147    if (fcntl(ret, F_SETFL, flags) == -1) {
 148        goto fail;
 149    }
 150
 151    /* Truncate the file in the cases that open() would truncate it */
 152    if (flags & O_TRUNC ||
 153            ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))) {
 154        if (ftruncate(ret, 0) == -1) {
 155            goto fail;
 156        }
 157    }
 158
 159    return ret;
 160
 161fail:
 162    serrno = errno;
 163    if (ret != -1) {
 164        close(ret);
 165    }
 166    errno = serrno;
 167    return -1;
 168}
 169
 170int qemu_dup(int fd)
 171{
 172    int ret;
 173#ifdef F_DUPFD_CLOEXEC
 174    ret = fcntl(fd, F_DUPFD_CLOEXEC, 0);
 175#else
 176    ret = dup(fd);
 177    if (ret != -1) {
 178        qemu_set_cloexec(ret);
 179    }
 180#endif
 181    return ret;
 182}
 183
 184static int qemu_parse_fdset(const char *param)
 185{
 186    return qemu_parse_fd(param);
 187}
 188
 189static void qemu_probe_lock_ops(void)
 190{
 191    if (fcntl_op_setlk == -1) {
 192#ifdef F_OFD_SETLK
 193        int fd;
 194        int ret;
 195        struct flock fl = {
 196            .l_whence = SEEK_SET,
 197            .l_start  = 0,
 198            .l_len    = 0,
 199            .l_type   = F_WRLCK,
 200        };
 201
 202        fd = open("/dev/null", O_RDWR);
 203        if (fd < 0) {
 204            fprintf(stderr,
 205                    "Failed to open /dev/null for OFD lock probing: %s\n",
 206                    strerror(errno));
 207            fcntl_op_setlk = F_SETLK;
 208            fcntl_op_getlk = F_GETLK;
 209            return;
 210        }
 211        ret = fcntl(fd, F_OFD_GETLK, &fl);
 212        close(fd);
 213        if (!ret) {
 214            fcntl_op_setlk = F_OFD_SETLK;
 215            fcntl_op_getlk = F_OFD_GETLK;
 216        } else {
 217            fcntl_op_setlk = F_SETLK;
 218            fcntl_op_getlk = F_GETLK;
 219        }
 220#else
 221        fcntl_op_setlk = F_SETLK;
 222        fcntl_op_getlk = F_GETLK;
 223#endif
 224    }
 225}
 226
 227bool qemu_has_ofd_lock(void)
 228{
 229    qemu_probe_lock_ops();
 230#ifdef F_OFD_SETLK
 231    return fcntl_op_setlk == F_OFD_SETLK;
 232#else
 233    return false;
 234#endif
 235}
 236
 237static int qemu_lock_fcntl(int fd, int64_t start, int64_t len, int fl_type)
 238{
 239    int ret;
 240    struct flock fl = {
 241        .l_whence = SEEK_SET,
 242        .l_start  = start,
 243        .l_len    = len,
 244        .l_type   = fl_type,
 245    };
 246    qemu_probe_lock_ops();
 247    do {
 248        ret = fcntl(fd, fcntl_op_setlk, &fl);
 249    } while (ret == -1 && errno == EINTR);
 250    return ret == -1 ? -errno : 0;
 251}
 252
 253int qemu_lock_fd(int fd, int64_t start, int64_t len, bool exclusive)
 254{
 255    return qemu_lock_fcntl(fd, start, len, exclusive ? F_WRLCK : F_RDLCK);
 256}
 257
 258int qemu_unlock_fd(int fd, int64_t start, int64_t len)
 259{
 260    return qemu_lock_fcntl(fd, start, len, F_UNLCK);
 261}
 262
 263int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive)
 264{
 265    int ret;
 266    struct flock fl = {
 267        .l_whence = SEEK_SET,
 268        .l_start  = start,
 269        .l_len    = len,
 270        .l_type   = exclusive ? F_WRLCK : F_RDLCK,
 271    };
 272    qemu_probe_lock_ops();
 273    ret = fcntl(fd, fcntl_op_getlk, &fl);
 274    if (ret == -1) {
 275        return -errno;
 276    } else {
 277        return fl.l_type == F_UNLCK ? 0 : -EAGAIN;
 278    }
 279}
 280#endif
 281
 282static int qemu_open_cloexec(const char *name, int flags, mode_t mode)
 283{
 284    int ret;
 285#ifdef O_CLOEXEC
 286    ret = open(name, flags | O_CLOEXEC, mode);
 287#else
 288    ret = open(name, flags, mode);
 289    if (ret >= 0) {
 290        qemu_set_cloexec(ret);
 291    }
 292#endif
 293    return ret;
 294}
 295
 296/*
 297 * Opens a file with FD_CLOEXEC set
 298 */
 299static int
 300qemu_open_internal(const char *name, int flags, mode_t mode, Error **errp)
 301{
 302    int ret;
 303
 304#ifndef _WIN32
 305    const char *fdset_id_str;
 306
 307    /* Attempt dup of fd from fd set */
 308    if (strstart(name, "/dev/fdset/", &fdset_id_str)) {
 309        int64_t fdset_id;
 310        int dupfd;
 311
 312        fdset_id = qemu_parse_fdset(fdset_id_str);
 313        if (fdset_id == -1) {
 314            error_setg(errp, "Could not parse fdset %s", name);
 315            errno = EINVAL;
 316            return -1;
 317        }
 318
 319        dupfd = monitor_fdset_dup_fd_add(fdset_id, flags);
 320        if (dupfd == -1) {
 321            error_setg_errno(errp, errno, "Could not dup FD for %s flags %x",
 322                             name, flags);
 323            return -1;
 324        }
 325
 326        return dupfd;
 327    }
 328#endif
 329
 330    ret = qemu_open_cloexec(name, flags, mode);
 331
 332    if (ret == -1) {
 333        const char *action = flags & O_CREAT ? "create" : "open";
 334#ifdef O_DIRECT
 335        /* Give more helpful error message for O_DIRECT */
 336        if (errno == EINVAL && (flags & O_DIRECT)) {
 337            ret = open(name, flags & ~O_DIRECT, mode);
 338            if (ret != -1) {
 339                close(ret);
 340                error_setg(errp, "Could not %s '%s': "
 341                           "filesystem does not support O_DIRECT",
 342                           action, name);
 343                errno = EINVAL; /* restore first open()'s errno */
 344                return -1;
 345            }
 346        }
 347#endif /* O_DIRECT */
 348        error_setg_errno(errp, errno, "Could not %s '%s'",
 349                         action, name);
 350    }
 351
 352    return ret;
 353}
 354
 355
 356int qemu_open(const char *name, int flags, Error **errp)
 357{
 358    assert(!(flags & O_CREAT));
 359
 360    return qemu_open_internal(name, flags, 0, errp);
 361}
 362
 363
 364int qemu_create(const char *name, int flags, mode_t mode, Error **errp)
 365{
 366    assert(!(flags & O_CREAT));
 367
 368    return qemu_open_internal(name, flags | O_CREAT, mode, errp);
 369}
 370
 371
 372int qemu_open_old(const char *name, int flags, ...)
 373{
 374    va_list ap;
 375    mode_t mode = 0;
 376    int ret;
 377
 378    va_start(ap, flags);
 379    if (flags & O_CREAT) {
 380        mode = va_arg(ap, int);
 381    }
 382    va_end(ap);
 383
 384    ret = qemu_open_internal(name, flags, mode, NULL);
 385
 386#ifdef O_DIRECT
 387    if (ret == -1 && errno == EINVAL && (flags & O_DIRECT)) {
 388        error_report("file system may not support O_DIRECT");
 389        errno = EINVAL; /* in case it was clobbered */
 390    }
 391#endif /* O_DIRECT */
 392
 393    return ret;
 394}
 395
 396int qemu_close(int fd)
 397{
 398    int64_t fdset_id;
 399
 400    /* Close fd that was dup'd from an fdset */
 401    fdset_id = monitor_fdset_dup_fd_find(fd);
 402    if (fdset_id != -1) {
 403        int ret;
 404
 405        ret = close(fd);
 406        if (ret == 0) {
 407            monitor_fdset_dup_fd_remove(fd);
 408        }
 409
 410        return ret;
 411    }
 412
 413    return close(fd);
 414}
 415
 416/*
 417 * Delete a file from the filesystem, unless the filename is /dev/fdset/...
 418 *
 419 * Returns: On success, zero is returned.  On error, -1 is returned,
 420 * and errno is set appropriately.
 421 */
 422int qemu_unlink(const char *name)
 423{
 424    if (g_str_has_prefix(name, "/dev/fdset/")) {
 425        return 0;
 426    }
 427
 428    return unlink(name);
 429}
 430
 431/*
 432 * A variant of write(2) which handles partial write.
 433 *
 434 * Return the number of bytes transferred.
 435 * Set errno if fewer than `count' bytes are written.
 436 *
 437 * This function don't work with non-blocking fd's.
 438 * Any of the possibilities with non-blocking fd's is bad:
 439 *   - return a short write (then name is wrong)
 440 *   - busy wait adding (errno == EAGAIN) to the loop
 441 */
 442ssize_t qemu_write_full(int fd, const void *buf, size_t count)
 443{
 444    ssize_t ret = 0;
 445    ssize_t total = 0;
 446
 447    while (count) {
 448        ret = write(fd, buf, count);
 449        if (ret < 0) {
 450            if (errno == EINTR)
 451                continue;
 452            break;
 453        }
 454
 455        count -= ret;
 456        buf += ret;
 457        total += ret;
 458    }
 459
 460    return total;
 461}
 462
 463/*
 464 * Opens a socket with FD_CLOEXEC set
 465 */
 466int qemu_socket(int domain, int type, int protocol)
 467{
 468    int ret;
 469
 470#ifdef SOCK_CLOEXEC
 471    ret = socket(domain, type | SOCK_CLOEXEC, protocol);
 472    if (ret != -1 || errno != EINVAL) {
 473        return ret;
 474    }
 475#endif
 476    ret = socket(domain, type, protocol);
 477    if (ret >= 0) {
 478        qemu_set_cloexec(ret);
 479    }
 480
 481    return ret;
 482}
 483
 484/*
 485 * Accept a connection and set FD_CLOEXEC
 486 */
 487int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen)
 488{
 489    int ret;
 490
 491#ifdef CONFIG_ACCEPT4
 492    ret = accept4(s, addr, addrlen, SOCK_CLOEXEC);
 493    if (ret != -1 || errno != ENOSYS) {
 494        return ret;
 495    }
 496#endif
 497    ret = accept(s, addr, addrlen);
 498    if (ret >= 0) {
 499        qemu_set_cloexec(ret);
 500    }
 501
 502    return ret;
 503}
 504
 505ssize_t qemu_send_full(int s, const void *buf, size_t count)
 506{
 507    ssize_t ret = 0;
 508    ssize_t total = 0;
 509
 510    while (count) {
 511        ret = send(s, buf, count, 0);
 512        if (ret < 0) {
 513            if (errno == EINTR) {
 514                continue;
 515            }
 516            break;
 517        }
 518
 519        count -= ret;
 520        buf += ret;
 521        total += ret;
 522    }
 523
 524    return total;
 525}
 526
 527void qemu_set_hw_version(const char *version)
 528{
 529    hw_version = version;
 530}
 531
 532const char *qemu_hw_version(void)
 533{
 534    return hw_version;
 535}
 536
 537#ifdef _WIN32
 538static void socket_cleanup(void)
 539{
 540    WSACleanup();
 541}
 542#endif
 543
 544int socket_init(void)
 545{
 546#ifdef _WIN32
 547    WSADATA Data;
 548    int ret, err;
 549
 550    ret = WSAStartup(MAKEWORD(2, 2), &Data);
 551    if (ret != 0) {
 552        err = WSAGetLastError();
 553        fprintf(stderr, "WSAStartup: %d\n", err);
 554        return -1;
 555    }
 556    atexit(socket_cleanup);
 557#endif
 558    return 0;
 559}
 560
 561
 562#ifndef CONFIG_IOVEC
 563static ssize_t
 564readv_writev(int fd, const struct iovec *iov, int iov_cnt, bool do_write)
 565{
 566    unsigned i = 0;
 567    ssize_t ret = 0;
 568    ssize_t off = 0;
 569    while (i < iov_cnt) {
 570        ssize_t r = do_write
 571            ? write(fd, iov[i].iov_base + off, iov[i].iov_len - off)
 572            : read(fd, iov[i].iov_base + off, iov[i].iov_len - off);
 573        if (r > 0) {
 574            ret += r;
 575            off += r;
 576            if (off < iov[i].iov_len) {
 577                continue;
 578            }
 579        } else if (!r) {
 580            break;
 581        } else if (errno == EINTR) {
 582            continue;
 583        } else {
 584            /* else it is some "other" error,
 585             * only return if there was no data processed. */
 586            if (ret == 0) {
 587                ret = -1;
 588            }
 589            break;
 590        }
 591        off = 0;
 592        i++;
 593    }
 594    return ret;
 595}
 596
 597ssize_t
 598readv(int fd, const struct iovec *iov, int iov_cnt)
 599{
 600    return readv_writev(fd, iov, iov_cnt, false);
 601}
 602
 603ssize_t
 604writev(int fd, const struct iovec *iov, int iov_cnt)
 605{
 606    return readv_writev(fd, iov, iov_cnt, true);
 607}
 608#endif
 609
 610/*
 611 * Make sure data goes on disk, but if possible do not bother to
 612 * write out the inode just for timestamp updates.
 613 *
 614 * Unfortunately even in 2009 many operating systems do not support
 615 * fdatasync and have to fall back to fsync.
 616 */
 617int qemu_fdatasync(int fd)
 618{
 619#ifdef CONFIG_FDATASYNC
 620    return fdatasync(fd);
 621#else
 622    return fsync(fd);
 623#endif
 624}
 625