qemu/migration/qemu-file.c
<<
>>
Prefs
   1/*
   2 * QEMU System Emulator
   3 *
   4 * Copyright (c) 2003-2008 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24#include "qemu/osdep.h"
  25#include "qemu/madvise.h"
  26#include "qemu/error-report.h"
  27#include "qemu/iov.h"
  28#include "migration.h"
  29#include "migration-stats.h"
  30#include "qemu-file.h"
  31#include "trace.h"
  32#include "options.h"
  33#include "qapi/error.h"
  34#include "rdma.h"
  35#include "io/channel-file.h"
  36
  37#define IO_BUF_SIZE 32768
  38#define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64)
  39
/*
 * One file descriptor that arrived together with stream data and has not
 * been handed to a caller yet.  Entries are queued on QEMUFile.fds by
 * qemu_fill_buffer() and dequeued by qemu_file_get_fd().
 */
typedef struct FdEntry {
    QTAILQ_ENTRY(FdEntry) entry;    /* linkage inside QEMUFile.fds */
    int fd;                         /* the received descriptor */
} FdEntry;
  44
/*
 * Buffered, one-directional stream layered on top of a QIOChannel.
 * A given QEMUFile is either a writer (is_writable) or a reader.
 */
struct QEMUFile {
    QIOChannel *ioc;    /* underlying channel; a reference is held */
    bool is_writable;   /* true for an output file, false for an input file */

    /*
     * Reader: offset of the next unread byte in buf.
     * Writer: offset of the next free byte in buf.
     */
    int buf_index;
    int buf_size; /* 0 when writing */
    uint8_t buf[IO_BUF_SIZE];

    /* Bit i set: iov[i] may be released with madvise() once flushed */
    DECLARE_BITMAP(may_free, MAX_IOV_SIZE);
    struct iovec iov[MAX_IOV_SIZE];     /* data queued for the next flush */
    unsigned int iovcnt;                /* number of iov[] entries in use */

    int last_error;         /* first recorded error (non-zero) or 0 */
    Error *last_error_obj;  /* optional verbose form of last_error */

    bool can_pass_fd;               /* channel has QIO_CHANNEL_FEATURE_FD_PASS */
    QTAILQ_HEAD(, FdEntry) fds;     /* received fds not yet claimed */
};
  63
  64/*
  65 * Stop a file from being read/written - not all backing files can do this
  66 * typically only sockets can.
  67 *
  68 * TODO: convert to propagate Error objects instead of squashing
  69 * to a fixed errno value
  70 */
  71int qemu_file_shutdown(QEMUFile *f)
  72{
  73    Error *err = NULL;
  74
  75    /*
  76     * We must set qemufile error before the real shutdown(), otherwise
  77     * there can be a race window where we thought IO all went though
  78     * (because last_error==NULL) but actually IO has already stopped.
  79     *
  80     * If without correct ordering, the race can happen like this:
  81     *
  82     *      page receiver                     other thread
  83     *      -------------                     ------------
  84     *      qemu_get_buffer()
  85     *                                        do shutdown()
  86     *        returns 0 (buffer all zero)
  87     *        (we didn't check this retcode)
  88     *      try to detect IO error
  89     *        last_error==NULL, IO okay
  90     *      install ALL-ZERO page
  91     *                                        set last_error
  92     *      --> guest crash!
  93     */
  94    if (!f->last_error) {
  95        qemu_file_set_error(f, -EIO);
  96    }
  97
  98    if (!qio_channel_has_feature(f->ioc,
  99                                 QIO_CHANNEL_FEATURE_SHUTDOWN)) {
 100        return -ENOSYS;
 101    }
 102
 103    if (qio_channel_shutdown(f->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, &err) < 0) {
 104        error_report_err(err);
 105        return -EIO;
 106    }
 107
 108    return 0;
 109}
 110
 111static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
 112{
 113    QEMUFile *f;
 114
 115    f = g_new0(QEMUFile, 1);
 116
 117    object_ref(ioc);
 118    f->ioc = ioc;
 119    f->is_writable = is_writable;
 120    f->can_pass_fd = qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS);
 121    QTAILQ_INIT(&f->fds);
 122
 123    return f;
 124}
 125
 126/*
 127 * Result: QEMUFile* for a 'return path' for comms in the opposite direction
 128 *         NULL if not available
 129 */
 130QEMUFile *qemu_file_get_return_path(QEMUFile *f)
 131{
 132    return qemu_file_new_impl(f->ioc, !f->is_writable);
 133}
 134
 135QEMUFile *qemu_file_new_output(QIOChannel *ioc)
 136{
 137    return qemu_file_new_impl(ioc, true);
 138}
 139
 140QEMUFile *qemu_file_new_input(QIOChannel *ioc)
 141{
 142    return qemu_file_new_impl(ioc, false);
 143}
 144
 145/*
 146 * Get last error for stream f with optional Error*
 147 *
 148 * Return negative error value if there has been an error on previous
 149 * operations, return 0 if no error happened.
 150 *
 151 * If errp is specified, a verbose error message will be copied over.
 152 */
 153int qemu_file_get_error_obj(QEMUFile *f, Error **errp)
 154{
 155    if (!f->last_error) {
 156        return 0;
 157    }
 158
 159    /* There is an error */
 160    if (errp) {
 161        if (f->last_error_obj) {
 162            *errp = error_copy(f->last_error_obj);
 163        } else {
 164            error_setg_errno(errp, -f->last_error, "Channel error");
 165        }
 166    }
 167
 168    return f->last_error;
 169}
 170
 171/*
 172 * Get last error for either stream f1 or f2 with optional Error*.
 173 * The error returned (non-zero) can be either from f1 or f2.
 174 *
 175 * If any of the qemufile* is NULL, then skip the check on that file.
 176 *
 177 * When there is no error on both qemufile, zero is returned.
 178 */
 179int qemu_file_get_error_obj_any(QEMUFile *f1, QEMUFile *f2, Error **errp)
 180{
 181    int ret = 0;
 182
 183    if (f1) {
 184        ret = qemu_file_get_error_obj(f1, errp);
 185        /* If there's already error detected, return */
 186        if (ret) {
 187            return ret;
 188        }
 189    }
 190
 191    if (f2) {
 192        ret = qemu_file_get_error_obj(f2, errp);
 193    }
 194
 195    return ret;
 196}
 197
 198/*
 199 * Set the last error for stream f with optional Error*
 200 */
 201void qemu_file_set_error_obj(QEMUFile *f, int ret, Error *err)
 202{
 203    if (f->last_error == 0 && ret) {
 204        f->last_error = ret;
 205        error_propagate(&f->last_error_obj, err);
 206    } else if (err) {
 207        error_report_err(err);
 208    }
 209}
 210
 211/*
 212 * Get last error for stream f
 213 *
 214 * Return negative error value if there has been an error on previous
 215 * operations, return 0 if no error happened.
 216 *
 217 */
 218int qemu_file_get_error(QEMUFile *f)
 219{
 220    return f->last_error;
 221}
 222
 223/*
 224 * Set the last error for stream f
 225 */
 226void qemu_file_set_error(QEMUFile *f, int ret)
 227{
 228    qemu_file_set_error_obj(f, ret, NULL);
 229}
 230
 231static bool qemu_file_is_writable(QEMUFile *f)
 232{
 233    return f->is_writable;
 234}
 235
 236static void qemu_iovec_release_ram(QEMUFile *f)
 237{
 238    struct iovec iov;
 239    unsigned long idx;
 240
 241    /* Find and release all the contiguous memory ranges marked as may_free. */
 242    idx = find_next_bit(f->may_free, f->iovcnt, 0);
 243    if (idx >= f->iovcnt) {
 244        return;
 245    }
 246    iov = f->iov[idx];
 247
 248    /* The madvise() in the loop is called for iov within a continuous range and
 249     * then reinitialize the iov. And in the end, madvise() is called for the
 250     * last iov.
 251     */
 252    while ((idx = find_next_bit(f->may_free, f->iovcnt, idx + 1)) < f->iovcnt) {
 253        /* check for adjacent buffer and coalesce them */
 254        if (iov.iov_base + iov.iov_len == f->iov[idx].iov_base) {
 255            iov.iov_len += f->iov[idx].iov_len;
 256            continue;
 257        }
 258        if (qemu_madvise(iov.iov_base, iov.iov_len, QEMU_MADV_DONTNEED) < 0) {
 259            error_report("migrate: madvise DONTNEED failed %p %zd: %s",
 260                         iov.iov_base, iov.iov_len, strerror(errno));
 261        }
 262        iov = f->iov[idx];
 263    }
 264    if (qemu_madvise(iov.iov_base, iov.iov_len, QEMU_MADV_DONTNEED) < 0) {
 265            error_report("migrate: madvise DONTNEED failed %p %zd: %s",
 266                         iov.iov_base, iov.iov_len, strerror(errno));
 267    }
 268    memset(f->may_free, 0, sizeof(f->may_free));
 269}
 270
 271bool qemu_file_is_seekable(QEMUFile *f)
 272{
 273    return qio_channel_has_feature(f->ioc, QIO_CHANNEL_FEATURE_SEEKABLE);
 274}
 275
 276/**
 277 * Flushes QEMUFile buffer
 278 *
 279 * This will flush all pending data. If data was only partially flushed, it
 280 * will set an error state.
 281 */
 282int qemu_fflush(QEMUFile *f)
 283{
 284    if (!qemu_file_is_writable(f)) {
 285        return f->last_error;
 286    }
 287
 288    if (f->last_error) {
 289        return f->last_error;
 290    }
 291    if (f->iovcnt > 0) {
 292        Error *local_error = NULL;
 293        if (qio_channel_writev_all(f->ioc,
 294                                   f->iov, f->iovcnt,
 295                                   &local_error) < 0) {
 296            qemu_file_set_error_obj(f, -EIO, local_error);
 297        } else {
 298            uint64_t size = iov_size(f->iov, f->iovcnt);
 299            stat64_add(&mig_stats.qemu_file_transferred, size);
 300        }
 301
 302        qemu_iovec_release_ram(f);
 303    }
 304
 305    f->buf_index = 0;
 306    f->iovcnt = 0;
 307    return f->last_error;
 308}
 309
 310/*
 311 * Attempt to fill the buffer from the underlying file
 312 * Returns the number of bytes read, or negative value for an error.
 313 *
 314 * Note that it can return a partially full buffer even in a not error/not EOF
 315 * case if the underlying file descriptor gives a short read, and that can
 316 * happen even on a blocking fd.
 317 */
 318static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
 319{
 320    int len;
 321    int pending;
 322    Error *local_error = NULL;
 323    g_autofree int *fds = NULL;
 324    size_t nfd = 0;
 325    int **pfds = f->can_pass_fd ? &fds : NULL;
 326    size_t *pnfd = f->can_pass_fd ? &nfd : NULL;
 327
 328    assert(!qemu_file_is_writable(f));
 329
 330    pending = f->buf_size - f->buf_index;
 331    if (pending > 0) {
 332        memmove(f->buf, f->buf + f->buf_index, pending);
 333    }
 334    f->buf_index = 0;
 335    f->buf_size = pending;
 336
 337    if (qemu_file_get_error(f)) {
 338        return 0;
 339    }
 340
 341    do {
 342        struct iovec iov = { f->buf + pending, IO_BUF_SIZE - pending };
 343        len = qio_channel_readv_full(f->ioc, &iov, 1, pfds, pnfd, 0,
 344                                     &local_error);
 345        if (len == QIO_CHANNEL_ERR_BLOCK) {
 346            if (qemu_in_coroutine()) {
 347                qio_channel_yield(f->ioc, G_IO_IN);
 348            } else {
 349                qio_channel_wait(f->ioc, G_IO_IN);
 350            }
 351        } else if (len < 0) {
 352            len = -EIO;
 353        }
 354    } while (len == QIO_CHANNEL_ERR_BLOCK);
 355
 356    if (len > 0) {
 357        f->buf_size += len;
 358    } else if (len == 0) {
 359        qemu_file_set_error_obj(f, -EIO, local_error);
 360    } else {
 361        qemu_file_set_error_obj(f, len, local_error);
 362    }
 363
 364    for (int i = 0; i < nfd; i++) {
 365        FdEntry *fde = g_new0(FdEntry, 1);
 366        fde->fd = fds[i];
 367        QTAILQ_INSERT_TAIL(&f->fds, fde, entry);
 368    }
 369
 370    return len;
 371}
 372
 373int qemu_file_put_fd(QEMUFile *f, int fd)
 374{
 375    int ret = 0;
 376    QIOChannel *ioc = qemu_file_get_ioc(f);
 377    Error *err = NULL;
 378    struct iovec iov = { (void *)" ", 1 };
 379
 380    /*
 381     * Send a dummy byte so qemu_fill_buffer on the receiving side does not
 382     * fail with a len=0 error.  Flush first to maintain ordering wrt other
 383     * data.
 384     */
 385
 386    qemu_fflush(f);
 387    if (qio_channel_writev_full(ioc, &iov, 1, &fd, 1, 0, &err) < 1) {
 388        error_report_err(error_copy(err));
 389        qemu_file_set_error_obj(f, -EIO, err);
 390        ret = -1;
 391    }
 392    trace_qemu_file_put_fd(f->ioc->name, fd, ret);
 393    return ret;
 394}
 395
 396int qemu_file_get_fd(QEMUFile *f)
 397{
 398    int fd = -1;
 399    FdEntry *fde;
 400
 401    if (!f->can_pass_fd) {
 402        Error *err = NULL;
 403        error_setg(&err, "%s does not support fd passing", f->ioc->name);
 404        error_report_err(error_copy(err));
 405        qemu_file_set_error_obj(f, -EIO, err);
 406        goto out;
 407    }
 408
 409    /* Force the dummy byte and its fd passenger to appear. */
 410    qemu_peek_byte(f, 0);
 411
 412    fde = QTAILQ_FIRST(&f->fds);
 413    if (fde) {
 414        qemu_get_byte(f);       /* Drop the dummy byte */
 415        fd = fde->fd;
 416        QTAILQ_REMOVE(&f->fds, fde, entry);
 417        g_free(fde);
 418    }
 419out:
 420    trace_qemu_file_get_fd(f->ioc->name, fd);
 421    return fd;
 422}
 423
 424/** Closes the file
 425 *
 426 * Returns negative error value if any error happened on previous operations or
 427 * while closing the file. Returns 0 or positive number on success.
 428 *
 429 * The meaning of return value on success depends on the specific backend
 430 * being used.
 431 */
 432int qemu_fclose(QEMUFile *f)
 433{
 434    FdEntry *fde, *next;
 435    int ret = qemu_fflush(f);
 436    int ret2 = qio_channel_close(f->ioc, NULL);
 437    if (ret >= 0) {
 438        ret = ret2;
 439    }
 440    QTAILQ_FOREACH_SAFE(fde, &f->fds, entry, next) {
 441        warn_report("qemu_fclose: received fd %d was never claimed", fde->fd);
 442        close(fde->fd);
 443        g_free(fde);
 444    }
 445    g_clear_pointer(&f->ioc, object_unref);
 446    error_free(f->last_error_obj);
 447    g_free(f);
 448    trace_qemu_file_fclose();
 449    return ret;
 450}
 451
 452/*
 453 * Add buf to iovec. Do flush if iovec is full.
 454 *
 455 * Return values:
 456 * 1 iovec is full and flushed
 457 * 0 iovec is not flushed
 458 *
 459 */
 460static int add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size,
 461                        bool may_free)
 462{
 463    /* check for adjacent buffer and coalesce them */
 464    if (f->iovcnt > 0 && buf == f->iov[f->iovcnt - 1].iov_base +
 465        f->iov[f->iovcnt - 1].iov_len &&
 466        may_free == test_bit(f->iovcnt - 1, f->may_free))
 467    {
 468        f->iov[f->iovcnt - 1].iov_len += size;
 469    } else {
 470        if (f->iovcnt >= MAX_IOV_SIZE) {
 471            /* Should only happen if a previous fflush failed */
 472            assert(qemu_file_get_error(f) || !qemu_file_is_writable(f));
 473            return 1;
 474        }
 475        if (may_free) {
 476            set_bit(f->iovcnt, f->may_free);
 477        }
 478        f->iov[f->iovcnt].iov_base = (uint8_t *)buf;
 479        f->iov[f->iovcnt++].iov_len = size;
 480    }
 481
 482    if (f->iovcnt >= MAX_IOV_SIZE) {
 483        qemu_fflush(f);
 484        return 1;
 485    }
 486
 487    return 0;
 488}
 489
 490static void add_buf_to_iovec(QEMUFile *f, size_t len)
 491{
 492    if (!add_to_iovec(f, f->buf + f->buf_index, len, false)) {
 493        f->buf_index += len;
 494        if (f->buf_index == IO_BUF_SIZE) {
 495            qemu_fflush(f);
 496        }
 497    }
 498}
 499
 500void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size,
 501                           bool may_free)
 502{
 503    if (f->last_error) {
 504        return;
 505    }
 506
 507    add_to_iovec(f, buf, size, may_free);
 508}
 509
 510void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
 511{
 512    size_t l;
 513
 514    if (f->last_error) {
 515        return;
 516    }
 517
 518    while (size > 0) {
 519        l = IO_BUF_SIZE - f->buf_index;
 520        if (l > size) {
 521            l = size;
 522        }
 523        memcpy(f->buf + f->buf_index, buf, l);
 524        add_buf_to_iovec(f, l);
 525        if (qemu_file_get_error(f)) {
 526            break;
 527        }
 528        buf += l;
 529        size -= l;
 530    }
 531}
 532
 533void qemu_put_buffer_at(QEMUFile *f, const uint8_t *buf, size_t buflen,
 534                        off_t pos)
 535{
 536    Error *err = NULL;
 537    size_t ret;
 538
 539    if (f->last_error) {
 540        return;
 541    }
 542
 543    qemu_fflush(f);
 544    ret = qio_channel_pwrite(f->ioc, (char *)buf, buflen, pos, &err);
 545
 546    if (err) {
 547        qemu_file_set_error_obj(f, -EIO, err);
 548        return;
 549    }
 550
 551    if ((ssize_t)ret == QIO_CHANNEL_ERR_BLOCK) {
 552        qemu_file_set_error_obj(f, -EAGAIN, NULL);
 553        return;
 554    }
 555
 556    if (ret != buflen) {
 557        error_setg(&err, "Partial write of size %zu, expected %zu", ret,
 558                   buflen);
 559        qemu_file_set_error_obj(f, -EIO, err);
 560        return;
 561    }
 562
 563    stat64_add(&mig_stats.qemu_file_transferred, buflen);
 564}
 565
 566
 567size_t qemu_get_buffer_at(QEMUFile *f, const uint8_t *buf, size_t buflen,
 568                          off_t pos)
 569{
 570    Error *err = NULL;
 571    size_t ret;
 572
 573    if (f->last_error) {
 574        return 0;
 575    }
 576
 577    ret = qio_channel_pread(f->ioc, (char *)buf, buflen, pos, &err);
 578
 579    if ((ssize_t)ret == -1 || err) {
 580        qemu_file_set_error_obj(f, -EIO, err);
 581        return 0;
 582    }
 583
 584    if ((ssize_t)ret == QIO_CHANNEL_ERR_BLOCK) {
 585        qemu_file_set_error_obj(f, -EAGAIN, NULL);
 586        return 0;
 587    }
 588
 589    if (ret != buflen) {
 590        error_setg(&err, "Partial read of size %zu, expected %zu", ret, buflen);
 591        qemu_file_set_error_obj(f, -EIO, err);
 592        return 0;
 593    }
 594
 595    return ret;
 596}
 597
 598void qemu_set_offset(QEMUFile *f, off_t off, int whence)
 599{
 600    Error *err = NULL;
 601    off_t ret;
 602
 603    if (qemu_file_is_writable(f)) {
 604        qemu_fflush(f);
 605    } else {
 606        /* Drop all cached buffers if existed; will trigger a re-fill later */
 607        f->buf_index = 0;
 608        f->buf_size = 0;
 609    }
 610
 611    ret = qio_channel_io_seek(f->ioc, off, whence, &err);
 612    if (ret == (off_t)-1) {
 613        qemu_file_set_error_obj(f, -EIO, err);
 614    }
 615}
 616
 617off_t qemu_get_offset(QEMUFile *f)
 618{
 619    Error *err = NULL;
 620    off_t ret;
 621
 622    qemu_fflush(f);
 623
 624    ret = qio_channel_io_seek(f->ioc, 0, SEEK_CUR, &err);
 625    if (ret == (off_t)-1) {
 626        qemu_file_set_error_obj(f, -EIO, err);
 627    }
 628    return ret;
 629}
 630
 631
 632void qemu_put_byte(QEMUFile *f, int v)
 633{
 634    if (f->last_error) {
 635        return;
 636    }
 637
 638    f->buf[f->buf_index] = v;
 639    add_buf_to_iovec(f, 1);
 640}
 641
 642void qemu_file_skip(QEMUFile *f, int size)
 643{
 644    if (f->buf_index + size <= f->buf_size) {
 645        f->buf_index += size;
 646    }
 647}
 648
 649/*
 650 * Read 'size' bytes from file (at 'offset') without moving the
 651 * pointer and set 'buf' to point to that data.
 652 *
 653 * It will return size bytes unless there was an error, in which case it will
 654 * return as many as it managed to read (assuming blocking fd's which
 655 * all current QEMUFile are)
 656 */
 657size_t coroutine_mixed_fn qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset)
 658{
 659    ssize_t pending;
 660    size_t index;
 661
 662    assert(!qemu_file_is_writable(f));
 663    assert(offset < IO_BUF_SIZE);
 664    assert(size <= IO_BUF_SIZE - offset);
 665
 666    /* The 1st byte to read from */
 667    index = f->buf_index + offset;
 668    /* The number of available bytes starting at index */
 669    pending = f->buf_size - index;
 670
 671    /*
 672     * qemu_fill_buffer might return just a few bytes, even when there isn't
 673     * an error, so loop collecting them until we get enough.
 674     */
 675    while (pending < size) {
 676        int received = qemu_fill_buffer(f);
 677
 678        if (received <= 0) {
 679            break;
 680        }
 681
 682        index = f->buf_index + offset;
 683        pending = f->buf_size - index;
 684    }
 685
 686    if (pending <= 0) {
 687        return 0;
 688    }
 689    if (size > pending) {
 690        size = pending;
 691    }
 692
 693    *buf = f->buf + index;
 694    return size;
 695}
 696
 697/*
 698 * Read 'size' bytes of data from the file into buf.
 699 * 'size' can be larger than the internal buffer.
 700 *
 701 * It will return size bytes unless there was an error, in which case it will
 702 * return as many as it managed to read (assuming blocking fd's which
 703 * all current QEMUFile are)
 704 */
 705size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size)
 706{
 707    size_t pending = size;
 708    size_t done = 0;
 709
 710    while (pending > 0) {
 711        size_t res;
 712        uint8_t *src;
 713
 714        res = qemu_peek_buffer(f, &src, MIN(pending, IO_BUF_SIZE), 0);
 715        if (res == 0) {
 716            return done;
 717        }
 718        memcpy(buf, src, res);
 719        qemu_file_skip(f, res);
 720        buf += res;
 721        pending -= res;
 722        done += res;
 723    }
 724    return done;
 725}
 726
 727/*
 728 * Read 'size' bytes of data from the file.
 729 * 'size' can be larger than the internal buffer.
 730 *
 731 * The data:
 732 *   may be held on an internal buffer (in which case *buf is updated
 733 *     to point to it) that is valid until the next qemu_file operation.
 734 * OR
 735 *   will be copied to the *buf that was passed in.
 736 *
 737 * The code tries to avoid the copy if possible.
 738 *
 739 * It will return size bytes unless there was an error, in which case it will
 740 * return as many as it managed to read (assuming blocking fd's which
 741 * all current QEMUFile are)
 742 *
 743 * Note: Since **buf may get changed, the caller should take care to
 744 *       keep a pointer to the original buffer if it needs to deallocate it.
 745 */
 746size_t coroutine_mixed_fn qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size)
 747{
 748    if (size < IO_BUF_SIZE) {
 749        size_t res;
 750        uint8_t *src = NULL;
 751
 752        res = qemu_peek_buffer(f, &src, size, 0);
 753
 754        if (res == size) {
 755            qemu_file_skip(f, res);
 756            *buf = src;
 757            return res;
 758        }
 759    }
 760
 761    return qemu_get_buffer(f, *buf, size);
 762}
 763
 764/*
 765 * Peeks a single byte from the buffer; this isn't guaranteed to work if
 766 * offset leaves a gap after the previous read/peeked data.
 767 */
 768int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset)
 769{
 770    int index = f->buf_index + offset;
 771
 772    assert(!qemu_file_is_writable(f));
 773    assert(offset < IO_BUF_SIZE);
 774
 775    if (index >= f->buf_size) {
 776        qemu_fill_buffer(f);
 777        index = f->buf_index + offset;
 778        if (index >= f->buf_size) {
 779            return 0;
 780        }
 781    }
 782    return f->buf[index];
 783}
 784
 785int coroutine_mixed_fn qemu_get_byte(QEMUFile *f)
 786{
 787    int result;
 788
 789    result = qemu_peek_byte(f, 0);
 790    qemu_file_skip(f, 1);
 791    return result;
 792}
 793
 794uint64_t qemu_file_transferred(QEMUFile *f)
 795{
 796    uint64_t ret = stat64_get(&mig_stats.qemu_file_transferred);
 797    int i;
 798
 799    g_assert(qemu_file_is_writable(f));
 800
 801    for (i = 0; i < f->iovcnt; i++) {
 802        ret += f->iov[i].iov_len;
 803    }
 804
 805    return ret;
 806}
 807
 808void qemu_put_be16(QEMUFile *f, unsigned int v)
 809{
 810    qemu_put_byte(f, v >> 8);
 811    qemu_put_byte(f, v);
 812}
 813
 814void qemu_put_be32(QEMUFile *f, unsigned int v)
 815{
 816    qemu_put_byte(f, v >> 24);
 817    qemu_put_byte(f, v >> 16);
 818    qemu_put_byte(f, v >> 8);
 819    qemu_put_byte(f, v);
 820}
 821
 822void qemu_put_be64(QEMUFile *f, uint64_t v)
 823{
 824    qemu_put_be32(f, v >> 32);
 825    qemu_put_be32(f, v);
 826}
 827
 828unsigned int qemu_get_be16(QEMUFile *f)
 829{
 830    unsigned int v;
 831    v = qemu_get_byte(f) << 8;
 832    v |= qemu_get_byte(f);
 833    return v;
 834}
 835
 836unsigned int qemu_get_be32(QEMUFile *f)
 837{
 838    unsigned int v;
 839    v = (unsigned int)qemu_get_byte(f) << 24;
 840    v |= qemu_get_byte(f) << 16;
 841    v |= qemu_get_byte(f) << 8;
 842    v |= qemu_get_byte(f);
 843    return v;
 844}
 845
 846uint64_t qemu_get_be64(QEMUFile *f)
 847{
 848    uint64_t v;
 849    v = (uint64_t)qemu_get_be32(f) << 32;
 850    v |= qemu_get_be32(f);
 851    return v;
 852}
 853
 854/*
 855 * Get a string whose length is determined by a single preceding byte
 856 * A preallocated 256 byte buffer must be passed in.
 857 * Returns: len on success and a 0 terminated string in the buffer
 858 *          else 0
 859 *          (Note a 0 length string will return 0 either way)
 860 */
 861size_t coroutine_fn qemu_get_counted_string(QEMUFile *f, char buf[256])
 862{
 863    size_t len = qemu_get_byte(f);
 864    size_t res = qemu_get_buffer(f, (uint8_t *)buf, len);
 865
 866    buf[res] = 0;
 867
 868    return res == len ? res : 0;
 869}
 870
 871/*
 872 * Put a string with one preceding byte containing its length. The length of
 873 * the string should be less than 256.
 874 */
 875void qemu_put_counted_string(QEMUFile *f, const char *str)
 876{
 877    size_t len = strlen(str);
 878
 879    assert(len < 256);
 880    qemu_put_byte(f, len);
 881    qemu_put_buffer(f, (const uint8_t *)str, len);
 882}
 883
 884/*
 885 * Set the blocking state of the QEMUFile.
 886 * Note: On some transports the OS only keeps a single blocking state for
 887 *       both directions, and thus changing the blocking on the main
 888 *       QEMUFile can also affect the return path.
 889 */
 890void qemu_file_set_blocking(QEMUFile *f, bool block)
 891{
 892    qio_channel_set_blocking(f->ioc, block, NULL);
 893}
 894
 895/*
 896 * qemu_file_get_ioc:
 897 *
 898 * Get the ioc object for the file, without incrementing
 899 * the reference count.
 900 *
 901 * Returns: the ioc object
 902 */
 903QIOChannel *qemu_file_get_ioc(QEMUFile *file)
 904{
 905    return file->ioc;
 906}
 907
 908/*
 909 * Read size bytes from QEMUFile f and write them to fd.
 910 */
 911int qemu_file_get_to_fd(QEMUFile *f, int fd, size_t size)
 912{
 913    while (size) {
 914        size_t pending = f->buf_size - f->buf_index;
 915        ssize_t rc;
 916
 917        if (!pending) {
 918            rc = qemu_fill_buffer(f);
 919            if (rc < 0) {
 920                return rc;
 921            }
 922            if (rc == 0) {
 923                return -EIO;
 924            }
 925            continue;
 926        }
 927
 928        rc = write(fd, f->buf + f->buf_index, MIN(pending, size));
 929        if (rc < 0) {
 930            return -errno;
 931        }
 932        if (rc == 0) {
 933            return -EIO;
 934        }
 935        f->buf_index += rc;
 936        size -= rc;
 937    }
 938
 939    return 0;
 940}
 941