qemu/hw/virtio/vhost-user.c
<<
>>
Prefs
   1/*
   2 * vhost-user
   3 *
   4 * Copyright (c) 2013 Virtual Open Systems Sarl.
   5 *
   6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
   7 * See the COPYING file in the top-level directory.
   8 *
   9 */
  10
  11#include "qemu/osdep.h"
  12#include "qapi/error.h"
  13#include "hw/virtio/vhost.h"
  14#include "hw/virtio/vhost-user.h"
  15#include "hw/virtio/vhost-backend.h"
  16#include "hw/virtio/virtio.h"
  17#include "hw/virtio/virtio-net.h"
  18#include "chardev/char-fe.h"
  19#include "sysemu/kvm.h"
  20#include "qemu/error-report.h"
  21#include "qemu/main-loop.h"
  22#include "qemu/sockets.h"
  23#include "sysemu/cryptodev.h"
  24#include "migration/migration.h"
  25#include "migration/postcopy-ram.h"
  26#include "trace.h"
  27
  28#include <sys/ioctl.h>
  29#include <sys/socket.h>
  30#include <sys/un.h>
  31
  32#include "standard-headers/linux/vhost_types.h"
  33
  34#ifdef CONFIG_LINUX
  35#include <linux/userfaultfd.h>
  36#endif
  37
   38#define VHOST_MEMORY_BASELINE_NREGIONS    8 /* size of VhostUserMemory.regions[] */
     /*
      * NOTE(review): the next two constants are not referenced in the part of
      * this file reviewed here.  Judging by the names only,
      * VHOST_USER_F_PROTOCOL_FEATURES is a feature bit number and
      * VHOST_USER_SLAVE_MAX_FDS a cap on fds per slave message - confirm
      * against their users.
      */
   39#define VHOST_USER_F_PROTOCOL_FEATURES 30
   40#define VHOST_USER_SLAVE_MAX_FDS     8
   41
   42/*
   43 * Set maximum number of RAM slots supported to
   44 * the maximum number supported by the target
   45 * hardware platform.
     *
     * The value sizes the per-device arrays in struct vhost_user
     * (postcopy_client_bases, shadow_regions) and the on-stack scratch
     * arrays used while diffing memory regions.
     *
     * NOTE(review): TARGET_X86, TARGET_ARM_64 and TARGET_PPC_64 may not be
     * macros the build actually defines (TARGET_I386, TARGET_AARCH64 and
     * TARGET_PPC64 look like the usual spellings) - confirm.  If they are
     * not defined, those targets are only matched through the other operand
     * of each test, or fall through to the 512 default.
   46 */
   47#if defined(TARGET_X86) || defined(TARGET_X86_64) || \
   48    defined(TARGET_ARM) || defined(TARGET_ARM_64)
   49#include "hw/acpi/acpi.h"
   50#define VHOST_USER_MAX_RAM_SLOTS ACPI_MAX_RAM_SLOTS
   51
   52#elif defined(TARGET_PPC) || defined(TARGET_PPC_64)
   53#include "hw/ppc/spapr.h"
   54#define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS
   55
   56#else
   57#define VHOST_USER_MAX_RAM_SLOTS 512
   58#endif
   59
   60/*
   61 * Maximum size of virtio device config space
     * (size in bytes of VhostUserConfig.region[])
   62 */
   63#define VHOST_USER_MAX_CONFIG_SIZE 256
  64
   65enum VhostUserProtocolFeature {
         /*
          * Bit numbers of the vhost-user protocol features.  They are tested
          * against dev->protocol_features with virtio_has_feature(), as
          * vhost_user_set_log_base() does for LOG_SHMFD.
          */
   66    VHOST_USER_PROTOCOL_F_MQ = 0,
   67    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
   68    VHOST_USER_PROTOCOL_F_RARP = 2,
   69    VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
   70    VHOST_USER_PROTOCOL_F_NET_MTU = 4,
   71    VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
   72    VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
   73    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
   74    VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
   75    VHOST_USER_PROTOCOL_F_CONFIG = 9,
   76    VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
   77    VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
   78    VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
   79    VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
   80    /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */
   81    VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
         /* One past the highest assigned bit number (evaluates to 16). */
   82    VHOST_USER_PROTOCOL_F_MAX
   83};
   84
     /*
      * Mask with one bit set for every bit number below
      * VHOST_USER_PROTOCOL_F_MAX; this includes the reserved bit 14.
      */
   85#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
  86
   87typedef enum VhostUserRequest {
         /*
          * Message types stored in VhostUserHeader.request.  Every value is
          * assigned explicitly; number 35 is left unassigned (see below).
          */
   88    VHOST_USER_NONE = 0,
   89    VHOST_USER_GET_FEATURES = 1,
   90    VHOST_USER_SET_FEATURES = 2,
   91    VHOST_USER_SET_OWNER = 3,
   92    VHOST_USER_RESET_OWNER = 4,
   93    VHOST_USER_SET_MEM_TABLE = 5,
   94    VHOST_USER_SET_LOG_BASE = 6,
   95    VHOST_USER_SET_LOG_FD = 7,
   96    VHOST_USER_SET_VRING_NUM = 8,
   97    VHOST_USER_SET_VRING_ADDR = 9,
   98    VHOST_USER_SET_VRING_BASE = 10,
   99    VHOST_USER_GET_VRING_BASE = 11,
  100    VHOST_USER_SET_VRING_KICK = 12,
  101    VHOST_USER_SET_VRING_CALL = 13,
  102    VHOST_USER_SET_VRING_ERR = 14,
  103    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
  104    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
  105    VHOST_USER_GET_QUEUE_NUM = 17,
  106    VHOST_USER_SET_VRING_ENABLE = 18,
  107    VHOST_USER_SEND_RARP = 19,
  108    VHOST_USER_NET_SET_MTU = 20,
  109    VHOST_USER_SET_SLAVE_REQ_FD = 21,
  110    VHOST_USER_IOTLB_MSG = 22,
  111    VHOST_USER_SET_VRING_ENDIAN = 23,
  112    VHOST_USER_GET_CONFIG = 24,
  113    VHOST_USER_SET_CONFIG = 25,
  114    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
  115    VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
  116    VHOST_USER_POSTCOPY_ADVISE  = 28,
  117    VHOST_USER_POSTCOPY_LISTEN  = 29,
  118    VHOST_USER_POSTCOPY_END     = 30,
  119    VHOST_USER_GET_INFLIGHT_FD = 31,
  120    VHOST_USER_SET_INFLIGHT_FD = 32,
  121    VHOST_USER_GPU_SET_SOCKET = 33,
  122    VHOST_USER_RESET_DEVICE = 34,
  123    /* Message number 35 reserved for VHOST_USER_VRING_KICK. */
  124    VHOST_USER_GET_MAX_MEM_SLOTS = 36,
  125    VHOST_USER_ADD_MEM_REG = 37,
  126    VHOST_USER_REM_MEM_REG = 38,
         /* One past the highest assigned request number (evaluates to 39). */
  127    VHOST_USER_MAX
  128} VhostUserRequest;
 129
  130typedef enum VhostUserSlaveRequest {
         /*
          * NOTE(review): not referenced in the part of this file reviewed
          * here; by name these are request types for the slave channel -
          * confirm against the slave request handler.
          */
  131    VHOST_USER_SLAVE_NONE = 0,
  132    VHOST_USER_SLAVE_IOTLB_MSG = 1,
  133    VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
  134    VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
         /* One past the highest assigned value (evaluates to 4). */
  135    VHOST_USER_SLAVE_MAX
  136}  VhostUserSlaveRequest;
 137
  138typedef struct VhostUserMemoryRegion {
         /*
          * One memory region as carried in vhost-user message payloads.
          * vhost_user_fill_msg_region() copies the first three fields from a
          * struct vhost_memory_region.
          */
  139    uint64_t guest_phys_addr;
  140    uint64_t memory_size;
         /*
          * In requests this is the sender's address.  In the postcopy replies
          * handled in this file it is read back as the backend's base address
          * for the region.
          */
  141    uint64_t userspace_addr;
         /* Passed separately to vhost_user_fill_msg_region() by its caller. */
  142    uint64_t mmap_offset;
  143} VhostUserMemoryRegion;
 144
  145typedef struct VhostUserMemory {
         /*
          * Payload of VHOST_USER_SET_MEM_TABLE, built by
          * vhost_user_fill_set_mem_table_msg().
          */
         /* Number of entries of regions[] that are filled in. */
  146    uint32_t nregions;
         /* Counted in the message size; not written by the visible code. */
  147    uint32_t padding;
  148    VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS];
  149} VhostUserMemory;
 150
  151typedef struct VhostUserMemRegMsg {
         /*
          * Payload of VHOST_USER_ADD_MEM_REG and VHOST_USER_REM_MEM_REG: a
          * single region (see send_add_regions() / send_remove_regions()).
          */
         /* Not written explicitly by the senders in this file. */
  152    uint64_t padding;
  153    VhostUserMemoryRegion region;
  154} VhostUserMemRegMsg;
 155
  156typedef struct VhostUserLog {
         /*
          * Payload of VHOST_USER_SET_LOG_BASE.  vhost_user_set_log_base() sets
          * mmap_size to log->size * sizeof(*log->log) and mmap_offset to 0.
          */
  157    uint64_t mmap_size;
  158    uint64_t mmap_offset;
  159} VhostUserLog;
 160
  161typedef struct VhostUserConfig {
         /*
          * Config-space payload: three 32-bit header fields followed by the
          * data area.  VHOST_USER_CONFIG_HDR_SIZE is the combined size of the
          * three header fields.
          * NOTE(review): the users of this struct are outside the part of the
          * file reviewed here; field semantics are inferred from the names.
          */
  162    uint32_t offset;
  163    uint32_t size;
  164    uint32_t flags;
  165    uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
  166} VhostUserConfig;
 167
  168#define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN    512 /* size of auth_key[] below */
  169#define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN  64 /* size of key[] below */
  170
  171typedef struct VhostUserCryptoSession {
         /*
          * Crypto-session payload (member "session" of VhostUserPayload).
          * NOTE(review): its users are outside the part of the file reviewed
          * here.
          */
  172    /* session id for success, -1 on errors */
  173    int64_t session_id;
  174    CryptoDevBackendSymSessionInfo session_setup_data;
  175    uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
  176    uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
  177} VhostUserCryptoSession;
 178
  179static VhostUserConfig c __attribute__ ((unused));
     /*
      * Combined size of the offset, size and flags fields of VhostUserConfig.
      * The otherwise unused variable "c" above exists so that sizeof can be
      * applied to the individual fields.
      */
  180#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
  181                                   + sizeof(c.size) \
  182                                   + sizeof(c.flags))
 183
  184typedef struct VhostUserVringArea {
         /*
          * Member "area" of VhostUserPayload.
          * NOTE(review): not used in the part of the file reviewed here; the
          * meaning of the fields cannot be confirmed from it.
          */
  185    uint64_t u64;
  186    uint64_t size;
  187    uint64_t offset;
  188} VhostUserVringArea;
 189
  190typedef struct VhostUserInflight {
         /*
          * Member "inflight" of VhostUserPayload.
          * NOTE(review): not used in the part of the file reviewed here; the
          * meaning of the fields cannot be confirmed from it.
          */
  191    uint64_t mmap_size;
  192    uint64_t mmap_offset;
  193    uint16_t num_queues;
  194    uint16_t queue_size;
  195} VhostUserInflight;
 196
  197typedef struct {
         /*
          * Fixed-size header that precedes every message payload.  Packed, so
          * VHOST_USER_HDR_SIZE is exactly the sum of the three fields.
          */
  198    VhostUserRequest request;
  199
         /*
          * Bit layout of "flags":
          *   bits 0-1  protocol version (VHOST_USER_VERSION_MASK)
          *   bit  2    set in replies; vhost_user_read_header() accepts a
          *             header only if flags == REPLY_MASK | VHOST_USER_VERSION
          *   bit  3    sender wants a reply; tested by process_message_reply()
          *             and cleared by vhost_user_write() when it skips a
          *             one-time request
          */
  200#define VHOST_USER_VERSION_MASK     (0x3)
  201#define VHOST_USER_REPLY_MASK       (0x1<<2)
  202#define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
  203    uint32_t flags;
  204    uint32_t size; /* the following payload size */
  205} QEMU_PACKED VhostUserHeader;
 206
  207typedef union {
         /*
          * Message body; which member is valid depends on the request type.
          * sizeof(VhostUserPayload) is the upper bound vhost_user_read()
          * enforces on hdr.size.
          */
  208#define VHOST_USER_VRING_IDX_MASK   (0xff)
  209#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
             /* Status word in reply-ack messages; also the postcopy ack. */
  210        uint64_t u64;
  211        struct vhost_vring_state state;
  212        struct vhost_vring_addr addr;
             /* VHOST_USER_SET_MEM_TABLE */
  213        VhostUserMemory memory;
             /* VHOST_USER_ADD_MEM_REG / VHOST_USER_REM_MEM_REG */
  214        VhostUserMemRegMsg mem_reg;
             /* VHOST_USER_SET_LOG_BASE */
  215        VhostUserLog log;
  216        struct vhost_iotlb_msg iotlb;
  217        VhostUserConfig config;
  218        VhostUserCryptoSession session;
  219        VhostUserVringArea area;
  220        VhostUserInflight inflight;
  221} VhostUserPayload;
 222
  223typedef struct VhostUserMsg {
         /*
          * A complete message: header immediately followed by the payload.
          * vhost_user_write() sends the first VHOST_USER_HDR_SIZE + hdr.size
          * bytes of this struct; vhost_user_read() fills it in the same layout.
          */
  224    VhostUserHeader hdr;
  225    VhostUserPayload payload;
  226} QEMU_PACKED VhostUserMsg;
 227
  228static VhostUserMsg m __attribute__ ((unused));
     /* Size in bytes of the message header. */
  229#define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))
  230
     /* Largest payload size accepted by vhost_user_read(). */
  231#define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))
  232
  233/* The version of the protocol we support */
  234#define VHOST_USER_VERSION    (0x1)
 235
  236struct vhost_user {
         /*
          * Per-device state of the vhost-user backend; reached through
          * dev->opaque by the functions in this file.
          */
  237    struct vhost_dev *dev;
  238    /* Shared between vhost devs of the same virtio device */
  239    VhostUserState *user;
         /* NOTE(review): not used in the part of the file reviewed here. */
  240    int slave_fd;
  241    NotifierWithReturn postcopy_notifier;
  242    struct PostCopyFD  postcopy_fd;
         /*
          * Per-region base address taken from the backend's postcopy replies,
          * indexed like dev->mem->regions.
          */
  243    uint64_t           postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS];
  244    /* Length of the region_rb and region_rb_offset arrays */
  245    size_t             region_rb_len;
  246    /* RAMBlock associated with a given region */
  247    RAMBlock         **region_rb;
  248    /* The offset from the start of the RAMBlock to the start of the
  249     * vhost region.
  250     */
  251    ram_addr_t        *region_rb_offset;
  252
  253    /* True once we've entered postcopy_listen */
  254    bool               postcopy_listen;
  255
  256    /* Our current regions */
         /*
          * Entries are appended by send_add_regions() and deleted by
          * send_remove_regions(); scrub_shadow_regions() diffs this table
          * against dev->mem.
          */
  257    int num_shadow_regions;
  258    struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS];
  259};
 260
  261struct scrub_regions {
         /*
          * One entry of the "to add" or "to remove" list produced by
          * scrub_shadow_regions().
          */
         /* Points into u->shadow_regions (removal) or dev->mem->regions (add). */
  262    struct vhost_memory_region *region;
         /* Index of "region" within the table it points into. */
  263    int reg_idx;
         /*
          * Only set for additions: the number of fd-backed device regions seen
          * up to and including this one.  send_add_regions() passes it to the
          * trace points only.
          */
  264    int fd_idx;
  265};
 266
 267static bool ioeventfd_enabled(void)
 268{
 269    return !kvm_enabled() || kvm_eventfds_enabled();
 270}
 271
 272static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
 273{
 274    struct vhost_user *u = dev->opaque;
 275    CharBackend *chr = u->user->chr;
 276    uint8_t *p = (uint8_t *) msg;
 277    int r, size = VHOST_USER_HDR_SIZE;
 278
 279    r = qemu_chr_fe_read_all(chr, p, size);
 280    if (r != size) {
 281        error_report("Failed to read msg header. Read %d instead of %d."
 282                     " Original request %d.", r, size, msg->hdr.request);
 283        return -1;
 284    }
 285
 286    /* validate received flags */
 287    if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
 288        error_report("Failed to read msg header."
 289                " Flags 0x%x instead of 0x%x.", msg->hdr.flags,
 290                VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
 291        return -1;
 292    }
 293
 294    return 0;
 295}
 296
 297static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
 298{
 299    struct vhost_user *u = dev->opaque;
 300    CharBackend *chr = u->user->chr;
 301    uint8_t *p = (uint8_t *) msg;
 302    int r, size;
 303
 304    if (vhost_user_read_header(dev, msg) < 0) {
 305        return -1;
 306    }
 307
 308    /* validate message size is sane */
 309    if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
 310        error_report("Failed to read msg header."
 311                " Size %d exceeds the maximum %zu.", msg->hdr.size,
 312                VHOST_USER_PAYLOAD_SIZE);
 313        return -1;
 314    }
 315
 316    if (msg->hdr.size) {
 317        p += VHOST_USER_HDR_SIZE;
 318        size = msg->hdr.size;
 319        r = qemu_chr_fe_read_all(chr, p, size);
 320        if (r != size) {
 321            error_report("Failed to read msg payload."
 322                         " Read %d instead of %d.", r, msg->hdr.size);
 323            return -1;
 324        }
 325    }
 326
 327    return 0;
 328}
 329
 330static int process_message_reply(struct vhost_dev *dev,
 331                                 const VhostUserMsg *msg)
 332{
 333    VhostUserMsg msg_reply;
 334
 335    if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
 336        return 0;
 337    }
 338
 339    if (vhost_user_read(dev, &msg_reply) < 0) {
 340        return -1;
 341    }
 342
 343    if (msg_reply.hdr.request != msg->hdr.request) {
 344        error_report("Received unexpected msg type."
 345                     "Expected %d received %d",
 346                     msg->hdr.request, msg_reply.hdr.request);
 347        return -1;
 348    }
 349
 350    return msg_reply.payload.u64 ? -1 : 0;
 351}
 352
 353static bool vhost_user_one_time_request(VhostUserRequest request)
 354{
 355    switch (request) {
 356    case VHOST_USER_SET_OWNER:
 357    case VHOST_USER_RESET_OWNER:
 358    case VHOST_USER_SET_MEM_TABLE:
 359    case VHOST_USER_GET_QUEUE_NUM:
 360    case VHOST_USER_NET_SET_MTU:
 361        return true;
 362    default:
 363        return false;
 364    }
 365}
 366
 367/* most non-init callers ignore the error */
 368static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
 369                            int *fds, int fd_num)
 370{
 371    struct vhost_user *u = dev->opaque;
 372    CharBackend *chr = u->user->chr;
 373    int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;
 374
 375    /*
 376     * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
 377     * we just need send it once in the first time. For later such
 378     * request, we just ignore it.
 379     */
 380    if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) {
 381        msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
 382        return 0;
 383    }
 384
 385    if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
 386        error_report("Failed to set msg fds.");
 387        return -1;
 388    }
 389
 390    ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
 391    if (ret != size) {
 392        error_report("Failed to write msg."
 393                     " Wrote %d instead of %d.", ret, size);
 394        return -1;
 395    }
 396
 397    return 0;
 398}
 399
 400int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd)
 401{
 402    VhostUserMsg msg = {
 403        .hdr.request = VHOST_USER_GPU_SET_SOCKET,
 404        .hdr.flags = VHOST_USER_VERSION,
 405    };
 406
 407    return vhost_user_write(dev, &msg, &fd, 1);
 408}
 409
 410static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
 411                                   struct vhost_log *log)
 412{
 413    int fds[VHOST_USER_MAX_RAM_SLOTS];
 414    size_t fd_num = 0;
 415    bool shmfd = virtio_has_feature(dev->protocol_features,
 416                                    VHOST_USER_PROTOCOL_F_LOG_SHMFD);
 417    VhostUserMsg msg = {
 418        .hdr.request = VHOST_USER_SET_LOG_BASE,
 419        .hdr.flags = VHOST_USER_VERSION,
 420        .payload.log.mmap_size = log->size * sizeof(*(log->log)),
 421        .payload.log.mmap_offset = 0,
 422        .hdr.size = sizeof(msg.payload.log),
 423    };
 424
 425    if (shmfd && log->fd != -1) {
 426        fds[fd_num++] = log->fd;
 427    }
 428
 429    if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
 430        return -1;
 431    }
 432
 433    if (shmfd) {
 434        msg.hdr.size = 0;
 435        if (vhost_user_read(dev, &msg) < 0) {
 436            return -1;
 437        }
 438
 439        if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
 440            error_report("Received unexpected msg type. "
 441                         "Expected %d received %d",
 442                         VHOST_USER_SET_LOG_BASE, msg.hdr.request);
 443            return -1;
 444        }
 445    }
 446
 447    return 0;
 448}
 449
 450static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset,
 451                                            int *fd)
 452{
 453    MemoryRegion *mr;
 454
 455    assert((uintptr_t)addr == addr);
 456    mr = memory_region_from_host((void *)(uintptr_t)addr, offset);
 457    *fd = memory_region_get_fd(mr);
 458
 459    return mr;
 460}
 461
 462static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst,
 463                                       struct vhost_memory_region *src,
 464                                       uint64_t mmap_offset)
 465{
 466    assert(src != NULL && dst != NULL);
 467    dst->userspace_addr = src->userspace_addr;
 468    dst->memory_size = src->memory_size;
 469    dst->guest_phys_addr = src->guest_phys_addr;
 470    dst->mmap_offset = mmap_offset;
 471}
 472
 473static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u,
 474                                             struct vhost_dev *dev,
 475                                             VhostUserMsg *msg,
 476                                             int *fds, size_t *fd_num,
 477                                             bool track_ramblocks)
 478{
 479    int i, fd;
 480    ram_addr_t offset;
 481    MemoryRegion *mr;
 482    struct vhost_memory_region *reg;
 483    VhostUserMemoryRegion region_buffer;
 484
 485    msg->hdr.request = VHOST_USER_SET_MEM_TABLE;
 486
 487    for (i = 0; i < dev->mem->nregions; ++i) {
 488        reg = dev->mem->regions + i;
 489
 490        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
 491        if (fd > 0) {
 492            if (track_ramblocks) {
 493                assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS);
 494                trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name,
 495                                                      reg->memory_size,
 496                                                      reg->guest_phys_addr,
 497                                                      reg->userspace_addr,
 498                                                      offset);
 499                u->region_rb_offset[i] = offset;
 500                u->region_rb[i] = mr->ram_block;
 501            } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) {
 502                error_report("Failed preparing vhost-user memory table msg");
 503                return -1;
 504            }
 505            vhost_user_fill_msg_region(&region_buffer, reg, offset);
 506            msg->payload.memory.regions[*fd_num] = region_buffer;
 507            fds[(*fd_num)++] = fd;
 508        } else if (track_ramblocks) {
 509            u->region_rb_offset[i] = 0;
 510            u->region_rb[i] = NULL;
 511        }
 512    }
 513
 514    msg->payload.memory.nregions = *fd_num;
 515
 516    if (!*fd_num) {
 517        error_report("Failed initializing vhost-user memory map, "
 518                     "consider using -object memory-backend-file share=on");
 519        return -1;
 520    }
 521
 522    msg->hdr.size = sizeof(msg->payload.memory.nregions);
 523    msg->hdr.size += sizeof(msg->payload.memory.padding);
 524    msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion);
 525
 526    return 1;
 527}
 528
 529static inline bool reg_equal(struct vhost_memory_region *shadow_reg,
 530                             struct vhost_memory_region *vdev_reg)
 531{
 532    return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr &&
 533        shadow_reg->userspace_addr == vdev_reg->userspace_addr &&
 534        shadow_reg->memory_size == vdev_reg->memory_size;
 535}
 536
 537static void scrub_shadow_regions(struct vhost_dev *dev,
 538                                 struct scrub_regions *add_reg,
 539                                 int *nr_add_reg,
 540                                 struct scrub_regions *rem_reg,
 541                                 int *nr_rem_reg, uint64_t *shadow_pcb,
 542                                 bool track_ramblocks)
 543{
 544    struct vhost_user *u = dev->opaque;
 545    bool found[VHOST_USER_MAX_RAM_SLOTS] = {};
 546    struct vhost_memory_region *reg, *shadow_reg;
 547    int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0;
 548    ram_addr_t offset;
 549    MemoryRegion *mr;
 550    bool matching;
 551
 552    /*
 553     * Find memory regions present in our shadow state which are not in
 554     * the device's current memory state.
 555     *
 556     * Mark regions in both the shadow and device state as "found".
 557     */
 558    for (i = 0; i < u->num_shadow_regions; i++) {
 559        shadow_reg = &u->shadow_regions[i];
 560        matching = false;
 561
 562        for (j = 0; j < dev->mem->nregions; j++) {
 563            reg = &dev->mem->regions[j];
 564
 565            mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
 566
 567            if (reg_equal(shadow_reg, reg)) {
 568                matching = true;
 569                found[j] = true;
 570                if (track_ramblocks) {
 571                    /*
 572                     * Reset postcopy client bases, region_rb, and
 573                     * region_rb_offset in case regions are removed.
 574                     */
 575                    if (fd > 0) {
 576                        u->region_rb_offset[j] = offset;
 577                        u->region_rb[j] = mr->ram_block;
 578                        shadow_pcb[j] = u->postcopy_client_bases[i];
 579                    } else {
 580                        u->region_rb_offset[j] = 0;
 581                        u->region_rb[j] = NULL;
 582                    }
 583                }
 584                break;
 585            }
 586        }
 587
 588        /*
 589         * If the region was not found in the current device memory state
 590         * create an entry for it in the removed list.
 591         */
 592        if (!matching) {
 593            rem_reg[rm_idx].region = shadow_reg;
 594            rem_reg[rm_idx++].reg_idx = i;
 595        }
 596    }
 597
 598    /*
 599     * For regions not marked "found", create entries in the added list.
 600     *
 601     * Note their indexes in the device memory state and the indexes of their
 602     * file descriptors.
 603     */
 604    for (i = 0; i < dev->mem->nregions; i++) {
 605        reg = &dev->mem->regions[i];
 606        vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
 607        if (fd > 0) {
 608            ++fd_num;
 609        }
 610
 611        /*
 612         * If the region was in both the shadow and device state we don't
 613         * need to send a VHOST_USER_ADD_MEM_REG message for it.
 614         */
 615        if (found[i]) {
 616            continue;
 617        }
 618
 619        add_reg[add_idx].region = reg;
 620        add_reg[add_idx].reg_idx = i;
 621        add_reg[add_idx++].fd_idx = fd_num;
 622    }
 623    *nr_rem_reg = rm_idx;
 624    *nr_add_reg = add_idx;
 625
 626    return;
 627}
 628
 629static int send_remove_regions(struct vhost_dev *dev,
 630                               struct scrub_regions *remove_reg,
 631                               int nr_rem_reg, VhostUserMsg *msg,
 632                               bool reply_supported)
 633{
 634    struct vhost_user *u = dev->opaque;
 635    struct vhost_memory_region *shadow_reg;
 636    int i, fd, shadow_reg_idx, ret;
 637    ram_addr_t offset;
 638    VhostUserMemoryRegion region_buffer;
 639
 640    /*
 641     * The regions in remove_reg appear in the same order they do in the
 642     * shadow table. Therefore we can minimize memory copies by iterating
 643     * through remove_reg backwards.
 644     */
 645    for (i = nr_rem_reg - 1; i >= 0; i--) {
 646        shadow_reg = remove_reg[i].region;
 647        shadow_reg_idx = remove_reg[i].reg_idx;
 648
 649        vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd);
 650
 651        if (fd > 0) {
 652            msg->hdr.request = VHOST_USER_REM_MEM_REG;
 653            vhost_user_fill_msg_region(&region_buffer, shadow_reg, 0);
 654            msg->payload.mem_reg.region = region_buffer;
 655
 656            if (vhost_user_write(dev, msg, &fd, 1) < 0) {
 657                return -1;
 658            }
 659
 660            if (reply_supported) {
 661                ret = process_message_reply(dev, msg);
 662                if (ret) {
 663                    return ret;
 664                }
 665            }
 666        }
 667
 668        /*
 669         * At this point we know the backend has unmapped the region. It is now
 670         * safe to remove it from the shadow table.
 671         */
 672        memmove(&u->shadow_regions[shadow_reg_idx],
 673                &u->shadow_regions[shadow_reg_idx + 1],
 674                sizeof(struct vhost_memory_region) *
 675                (u->num_shadow_regions - shadow_reg_idx - 1));
 676        u->num_shadow_regions--;
 677    }
 678
 679    return 0;
 680}
 681
 682static int send_add_regions(struct vhost_dev *dev,
 683                            struct scrub_regions *add_reg, int nr_add_reg,
 684                            VhostUserMsg *msg, uint64_t *shadow_pcb,
 685                            bool reply_supported, bool track_ramblocks)
 686{
 687    struct vhost_user *u = dev->opaque;
 688    int i, fd, ret, reg_idx, reg_fd_idx;
 689    struct vhost_memory_region *reg;
 690    MemoryRegion *mr;
 691    ram_addr_t offset;
 692    VhostUserMsg msg_reply;
 693    VhostUserMemoryRegion region_buffer;
 694
 695    for (i = 0; i < nr_add_reg; i++) {
 696        reg = add_reg[i].region;
 697        reg_idx = add_reg[i].reg_idx;
 698        reg_fd_idx = add_reg[i].fd_idx;
 699
 700        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
 701
 702        if (fd > 0) {
 703            if (track_ramblocks) {
 704                trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name,
 705                                                      reg->memory_size,
 706                                                      reg->guest_phys_addr,
 707                                                      reg->userspace_addr,
 708                                                      offset);
 709                u->region_rb_offset[reg_idx] = offset;
 710                u->region_rb[reg_idx] = mr->ram_block;
 711            }
 712            msg->hdr.request = VHOST_USER_ADD_MEM_REG;
 713            vhost_user_fill_msg_region(&region_buffer, reg, offset);
 714            msg->payload.mem_reg.region = region_buffer;
 715
 716            if (vhost_user_write(dev, msg, &fd, 1) < 0) {
 717                return -1;
 718            }
 719
 720            if (track_ramblocks) {
 721                uint64_t reply_gpa;
 722
 723                if (vhost_user_read(dev, &msg_reply) < 0) {
 724                    return -1;
 725                }
 726
 727                reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr;
 728
 729                if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) {
 730                    error_report("%s: Received unexpected msg type."
 731                                 "Expected %d received %d", __func__,
 732                                 VHOST_USER_ADD_MEM_REG,
 733                                 msg_reply.hdr.request);
 734                    return -1;
 735                }
 736
 737                /*
 738                 * We're using the same structure, just reusing one of the
 739                 * fields, so it should be the same size.
 740                 */
 741                if (msg_reply.hdr.size != msg->hdr.size) {
 742                    error_report("%s: Unexpected size for postcopy reply "
 743                                 "%d vs %d", __func__, msg_reply.hdr.size,
 744                                 msg->hdr.size);
 745                    return -1;
 746                }
 747
 748                /* Get the postcopy client base from the backend's reply. */
 749                if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) {
 750                    shadow_pcb[reg_idx] =
 751                        msg_reply.payload.mem_reg.region.userspace_addr;
 752                    trace_vhost_user_set_mem_table_postcopy(
 753                        msg_reply.payload.mem_reg.region.userspace_addr,
 754                        msg->payload.mem_reg.region.userspace_addr,
 755                        reg_fd_idx, reg_idx);
 756                } else {
 757                    error_report("%s: invalid postcopy reply for region. "
 758                                 "Got guest physical address %" PRIX64 ", expected "
 759                                 "%" PRIX64, __func__, reply_gpa,
 760                                 dev->mem->regions[reg_idx].guest_phys_addr);
 761                    return -1;
 762                }
 763            } else if (reply_supported) {
 764                ret = process_message_reply(dev, msg);
 765                if (ret) {
 766                    return ret;
 767                }
 768            }
 769        } else if (track_ramblocks) {
 770            u->region_rb_offset[reg_idx] = 0;
 771            u->region_rb[reg_idx] = NULL;
 772        }
 773
 774        /*
 775         * At this point, we know the backend has mapped in the new
 776         * region, if the region has a valid file descriptor.
 777         *
 778         * The region should now be added to the shadow table.
 779         */
 780        u->shadow_regions[u->num_shadow_regions].guest_phys_addr =
 781            reg->guest_phys_addr;
 782        u->shadow_regions[u->num_shadow_regions].userspace_addr =
 783            reg->userspace_addr;
 784        u->shadow_regions[u->num_shadow_regions].memory_size =
 785            reg->memory_size;
 786        u->num_shadow_regions++;
 787    }
 788
 789    return 0;
 790}
 791
 792static int vhost_user_add_remove_regions(struct vhost_dev *dev,
 793                                         VhostUserMsg *msg,
 794                                         bool reply_supported,
 795                                         bool track_ramblocks)
 796{
 797    struct vhost_user *u = dev->opaque;
 798    struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS];
 799    struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS];
 800    uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {};
 801    int nr_add_reg, nr_rem_reg;
 802
 803    msg->hdr.size = sizeof(msg->payload.mem_reg);
 804
 805    /* Find the regions which need to be removed or added. */
 806    scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg,
 807                         shadow_pcb, track_ramblocks);
 808
 809    if (nr_rem_reg && send_remove_regions(dev, rem_reg, nr_rem_reg, msg,
 810                reply_supported) < 0)
 811    {
 812        goto err;
 813    }
 814
 815    if (nr_add_reg && send_add_regions(dev, add_reg, nr_add_reg, msg,
 816                shadow_pcb, reply_supported, track_ramblocks) < 0)
 817    {
 818        goto err;
 819    }
 820
 821    if (track_ramblocks) {
 822        memcpy(u->postcopy_client_bases, shadow_pcb,
 823               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
 824        /*
 825         * Now we've registered this with the postcopy code, we ack to the
 826         * client, because now we're in the position to be able to deal with
 827         * any faults it generates.
 828         */
 829        /* TODO: Use this for failure cases as well with a bad value. */
 830        msg->hdr.size = sizeof(msg->payload.u64);
 831        msg->payload.u64 = 0; /* OK */
 832
 833        if (vhost_user_write(dev, msg, NULL, 0) < 0) {
 834            return -1;
 835        }
 836    }
 837
 838    return 0;
 839
 840err:
 841    if (track_ramblocks) {
 842        memcpy(u->postcopy_client_bases, shadow_pcb,
 843               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
 844    }
 845
 846    return -1;
 847}
 848
 849static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
 850                                             struct vhost_memory *mem,
 851                                             bool reply_supported,
 852                                             bool config_mem_slots)
 853{
 854    struct vhost_user *u = dev->opaque;
 855    int fds[VHOST_MEMORY_BASELINE_NREGIONS];
 856    size_t fd_num = 0;
 857    VhostUserMsg msg_reply;
 858    int region_i, msg_i;
 859
 860    VhostUserMsg msg = {
 861        .hdr.flags = VHOST_USER_VERSION,
 862    };
 863
 864    if (u->region_rb_len < dev->mem->nregions) {
 865        u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
 866        u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
 867                                      dev->mem->nregions);
 868        memset(&(u->region_rb[u->region_rb_len]), '\0',
 869               sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
 870        memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
 871               sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
 872        u->region_rb_len = dev->mem->nregions;
 873    }
 874
 875    if (config_mem_slots) {
 876        if (vhost_user_add_remove_regions(dev, &msg, reply_supported,
 877                                          true) < 0) {
 878            return -1;
 879        }
 880    } else {
 881        if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
 882                                              true) < 0) {
 883            return -1;
 884        }
 885
 886        if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
 887            return -1;
 888        }
 889
 890        if (vhost_user_read(dev, &msg_reply) < 0) {
 891            return -1;
 892        }
 893
 894        if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
 895            error_report("%s: Received unexpected msg type."
 896                         "Expected %d received %d", __func__,
 897                         VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
 898            return -1;
 899        }
 900
 901        /*
 902         * We're using the same structure, just reusing one of the
 903         * fields, so it should be the same size.
 904         */
 905        if (msg_reply.hdr.size != msg.hdr.size) {
 906            error_report("%s: Unexpected size for postcopy reply "
 907                         "%d vs %d", __func__, msg_reply.hdr.size,
 908                         msg.hdr.size);
 909            return -1;
 910        }
 911
 912        memset(u->postcopy_client_bases, 0,
 913               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
 914
 915        /*
 916         * They're in the same order as the regions that were sent
 917         * but some of the regions were skipped (above) if they
 918         * didn't have fd's
 919         */
 920        for (msg_i = 0, region_i = 0;
 921             region_i < dev->mem->nregions;
 922             region_i++) {
 923            if (msg_i < fd_num &&
 924                msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
 925                dev->mem->regions[region_i].guest_phys_addr) {
 926                u->postcopy_client_bases[region_i] =
 927                    msg_reply.payload.memory.regions[msg_i].userspace_addr;
 928                trace_vhost_user_set_mem_table_postcopy(
 929                    msg_reply.payload.memory.regions[msg_i].userspace_addr,
 930                    msg.payload.memory.regions[msg_i].userspace_addr,
 931                    msg_i, region_i);
 932                msg_i++;
 933            }
 934        }
 935        if (msg_i != fd_num) {
 936            error_report("%s: postcopy reply not fully consumed "
 937                         "%d vs %zd",
 938                         __func__, msg_i, fd_num);
 939            return -1;
 940        }
 941
 942        /*
 943         * Now we've registered this with the postcopy code, we ack to the
 944         * client, because now we're in the position to be able to deal
 945         * with any faults it generates.
 946         */
 947        /* TODO: Use this for failure cases as well with a bad value. */
 948        msg.hdr.size = sizeof(msg.payload.u64);
 949        msg.payload.u64 = 0; /* OK */
 950        if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 951            return -1;
 952        }
 953    }
 954
 955    return 0;
 956}
 957
 958static int vhost_user_set_mem_table(struct vhost_dev *dev,
 959                                    struct vhost_memory *mem)
 960{
 961    struct vhost_user *u = dev->opaque;
 962    int fds[VHOST_MEMORY_BASELINE_NREGIONS];
 963    size_t fd_num = 0;
 964    bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
 965    bool reply_supported = virtio_has_feature(dev->protocol_features,
 966                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
 967    bool config_mem_slots =
 968        virtio_has_feature(dev->protocol_features,
 969                           VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS);
 970
 971    if (do_postcopy) {
 972        /*
 973         * Postcopy has enough differences that it's best done in it's own
 974         * version
 975         */
 976        return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported,
 977                                                 config_mem_slots);
 978    }
 979
 980    VhostUserMsg msg = {
 981        .hdr.flags = VHOST_USER_VERSION,
 982    };
 983
 984    if (reply_supported) {
 985        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
 986    }
 987
 988    if (config_mem_slots) {
 989        if (vhost_user_add_remove_regions(dev, &msg, reply_supported,
 990                                          false) < 0) {
 991            return -1;
 992        }
 993    } else {
 994        if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
 995                                              false) < 0) {
 996            return -1;
 997        }
 998        if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
 999            return -1;
1000        }
1001
1002        if (reply_supported) {
1003            return process_message_reply(dev, &msg);
1004        }
1005    }
1006
1007    return 0;
1008}
1009
1010static int vhost_user_set_vring_addr(struct vhost_dev *dev,
1011                                     struct vhost_vring_addr *addr)
1012{
1013    VhostUserMsg msg = {
1014        .hdr.request = VHOST_USER_SET_VRING_ADDR,
1015        .hdr.flags = VHOST_USER_VERSION,
1016        .payload.addr = *addr,
1017        .hdr.size = sizeof(msg.payload.addr),
1018    };
1019
1020    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1021        return -1;
1022    }
1023
1024    return 0;
1025}
1026
1027static int vhost_user_set_vring_endian(struct vhost_dev *dev,
1028                                       struct vhost_vring_state *ring)
1029{
1030    bool cross_endian = virtio_has_feature(dev->protocol_features,
1031                                           VHOST_USER_PROTOCOL_F_CROSS_ENDIAN);
1032    VhostUserMsg msg = {
1033        .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
1034        .hdr.flags = VHOST_USER_VERSION,
1035        .payload.state = *ring,
1036        .hdr.size = sizeof(msg.payload.state),
1037    };
1038
1039    if (!cross_endian) {
1040        error_report("vhost-user trying to send unhandled ioctl");
1041        return -1;
1042    }
1043
1044    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1045        return -1;
1046    }
1047
1048    return 0;
1049}
1050
1051static int vhost_set_vring(struct vhost_dev *dev,
1052                           unsigned long int request,
1053                           struct vhost_vring_state *ring)
1054{
1055    VhostUserMsg msg = {
1056        .hdr.request = request,
1057        .hdr.flags = VHOST_USER_VERSION,
1058        .payload.state = *ring,
1059        .hdr.size = sizeof(msg.payload.state),
1060    };
1061
1062    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1063        return -1;
1064    }
1065
1066    return 0;
1067}
1068
1069static int vhost_user_set_vring_num(struct vhost_dev *dev,
1070                                    struct vhost_vring_state *ring)
1071{
1072    return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
1073}
1074
1075static void vhost_user_host_notifier_restore(struct vhost_dev *dev,
1076                                             int queue_idx)
1077{
1078    struct vhost_user *u = dev->opaque;
1079    VhostUserHostNotifier *n = &u->user->notifier[queue_idx];
1080    VirtIODevice *vdev = dev->vdev;
1081
1082    if (n->addr && !n->set) {
1083        virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true);
1084        n->set = true;
1085    }
1086}
1087
1088static void vhost_user_host_notifier_remove(struct vhost_dev *dev,
1089                                            int queue_idx)
1090{
1091    struct vhost_user *u = dev->opaque;
1092    VhostUserHostNotifier *n = &u->user->notifier[queue_idx];
1093    VirtIODevice *vdev = dev->vdev;
1094
1095    if (n->addr && n->set) {
1096        virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
1097        n->set = false;
1098    }
1099}
1100
1101static int vhost_user_set_vring_base(struct vhost_dev *dev,
1102                                     struct vhost_vring_state *ring)
1103{
1104    vhost_user_host_notifier_restore(dev, ring->index);
1105
1106    return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
1107}
1108
1109static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
1110{
1111    int i;
1112
1113    if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
1114        return -1;
1115    }
1116
1117    for (i = 0; i < dev->nvqs; ++i) {
1118        struct vhost_vring_state state = {
1119            .index = dev->vq_index + i,
1120            .num   = enable,
1121        };
1122
1123        vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
1124    }
1125
1126    return 0;
1127}
1128
1129static int vhost_user_get_vring_base(struct vhost_dev *dev,
1130                                     struct vhost_vring_state *ring)
1131{
1132    VhostUserMsg msg = {
1133        .hdr.request = VHOST_USER_GET_VRING_BASE,
1134        .hdr.flags = VHOST_USER_VERSION,
1135        .payload.state = *ring,
1136        .hdr.size = sizeof(msg.payload.state),
1137    };
1138
1139    vhost_user_host_notifier_remove(dev, ring->index);
1140
1141    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1142        return -1;
1143    }
1144
1145    if (vhost_user_read(dev, &msg) < 0) {
1146        return -1;
1147    }
1148
1149    if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
1150        error_report("Received unexpected msg type. Expected %d received %d",
1151                     VHOST_USER_GET_VRING_BASE, msg.hdr.request);
1152        return -1;
1153    }
1154
1155    if (msg.hdr.size != sizeof(msg.payload.state)) {
1156        error_report("Received bad msg size.");
1157        return -1;
1158    }
1159
1160    *ring = msg.payload.state;
1161
1162    return 0;
1163}
1164
1165static int vhost_set_vring_file(struct vhost_dev *dev,
1166                                VhostUserRequest request,
1167                                struct vhost_vring_file *file)
1168{
1169    int fds[VHOST_USER_MAX_RAM_SLOTS];
1170    size_t fd_num = 0;
1171    VhostUserMsg msg = {
1172        .hdr.request = request,
1173        .hdr.flags = VHOST_USER_VERSION,
1174        .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
1175        .hdr.size = sizeof(msg.payload.u64),
1176    };
1177
1178    if (ioeventfd_enabled() && file->fd > 0) {
1179        fds[fd_num++] = file->fd;
1180    } else {
1181        msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
1182    }
1183
1184    if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
1185        return -1;
1186    }
1187
1188    return 0;
1189}
1190
1191static int vhost_user_set_vring_kick(struct vhost_dev *dev,
1192                                     struct vhost_vring_file *file)
1193{
1194    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
1195}
1196
1197static int vhost_user_set_vring_call(struct vhost_dev *dev,
1198                                     struct vhost_vring_file *file)
1199{
1200    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
1201}
1202
1203static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64)
1204{
1205    VhostUserMsg msg = {
1206        .hdr.request = request,
1207        .hdr.flags = VHOST_USER_VERSION,
1208        .payload.u64 = u64,
1209        .hdr.size = sizeof(msg.payload.u64),
1210    };
1211
1212    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1213        return -1;
1214    }
1215
1216    return 0;
1217}
1218
1219static int vhost_user_set_features(struct vhost_dev *dev,
1220                                   uint64_t features)
1221{
1222    return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features);
1223}
1224
1225static int vhost_user_set_protocol_features(struct vhost_dev *dev,
1226                                            uint64_t features)
1227{
1228    return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features);
1229}
1230
1231static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
1232{
1233    VhostUserMsg msg = {
1234        .hdr.request = request,
1235        .hdr.flags = VHOST_USER_VERSION,
1236    };
1237
1238    if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
1239        return 0;
1240    }
1241
1242    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1243        return -1;
1244    }
1245
1246    if (vhost_user_read(dev, &msg) < 0) {
1247        return -1;
1248    }
1249
1250    if (msg.hdr.request != request) {
1251        error_report("Received unexpected msg type. Expected %d received %d",
1252                     request, msg.hdr.request);
1253        return -1;
1254    }
1255
1256    if (msg.hdr.size != sizeof(msg.payload.u64)) {
1257        error_report("Received bad msg size.");
1258        return -1;
1259    }
1260
1261    *u64 = msg.payload.u64;
1262
1263    return 0;
1264}
1265
1266static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
1267{
1268    return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features);
1269}
1270
1271static int vhost_user_set_owner(struct vhost_dev *dev)
1272{
1273    VhostUserMsg msg = {
1274        .hdr.request = VHOST_USER_SET_OWNER,
1275        .hdr.flags = VHOST_USER_VERSION,
1276    };
1277
1278    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1279        return -1;
1280    }
1281
1282    return 0;
1283}
1284
1285static int vhost_user_get_max_memslots(struct vhost_dev *dev,
1286                                       uint64_t *max_memslots)
1287{
1288    uint64_t backend_max_memslots;
1289    int err;
1290
1291    err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS,
1292                             &backend_max_memslots);
1293    if (err < 0) {
1294        return err;
1295    }
1296
1297    *max_memslots = backend_max_memslots;
1298
1299    return 0;
1300}
1301
1302static int vhost_user_reset_device(struct vhost_dev *dev)
1303{
1304    VhostUserMsg msg = {
1305        .hdr.flags = VHOST_USER_VERSION,
1306    };
1307
1308    msg.hdr.request = virtio_has_feature(dev->protocol_features,
1309                                         VHOST_USER_PROTOCOL_F_RESET_DEVICE)
1310        ? VHOST_USER_RESET_DEVICE
1311        : VHOST_USER_RESET_OWNER;
1312
1313    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1314        return -1;
1315    }
1316
1317    return 0;
1318}
1319
1320static int vhost_user_slave_handle_config_change(struct vhost_dev *dev)
1321{
1322    int ret = -1;
1323
1324    if (!dev->config_ops) {
1325        return -1;
1326    }
1327
1328    if (dev->config_ops->vhost_dev_config_notifier) {
1329        ret = dev->config_ops->vhost_dev_config_notifier(dev);
1330    }
1331
1332    return ret;
1333}
1334
1335static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
1336                                                       VhostUserVringArea *area,
1337                                                       int fd)
1338{
1339    int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK;
1340    size_t page_size = qemu_real_host_page_size;
1341    struct vhost_user *u = dev->opaque;
1342    VhostUserState *user = u->user;
1343    VirtIODevice *vdev = dev->vdev;
1344    VhostUserHostNotifier *n;
1345    void *addr;
1346    char *name;
1347
1348    if (!virtio_has_feature(dev->protocol_features,
1349                            VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) ||
1350        vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) {
1351        return -1;
1352    }
1353
1354    n = &user->notifier[queue_idx];
1355
1356    if (n->addr) {
1357        virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
1358        object_unparent(OBJECT(&n->mr));
1359        munmap(n->addr, page_size);
1360        n->addr = NULL;
1361    }
1362
1363    if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
1364        return 0;
1365    }
1366
1367    /* Sanity check. */
1368    if (area->size != page_size) {
1369        return -1;
1370    }
1371
1372    addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
1373                fd, area->offset);
1374    if (addr == MAP_FAILED) {
1375        return -1;
1376    }
1377
1378    name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
1379                           user, queue_idx);
1380    memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
1381                                      page_size, addr);
1382    g_free(name);
1383
1384    if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
1385        munmap(addr, page_size);
1386        return -1;
1387    }
1388
1389    n->addr = addr;
1390    n->set = true;
1391
1392    return 0;
1393}
1394
1395static void slave_read(void *opaque)
1396{
1397    struct vhost_dev *dev = opaque;
1398    struct vhost_user *u = dev->opaque;
1399    VhostUserHeader hdr = { 0, };
1400    VhostUserPayload payload = { 0, };
1401    int size, ret = 0;
1402    struct iovec iov;
1403    struct msghdr msgh;
1404    int fd[VHOST_USER_SLAVE_MAX_FDS];
1405    char control[CMSG_SPACE(sizeof(fd))];
1406    struct cmsghdr *cmsg;
1407    int i, fdsize = 0;
1408
1409    memset(&msgh, 0, sizeof(msgh));
1410    msgh.msg_iov = &iov;
1411    msgh.msg_iovlen = 1;
1412    msgh.msg_control = control;
1413    msgh.msg_controllen = sizeof(control);
1414
1415    memset(fd, -1, sizeof(fd));
1416
1417    /* Read header */
1418    iov.iov_base = &hdr;
1419    iov.iov_len = VHOST_USER_HDR_SIZE;
1420
1421    do {
1422        size = recvmsg(u->slave_fd, &msgh, 0);
1423    } while (size < 0 && (errno == EINTR || errno == EAGAIN));
1424
1425    if (size != VHOST_USER_HDR_SIZE) {
1426        error_report("Failed to read from slave.");
1427        goto err;
1428    }
1429
1430    if (msgh.msg_flags & MSG_CTRUNC) {
1431        error_report("Truncated message.");
1432        goto err;
1433    }
1434
1435    for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
1436         cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
1437            if (cmsg->cmsg_level == SOL_SOCKET &&
1438                cmsg->cmsg_type == SCM_RIGHTS) {
1439                    fdsize = cmsg->cmsg_len - CMSG_LEN(0);
1440                    memcpy(fd, CMSG_DATA(cmsg), fdsize);
1441                    break;
1442            }
1443    }
1444
1445    if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
1446        error_report("Failed to read msg header."
1447                " Size %d exceeds the maximum %zu.", hdr.size,
1448                VHOST_USER_PAYLOAD_SIZE);
1449        goto err;
1450    }
1451
1452    /* Read payload */
1453    do {
1454        size = read(u->slave_fd, &payload, hdr.size);
1455    } while (size < 0 && (errno == EINTR || errno == EAGAIN));
1456
1457    if (size != hdr.size) {
1458        error_report("Failed to read payload from slave.");
1459        goto err;
1460    }
1461
1462    switch (hdr.request) {
1463    case VHOST_USER_SLAVE_IOTLB_MSG:
1464        ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
1465        break;
1466    case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG :
1467        ret = vhost_user_slave_handle_config_change(dev);
1468        break;
1469    case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
1470        ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area,
1471                                                          fd[0]);
1472        break;
1473    default:
1474        error_report("Received unexpected msg type: %d.", hdr.request);
1475        ret = -EINVAL;
1476    }
1477
1478    /* Close the remaining file descriptors. */
1479    for (i = 0; i < fdsize; i++) {
1480        if (fd[i] != -1) {
1481            close(fd[i]);
1482        }
1483    }
1484
1485    /*
1486     * REPLY_ACK feature handling. Other reply types has to be managed
1487     * directly in their request handlers.
1488     */
1489    if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
1490        struct iovec iovec[2];
1491
1492
1493        hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
1494        hdr.flags |= VHOST_USER_REPLY_MASK;
1495
1496        payload.u64 = !!ret;
1497        hdr.size = sizeof(payload.u64);
1498
1499        iovec[0].iov_base = &hdr;
1500        iovec[0].iov_len = VHOST_USER_HDR_SIZE;
1501        iovec[1].iov_base = &payload;
1502        iovec[1].iov_len = hdr.size;
1503
1504        do {
1505            size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec));
1506        } while (size < 0 && (errno == EINTR || errno == EAGAIN));
1507
1508        if (size != VHOST_USER_HDR_SIZE + hdr.size) {
1509            error_report("Failed to send msg reply to slave.");
1510            goto err;
1511        }
1512    }
1513
1514    return;
1515
1516err:
1517    qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
1518    close(u->slave_fd);
1519    u->slave_fd = -1;
1520    for (i = 0; i < fdsize; i++) {
1521        if (fd[i] != -1) {
1522            close(fd[i]);
1523        }
1524    }
1525    return;
1526}
1527
1528static int vhost_setup_slave_channel(struct vhost_dev *dev)
1529{
1530    VhostUserMsg msg = {
1531        .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD,
1532        .hdr.flags = VHOST_USER_VERSION,
1533    };
1534    struct vhost_user *u = dev->opaque;
1535    int sv[2], ret = 0;
1536    bool reply_supported = virtio_has_feature(dev->protocol_features,
1537                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
1538
1539    if (!virtio_has_feature(dev->protocol_features,
1540                            VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
1541        return 0;
1542    }
1543
1544    if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
1545        error_report("socketpair() failed");
1546        return -1;
1547    }
1548
1549    u->slave_fd = sv[0];
1550    qemu_set_fd_handler(u->slave_fd, slave_read, NULL, dev);
1551
1552    if (reply_supported) {
1553        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1554    }
1555
1556    ret = vhost_user_write(dev, &msg, &sv[1], 1);
1557    if (ret) {
1558        goto out;
1559    }
1560
1561    if (reply_supported) {
1562        ret = process_message_reply(dev, &msg);
1563    }
1564
1565out:
1566    close(sv[1]);
1567    if (ret) {
1568        qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
1569        close(u->slave_fd);
1570        u->slave_fd = -1;
1571    }
1572
1573    return ret;
1574}
1575
1576#ifdef CONFIG_LINUX
1577/*
1578 * Called back from the postcopy fault thread when a fault is received on our
1579 * ufd.
1580 * TODO: This is Linux specific
1581 */
1582static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
1583                                             void *ufd)
1584{
1585    struct vhost_dev *dev = pcfd->data;
1586    struct vhost_user *u = dev->opaque;
1587    struct uffd_msg *msg = ufd;
1588    uint64_t faultaddr = msg->arg.pagefault.address;
1589    RAMBlock *rb = NULL;
1590    uint64_t rb_offset;
1591    int i;
1592
1593    trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
1594                                            dev->mem->nregions);
1595    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1596        trace_vhost_user_postcopy_fault_handler_loop(i,
1597                u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
1598        if (faultaddr >= u->postcopy_client_bases[i]) {
1599            /* Ofset of the fault address in the vhost region */
1600            uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
1601            if (region_offset < dev->mem->regions[i].memory_size) {
1602                rb_offset = region_offset + u->region_rb_offset[i];
1603                trace_vhost_user_postcopy_fault_handler_found(i,
1604                        region_offset, rb_offset);
1605                rb = u->region_rb[i];
1606                return postcopy_request_shared_page(pcfd, rb, faultaddr,
1607                                                    rb_offset);
1608            }
1609        }
1610    }
1611    error_report("%s: Failed to find region for fault %" PRIx64,
1612                 __func__, faultaddr);
1613    return -1;
1614}
1615
1616static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
1617                                     uint64_t offset)
1618{
1619    struct vhost_dev *dev = pcfd->data;
1620    struct vhost_user *u = dev->opaque;
1621    int i;
1622
1623    trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);
1624
1625    if (!u) {
1626        return 0;
1627    }
1628    /* Translate the offset into an address in the clients address space */
1629    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1630        if (u->region_rb[i] == rb &&
1631            offset >= u->region_rb_offset[i] &&
1632            offset < (u->region_rb_offset[i] +
1633                      dev->mem->regions[i].memory_size)) {
1634            uint64_t client_addr = (offset - u->region_rb_offset[i]) +
1635                                   u->postcopy_client_bases[i];
1636            trace_vhost_user_postcopy_waker_found(client_addr);
1637            return postcopy_wake_shared(pcfd, client_addr, rb);
1638        }
1639    }
1640
1641    trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
1642    return 0;
1643}
1644#endif
1645
1646/*
1647 * Called at the start of an inbound postcopy on reception of the
1648 * 'advise' command.
1649 */
1650static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
1651{
1652#ifdef CONFIG_LINUX
1653    struct vhost_user *u = dev->opaque;
1654    CharBackend *chr = u->user->chr;
1655    int ufd;
1656    VhostUserMsg msg = {
1657        .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
1658        .hdr.flags = VHOST_USER_VERSION,
1659    };
1660
1661    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1662        error_setg(errp, "Failed to send postcopy_advise to vhost");
1663        return -1;
1664    }
1665
1666    if (vhost_user_read(dev, &msg) < 0) {
1667        error_setg(errp, "Failed to get postcopy_advise reply from vhost");
1668        return -1;
1669    }
1670
1671    if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
1672        error_setg(errp, "Unexpected msg type. Expected %d received %d",
1673                     VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
1674        return -1;
1675    }
1676
1677    if (msg.hdr.size) {
1678        error_setg(errp, "Received bad msg size.");
1679        return -1;
1680    }
1681    ufd = qemu_chr_fe_get_msgfd(chr);
1682    if (ufd < 0) {
1683        error_setg(errp, "%s: Failed to get ufd", __func__);
1684        return -1;
1685    }
1686    qemu_set_nonblock(ufd);
1687
1688    /* register ufd with userfault thread */
1689    u->postcopy_fd.fd = ufd;
1690    u->postcopy_fd.data = dev;
1691    u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
1692    u->postcopy_fd.waker = vhost_user_postcopy_waker;
1693    u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */
1694    postcopy_register_shared_ufd(&u->postcopy_fd);
1695    return 0;
1696#else
1697    error_setg(errp, "Postcopy not supported on non-Linux systems");
1698    return -1;
1699#endif
1700}
1701
1702/*
1703 * Called at the switch to postcopy on reception of the 'listen' command.
1704 */
1705static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
1706{
1707    struct vhost_user *u = dev->opaque;
1708    int ret;
1709    VhostUserMsg msg = {
1710        .hdr.request = VHOST_USER_POSTCOPY_LISTEN,
1711        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1712    };
1713    u->postcopy_listen = true;
1714    trace_vhost_user_postcopy_listen();
1715    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1716        error_setg(errp, "Failed to send postcopy_listen to vhost");
1717        return -1;
1718    }
1719
1720    ret = process_message_reply(dev, &msg);
1721    if (ret) {
1722        error_setg(errp, "Failed to receive reply to postcopy_listen");
1723        return ret;
1724    }
1725
1726    return 0;
1727}
1728
1729/*
1730 * Called at the end of postcopy
1731 */
1732static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
1733{
1734    VhostUserMsg msg = {
1735        .hdr.request = VHOST_USER_POSTCOPY_END,
1736        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1737    };
1738    int ret;
1739    struct vhost_user *u = dev->opaque;
1740
1741    trace_vhost_user_postcopy_end_entry();
1742    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1743        error_setg(errp, "Failed to send postcopy_end to vhost");
1744        return -1;
1745    }
1746
1747    ret = process_message_reply(dev, &msg);
1748    if (ret) {
1749        error_setg(errp, "Failed to receive reply to postcopy_end");
1750        return ret;
1751    }
1752    postcopy_unregister_shared_ufd(&u->postcopy_fd);
1753    close(u->postcopy_fd.fd);
1754    u->postcopy_fd.handler = NULL;
1755
1756    trace_vhost_user_postcopy_end_exit();
1757
1758    return 0;
1759}
1760
1761static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
1762                                        void *opaque)
1763{
1764    struct PostcopyNotifyData *pnd = opaque;
1765    struct vhost_user *u = container_of(notifier, struct vhost_user,
1766                                         postcopy_notifier);
1767    struct vhost_dev *dev = u->dev;
1768
1769    switch (pnd->reason) {
1770    case POSTCOPY_NOTIFY_PROBE:
1771        if (!virtio_has_feature(dev->protocol_features,
1772                                VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
1773            /* TODO: Get the device name into this error somehow */
1774            error_setg(pnd->errp,
1775                       "vhost-user backend not capable of postcopy");
1776            return -ENOENT;
1777        }
1778        break;
1779
1780    case POSTCOPY_NOTIFY_INBOUND_ADVISE:
1781        return vhost_user_postcopy_advise(dev, pnd->errp);
1782
1783    case POSTCOPY_NOTIFY_INBOUND_LISTEN:
1784        return vhost_user_postcopy_listen(dev, pnd->errp);
1785
1786    case POSTCOPY_NOTIFY_INBOUND_END:
1787        return vhost_user_postcopy_end(dev, pnd->errp);
1788
1789    default:
1790        /* We ignore notifications we don't know */
1791        break;
1792    }
1793
1794    return 0;
1795}
1796
1797static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque)
1798{
1799    uint64_t features, protocol_features, ram_slots;
1800    struct vhost_user *u;
1801    int err;
1802
1803    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1804
1805    u = g_new0(struct vhost_user, 1);
1806    u->user = opaque;
1807    u->slave_fd = -1;
1808    u->dev = dev;
1809    dev->opaque = u;
1810
1811    err = vhost_user_get_features(dev, &features);
1812    if (err < 0) {
1813        return err;
1814    }
1815
1816    if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
1817        dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
1818
1819        err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
1820                                 &protocol_features);
1821        if (err < 0) {
1822            return err;
1823        }
1824
1825        dev->protocol_features =
1826            protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK;
1827
1828        if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
1829            /* Don't acknowledge CONFIG feature if device doesn't support it */
1830            dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
1831        } else if (!(protocol_features &
1832                    (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) {
1833            error_report("Device expects VHOST_USER_PROTOCOL_F_CONFIG "
1834                    "but backend does not support it.");
1835            return -1;
1836        }
1837
1838        err = vhost_user_set_protocol_features(dev, dev->protocol_features);
1839        if (err < 0) {
1840            return err;
1841        }
1842
1843        /* query the max queues we support if backend supports Multiple Queue */
1844        if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
1845            err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
1846                                     &dev->max_queues);
1847            if (err < 0) {
1848                return err;
1849            }
1850        }
1851
1852        if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
1853                !(virtio_has_feature(dev->protocol_features,
1854                    VHOST_USER_PROTOCOL_F_SLAVE_REQ) &&
1855                 virtio_has_feature(dev->protocol_features,
1856                    VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
1857            error_report("IOMMU support requires reply-ack and "
1858                         "slave-req protocol features.");
1859            return -1;
1860        }
1861
1862        /* get max memory regions if backend supports configurable RAM slots */
1863        if (!virtio_has_feature(dev->protocol_features,
1864                                VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) {
1865            u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS;
1866        } else {
1867            err = vhost_user_get_max_memslots(dev, &ram_slots);
1868            if (err < 0) {
1869                return err;
1870            }
1871
1872            if (ram_slots < u->user->memory_slots) {
1873                error_report("The backend specified a max ram slots limit "
1874                             "of %" PRIu64", when the prior validated limit was %d. "
1875                             "This limit should never decrease.", ram_slots,
1876                             u->user->memory_slots);
1877                return -1;
1878            }
1879
1880            u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS);
1881        }
1882    }
1883
1884    if (dev->migration_blocker == NULL &&
1885        !virtio_has_feature(dev->protocol_features,
1886                            VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
1887        error_setg(&dev->migration_blocker,
1888                   "Migration disabled: vhost-user backend lacks "
1889                   "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
1890    }
1891
1892    if (dev->vq_index == 0) {
1893        err = vhost_setup_slave_channel(dev);
1894        if (err < 0) {
1895            return err;
1896        }
1897    }
1898
1899    u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
1900    postcopy_add_notifier(&u->postcopy_notifier);
1901
1902    return 0;
1903}
1904
1905static int vhost_user_backend_cleanup(struct vhost_dev *dev)
1906{
1907    struct vhost_user *u;
1908
1909    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1910
1911    u = dev->opaque;
1912    if (u->postcopy_notifier.notify) {
1913        postcopy_remove_notifier(&u->postcopy_notifier);
1914        u->postcopy_notifier.notify = NULL;
1915    }
1916    u->postcopy_listen = false;
1917    if (u->postcopy_fd.handler) {
1918        postcopy_unregister_shared_ufd(&u->postcopy_fd);
1919        close(u->postcopy_fd.fd);
1920        u->postcopy_fd.handler = NULL;
1921    }
1922    if (u->slave_fd >= 0) {
1923        qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
1924        close(u->slave_fd);
1925        u->slave_fd = -1;
1926    }
1927    g_free(u->region_rb);
1928    u->region_rb = NULL;
1929    g_free(u->region_rb_offset);
1930    u->region_rb_offset = NULL;
1931    u->region_rb_len = 0;
1932    g_free(u);
1933    dev->opaque = 0;
1934
1935    return 0;
1936}
1937
1938static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
1939{
1940    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
1941
1942    return idx;
1943}
1944
1945static int vhost_user_memslots_limit(struct vhost_dev *dev)
1946{
1947    struct vhost_user *u = dev->opaque;
1948
1949    return u->user->memory_slots;
1950}
1951
1952static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
1953{
1954    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1955
1956    return virtio_has_feature(dev->protocol_features,
1957                              VHOST_USER_PROTOCOL_F_LOG_SHMFD);
1958}
1959
1960static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
1961{
1962    VhostUserMsg msg = { };
1963
1964    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1965
1966    /* If guest supports GUEST_ANNOUNCE do nothing */
1967    if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
1968        return 0;
1969    }
1970
1971    /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */
1972    if (virtio_has_feature(dev->protocol_features,
1973                           VHOST_USER_PROTOCOL_F_RARP)) {
1974        msg.hdr.request = VHOST_USER_SEND_RARP;
1975        msg.hdr.flags = VHOST_USER_VERSION;
1976        memcpy((char *)&msg.payload.u64, mac_addr, 6);
1977        msg.hdr.size = sizeof(msg.payload.u64);
1978
1979        return vhost_user_write(dev, &msg, NULL, 0);
1980    }
1981    return -1;
1982}
1983
1984static bool vhost_user_can_merge(struct vhost_dev *dev,
1985                                 uint64_t start1, uint64_t size1,
1986                                 uint64_t start2, uint64_t size2)
1987{
1988    ram_addr_t offset;
1989    int mfd, rfd;
1990
1991    (void)vhost_user_get_mr_data(start1, &offset, &mfd);
1992    (void)vhost_user_get_mr_data(start2, &offset, &rfd);
1993
1994    return mfd == rfd;
1995}
1996
1997static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
1998{
1999    VhostUserMsg msg;
2000    bool reply_supported = virtio_has_feature(dev->protocol_features,
2001                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
2002
2003    if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
2004        return 0;
2005    }
2006
2007    msg.hdr.request = VHOST_USER_NET_SET_MTU;
2008    msg.payload.u64 = mtu;
2009    msg.hdr.size = sizeof(msg.payload.u64);
2010    msg.hdr.flags = VHOST_USER_VERSION;
2011    if (reply_supported) {
2012        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
2013    }
2014
2015    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
2016        return -1;
2017    }
2018
2019    /* If reply_ack supported, slave has to ack specified MTU is valid */
2020    if (reply_supported) {
2021        return process_message_reply(dev, &msg);
2022    }
2023
2024    return 0;
2025}
2026
2027static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
2028                                            struct vhost_iotlb_msg *imsg)
2029{
2030    VhostUserMsg msg = {
2031        .hdr.request = VHOST_USER_IOTLB_MSG,
2032        .hdr.size = sizeof(msg.payload.iotlb),
2033        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
2034        .payload.iotlb = *imsg,
2035    };
2036
2037    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
2038        return -EFAULT;
2039    }
2040
2041    return process_message_reply(dev, &msg);
2042}
2043
2044
/*
 * IOTLB callback toggle; deliberately does nothing for vhost-user.
 */
static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
{
    /*
     * Nothing to do here: the receive channel is shared with other
     * messages rather than being dedicated to IOTLB traffic.
     */
}
2049
2050static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
2051                                 uint32_t config_len)
2052{
2053    VhostUserMsg msg = {
2054        .hdr.request = VHOST_USER_GET_CONFIG,
2055        .hdr.flags = VHOST_USER_VERSION,
2056        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
2057    };
2058
2059    if (!virtio_has_feature(dev->protocol_features,
2060                VHOST_USER_PROTOCOL_F_CONFIG)) {
2061        return -1;
2062    }
2063
2064    if (config_len > VHOST_USER_MAX_CONFIG_SIZE) {
2065        return -1;
2066    }
2067
2068    msg.payload.config.offset = 0;
2069    msg.payload.config.size = config_len;
2070    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
2071        return -1;
2072    }
2073
2074    if (vhost_user_read(dev, &msg) < 0) {
2075        return -1;
2076    }
2077
2078    if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
2079        error_report("Received unexpected msg type. Expected %d received %d",
2080                     VHOST_USER_GET_CONFIG, msg.hdr.request);
2081        return -1;
2082    }
2083
2084    if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
2085        error_report("Received bad msg size.");
2086        return -1;
2087    }
2088
2089    memcpy(config, msg.payload.config.region, config_len);
2090
2091    return 0;
2092}
2093
2094static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
2095                                 uint32_t offset, uint32_t size, uint32_t flags)
2096{
2097    uint8_t *p;
2098    bool reply_supported = virtio_has_feature(dev->protocol_features,
2099                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
2100
2101    VhostUserMsg msg = {
2102        .hdr.request = VHOST_USER_SET_CONFIG,
2103        .hdr.flags = VHOST_USER_VERSION,
2104        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
2105    };
2106
2107    if (!virtio_has_feature(dev->protocol_features,
2108                VHOST_USER_PROTOCOL_F_CONFIG)) {
2109        return -1;
2110    }
2111
2112    if (reply_supported) {
2113        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
2114    }
2115
2116    if (size > VHOST_USER_MAX_CONFIG_SIZE) {
2117        return -1;
2118    }
2119
2120    msg.payload.config.offset = offset,
2121    msg.payload.config.size = size,
2122    msg.payload.config.flags = flags,
2123    p = msg.payload.config.region;
2124    memcpy(p, data, size);
2125
2126    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
2127        return -1;
2128    }
2129
2130    if (reply_supported) {
2131        return process_message_reply(dev, &msg);
2132    }
2133
2134    return 0;
2135}
2136
2137static int vhost_user_crypto_create_session(struct vhost_dev *dev,
2138                                            void *session_info,
2139                                            uint64_t *session_id)
2140{
2141    bool crypto_session = virtio_has_feature(dev->protocol_features,
2142                                       VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
2143    CryptoDevBackendSymSessionInfo *sess_info = session_info;
2144    VhostUserMsg msg = {
2145        .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
2146        .hdr.flags = VHOST_USER_VERSION,
2147        .hdr.size = sizeof(msg.payload.session),
2148    };
2149
2150    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2151
2152    if (!crypto_session) {
2153        error_report("vhost-user trying to send unhandled ioctl");
2154        return -1;
2155    }
2156
2157    memcpy(&msg.payload.session.session_setup_data, sess_info,
2158              sizeof(CryptoDevBackendSymSessionInfo));
2159    if (sess_info->key_len) {
2160        memcpy(&msg.payload.session.key, sess_info->cipher_key,
2161               sess_info->key_len);
2162    }
2163    if (sess_info->auth_key_len > 0) {
2164        memcpy(&msg.payload.session.auth_key, sess_info->auth_key,
2165               sess_info->auth_key_len);
2166    }
2167    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
2168        error_report("vhost_user_write() return -1, create session failed");
2169        return -1;
2170    }
2171
2172    if (vhost_user_read(dev, &msg) < 0) {
2173        error_report("vhost_user_read() return -1, create session failed");
2174        return -1;
2175    }
2176
2177    if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
2178        error_report("Received unexpected msg type. Expected %d received %d",
2179                     VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
2180        return -1;
2181    }
2182
2183    if (msg.hdr.size != sizeof(msg.payload.session)) {
2184        error_report("Received bad msg size.");
2185        return -1;
2186    }
2187
2188    if (msg.payload.session.session_id < 0) {
2189        error_report("Bad session id: %" PRId64 "",
2190                              msg.payload.session.session_id);
2191        return -1;
2192    }
2193    *session_id = msg.payload.session.session_id;
2194
2195    return 0;
2196}
2197
2198static int
2199vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
2200{
2201    bool crypto_session = virtio_has_feature(dev->protocol_features,
2202                                       VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
2203    VhostUserMsg msg = {
2204        .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION,
2205        .hdr.flags = VHOST_USER_VERSION,
2206        .hdr.size = sizeof(msg.payload.u64),
2207    };
2208    msg.payload.u64 = session_id;
2209
2210    if (!crypto_session) {
2211        error_report("vhost-user trying to send unhandled ioctl");
2212        return -1;
2213    }
2214
2215    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
2216        error_report("vhost_user_write() return -1, close session failed");
2217        return -1;
2218    }
2219
2220    return 0;
2221}
2222
2223static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
2224                                          MemoryRegionSection *section)
2225{
2226    bool result;
2227
2228    result = memory_region_get_fd(section->mr) >= 0;
2229
2230    return result;
2231}
2232
2233static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
2234                                      uint16_t queue_size,
2235                                      struct vhost_inflight *inflight)
2236{
2237    void *addr;
2238    int fd;
2239    struct vhost_user *u = dev->opaque;
2240    CharBackend *chr = u->user->chr;
2241    VhostUserMsg msg = {
2242        .hdr.request = VHOST_USER_GET_INFLIGHT_FD,
2243        .hdr.flags = VHOST_USER_VERSION,
2244        .payload.inflight.num_queues = dev->nvqs,
2245        .payload.inflight.queue_size = queue_size,
2246        .hdr.size = sizeof(msg.payload.inflight),
2247    };
2248
2249    if (!virtio_has_feature(dev->protocol_features,
2250                            VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
2251        return 0;
2252    }
2253
2254    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
2255        return -1;
2256    }
2257
2258    if (vhost_user_read(dev, &msg) < 0) {
2259        return -1;
2260    }
2261
2262    if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) {
2263        error_report("Received unexpected msg type. "
2264                     "Expected %d received %d",
2265                     VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request);
2266        return -1;
2267    }
2268
2269    if (msg.hdr.size != sizeof(msg.payload.inflight)) {
2270        error_report("Received bad msg size.");
2271        return -1;
2272    }
2273
2274    if (!msg.payload.inflight.mmap_size) {
2275        return 0;
2276    }
2277
2278    fd = qemu_chr_fe_get_msgfd(chr);
2279    if (fd < 0) {
2280        error_report("Failed to get mem fd");
2281        return -1;
2282    }
2283
2284    addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE,
2285                MAP_SHARED, fd, msg.payload.inflight.mmap_offset);
2286
2287    if (addr == MAP_FAILED) {
2288        error_report("Failed to mmap mem fd");
2289        close(fd);
2290        return -1;
2291    }
2292
2293    inflight->addr = addr;
2294    inflight->fd = fd;
2295    inflight->size = msg.payload.inflight.mmap_size;
2296    inflight->offset = msg.payload.inflight.mmap_offset;
2297    inflight->queue_size = queue_size;
2298
2299    return 0;
2300}
2301
2302static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
2303                                      struct vhost_inflight *inflight)
2304{
2305    VhostUserMsg msg = {
2306        .hdr.request = VHOST_USER_SET_INFLIGHT_FD,
2307        .hdr.flags = VHOST_USER_VERSION,
2308        .payload.inflight.mmap_size = inflight->size,
2309        .payload.inflight.mmap_offset = inflight->offset,
2310        .payload.inflight.num_queues = dev->nvqs,
2311        .payload.inflight.queue_size = inflight->queue_size,
2312        .hdr.size = sizeof(msg.payload.inflight),
2313    };
2314
2315    if (!virtio_has_feature(dev->protocol_features,
2316                            VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
2317        return 0;
2318    }
2319
2320    if (vhost_user_write(dev, &msg, &inflight->fd, 1) < 0) {
2321        return -1;
2322    }
2323
2324    return 0;
2325}
2326
2327bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
2328{
2329    if (user->chr) {
2330        error_setg(errp, "Cannot initialize vhost-user state");
2331        return false;
2332    }
2333    user->chr = chr;
2334    user->memory_slots = 0;
2335    return true;
2336}
2337
2338void vhost_user_cleanup(VhostUserState *user)
2339{
2340    int i;
2341
2342    if (!user->chr) {
2343        return;
2344    }
2345
2346    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2347        if (user->notifier[i].addr) {
2348            object_unparent(OBJECT(&user->notifier[i].mr));
2349            munmap(user->notifier[i].addr, qemu_real_host_page_size);
2350            user->notifier[i].addr = NULL;
2351        }
2352    }
2353    user->chr = NULL;
2354}
2355
2356const VhostOps user_ops = {
2357        .backend_type = VHOST_BACKEND_TYPE_USER,
2358        .vhost_backend_init = vhost_user_backend_init,
2359        .vhost_backend_cleanup = vhost_user_backend_cleanup,
2360        .vhost_backend_memslots_limit = vhost_user_memslots_limit,
2361        .vhost_set_log_base = vhost_user_set_log_base,
2362        .vhost_set_mem_table = vhost_user_set_mem_table,
2363        .vhost_set_vring_addr = vhost_user_set_vring_addr,
2364        .vhost_set_vring_endian = vhost_user_set_vring_endian,
2365        .vhost_set_vring_num = vhost_user_set_vring_num,
2366        .vhost_set_vring_base = vhost_user_set_vring_base,
2367        .vhost_get_vring_base = vhost_user_get_vring_base,
2368        .vhost_set_vring_kick = vhost_user_set_vring_kick,
2369        .vhost_set_vring_call = vhost_user_set_vring_call,
2370        .vhost_set_features = vhost_user_set_features,
2371        .vhost_get_features = vhost_user_get_features,
2372        .vhost_set_owner = vhost_user_set_owner,
2373        .vhost_reset_device = vhost_user_reset_device,
2374        .vhost_get_vq_index = vhost_user_get_vq_index,
2375        .vhost_set_vring_enable = vhost_user_set_vring_enable,
2376        .vhost_requires_shm_log = vhost_user_requires_shm_log,
2377        .vhost_migration_done = vhost_user_migration_done,
2378        .vhost_backend_can_merge = vhost_user_can_merge,
2379        .vhost_net_set_mtu = vhost_user_net_set_mtu,
2380        .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
2381        .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
2382        .vhost_get_config = vhost_user_get_config,
2383        .vhost_set_config = vhost_user_set_config,
2384        .vhost_crypto_create_session = vhost_user_crypto_create_session,
2385        .vhost_crypto_close_session = vhost_user_crypto_close_session,
2386        .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
2387        .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
2388        .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
2389};
2390