qemu/hw/virtio/vhost-user.c
   1/*
   2 * vhost-user
   3 *
   4 * Copyright (c) 2013 Virtual Open Systems Sarl.
   5 *
   6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
   7 * See the COPYING file in the top-level directory.
   8 *
   9 */
  10
  11#include "qemu/osdep.h"
  12#include "qapi/error.h"
  13#include "hw/virtio/vhost.h"
  14#include "hw/virtio/vhost-user.h"
  15#include "hw/virtio/vhost-backend.h"
  16#include "hw/virtio/virtio.h"
  17#include "hw/virtio/virtio-net.h"
  18#include "chardev/char-fe.h"
  19#include "sysemu/kvm.h"
  20#include "qemu/error-report.h"
  21#include "qemu/sockets.h"
  22#include "sysemu/cryptodev.h"
  23#include "migration/migration.h"
  24#include "migration/postcopy-ram.h"
  25#include "trace.h"
  26
  27#include <sys/ioctl.h>
  28#include <sys/socket.h>
  29#include <sys/un.h>
  30#include <linux/vhost.h>
  31#include <linux/userfaultfd.h>
  32
  33#define VHOST_MEMORY_MAX_NREGIONS    8
  34#define VHOST_USER_F_PROTOCOL_FEATURES 30
  35#define VHOST_USER_SLAVE_MAX_FDS     8
  36
  37/*
  38 * Maximum size of virtio device config space
  39 */
  40#define VHOST_USER_MAX_CONFIG_SIZE 256
  41
  42enum VhostUserProtocolFeature {
  43    VHOST_USER_PROTOCOL_F_MQ = 0,
  44    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
  45    VHOST_USER_PROTOCOL_F_RARP = 2,
  46    VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
  47    VHOST_USER_PROTOCOL_F_NET_MTU = 4,
  48    VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
  49    VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
  50    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
  51    VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
  52    VHOST_USER_PROTOCOL_F_CONFIG = 9,
  53    VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
  54    VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
  55    VHOST_USER_PROTOCOL_F_MAX
  56};
  57
  58#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
  59
  60typedef enum VhostUserRequest {
  61    VHOST_USER_NONE = 0,
  62    VHOST_USER_GET_FEATURES = 1,
  63    VHOST_USER_SET_FEATURES = 2,
  64    VHOST_USER_SET_OWNER = 3,
  65    VHOST_USER_RESET_OWNER = 4,
  66    VHOST_USER_SET_MEM_TABLE = 5,
  67    VHOST_USER_SET_LOG_BASE = 6,
  68    VHOST_USER_SET_LOG_FD = 7,
  69    VHOST_USER_SET_VRING_NUM = 8,
  70    VHOST_USER_SET_VRING_ADDR = 9,
  71    VHOST_USER_SET_VRING_BASE = 10,
  72    VHOST_USER_GET_VRING_BASE = 11,
  73    VHOST_USER_SET_VRING_KICK = 12,
  74    VHOST_USER_SET_VRING_CALL = 13,
  75    VHOST_USER_SET_VRING_ERR = 14,
  76    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
  77    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
  78    VHOST_USER_GET_QUEUE_NUM = 17,
  79    VHOST_USER_SET_VRING_ENABLE = 18,
  80    VHOST_USER_SEND_RARP = 19,
  81    VHOST_USER_NET_SET_MTU = 20,
  82    VHOST_USER_SET_SLAVE_REQ_FD = 21,
  83    VHOST_USER_IOTLB_MSG = 22,
  84    VHOST_USER_SET_VRING_ENDIAN = 23,
  85    VHOST_USER_GET_CONFIG = 24,
  86    VHOST_USER_SET_CONFIG = 25,
  87    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
  88    VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
  89    VHOST_USER_POSTCOPY_ADVISE  = 28,
  90    VHOST_USER_POSTCOPY_LISTEN  = 29,
  91    VHOST_USER_POSTCOPY_END     = 30,
  92    VHOST_USER_MAX
  93} VhostUserRequest;
  94
  95typedef enum VhostUserSlaveRequest {
  96    VHOST_USER_SLAVE_NONE = 0,
  97    VHOST_USER_SLAVE_IOTLB_MSG = 1,
  98    VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
  99    VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
 100    VHOST_USER_SLAVE_MAX
 101}  VhostUserSlaveRequest;
 102
 103typedef struct VhostUserMemoryRegion {
 104    uint64_t guest_phys_addr;
 105    uint64_t memory_size;
 106    uint64_t userspace_addr;
 107    uint64_t mmap_offset;
 108} VhostUserMemoryRegion;
 109
 110typedef struct VhostUserMemory {
 111    uint32_t nregions;
 112    uint32_t padding;
 113    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
 114} VhostUserMemory;
 115
 116typedef struct VhostUserLog {
 117    uint64_t mmap_size;
 118    uint64_t mmap_offset;
 119} VhostUserLog;
 120
 121typedef struct VhostUserConfig {
 122    uint32_t offset;
 123    uint32_t size;
 124    uint32_t flags;
 125    uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
 126} VhostUserConfig;
 127
 128#define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN    512
 129#define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN  64
 130
 131typedef struct VhostUserCryptoSession {
  132    /* session id on success, -1 on error */
 133    int64_t session_id;
 134    CryptoDevBackendSymSessionInfo session_setup_data;
 135    uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
 136    uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
 137} VhostUserCryptoSession;
 138
 139static VhostUserConfig c __attribute__ ((unused));
 140#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
 141                                   + sizeof(c.size) \
 142                                   + sizeof(c.flags))
 143
 144typedef struct VhostUserVringArea {
 145    uint64_t u64;
 146    uint64_t size;
 147    uint64_t offset;
 148} VhostUserVringArea;
 149
 150typedef struct {
 151    VhostUserRequest request;
 152
 153#define VHOST_USER_VERSION_MASK     (0x3)
 154#define VHOST_USER_REPLY_MASK       (0x1<<2)
 155#define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
 156    uint32_t flags;
 157    uint32_t size; /* the following payload size */
 158} QEMU_PACKED VhostUserHeader;
 159
 160typedef union {
 161#define VHOST_USER_VRING_IDX_MASK   (0xff)
 162#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
 163        uint64_t u64;
 164        struct vhost_vring_state state;
 165        struct vhost_vring_addr addr;
 166        VhostUserMemory memory;
 167        VhostUserLog log;
 168        struct vhost_iotlb_msg iotlb;
 169        VhostUserConfig config;
 170        VhostUserCryptoSession session;
 171        VhostUserVringArea area;
 172} VhostUserPayload;
 173
 174typedef struct VhostUserMsg {
 175    VhostUserHeader hdr;
 176    VhostUserPayload payload;
 177} QEMU_PACKED VhostUserMsg;
 178
 179static VhostUserMsg m __attribute__ ((unused));
 180#define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))
 181
 182#define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))
 183
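/*
 * On the wire, every vhost-user message is a VhostUserHeader followed by
 * hdr.size bytes of payload; any file descriptors that accompany a request
 * are passed out of band via qemu_chr_fe_set_msgfds() (see vhost_user_write()
 * and vhost_user_read() below).
 */
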
 184/* The version of the protocol we support */
 185#define VHOST_USER_VERSION    (0x1)
 186
 187struct vhost_user {
 188    struct vhost_dev *dev;
 189    /* Shared between vhost devs of the same virtio device */
 190    VhostUserState *user;
 191    int slave_fd;
 192    NotifierWithReturn postcopy_notifier;
 193    struct PostCopyFD  postcopy_fd;
 194    uint64_t           postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS];
 195    /* Length of the region_rb and region_rb_offset arrays */
 196    size_t             region_rb_len;
 197    /* RAMBlock associated with a given region */
 198    RAMBlock         **region_rb;
 199    /* The offset from the start of the RAMBlock to the start of the
 200     * vhost region.
 201     */
 202    ram_addr_t        *region_rb_offset;
 203
 204    /* True once we've entered postcopy_listen */
 205    bool               postcopy_listen;
 206};
 207
 208static bool ioeventfd_enabled(void)
 209{
 210    return kvm_enabled() && kvm_eventfds_enabled();
 211}
 212
 213static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
 214{
 215    struct vhost_user *u = dev->opaque;
 216    CharBackend *chr = u->user->chr;
 217    uint8_t *p = (uint8_t *) msg;
 218    int r, size = VHOST_USER_HDR_SIZE;
 219
 220    r = qemu_chr_fe_read_all(chr, p, size);
 221    if (r != size) {
 222        error_report("Failed to read msg header. Read %d instead of %d."
 223                     " Original request %d.", r, size, msg->hdr.request);
 224        goto fail;
 225    }
 226
 227    /* validate received flags */
 228    if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
 229        error_report("Failed to read msg header."
 230                " Flags 0x%x instead of 0x%x.", msg->hdr.flags,
 231                VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
 232        goto fail;
 233    }
 234
 235    /* validate message size is sane */
 236    if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
 237        error_report("Failed to read msg header."
 238                " Size %d exceeds the maximum %zu.", msg->hdr.size,
 239                VHOST_USER_PAYLOAD_SIZE);
 240        goto fail;
 241    }
 242
 243    if (msg->hdr.size) {
 244        p += VHOST_USER_HDR_SIZE;
 245        size = msg->hdr.size;
 246        r = qemu_chr_fe_read_all(chr, p, size);
 247        if (r != size) {
 248            error_report("Failed to read msg payload."
 249                         " Read %d instead of %d.", r, msg->hdr.size);
 250            goto fail;
 251        }
 252    }
 253
 254    return 0;
 255
 256fail:
 257    return -1;
 258}
 259
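/*
 * Wait for the slave's reply to a request that had
 * VHOST_USER_NEED_REPLY_MASK set.  The reply must carry the same request
 * type; its u64 payload is a status, zero meaning success.
 */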
 260static int process_message_reply(struct vhost_dev *dev,
 261                                 const VhostUserMsg *msg)
 262{
 263    VhostUserMsg msg_reply;
 264
 265    if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
 266        return 0;
 267    }
 268
 269    if (vhost_user_read(dev, &msg_reply) < 0) {
 270        return -1;
 271    }
 272
 273    if (msg_reply.hdr.request != msg->hdr.request) {
  274        error_report("Received unexpected msg type. "
  275                     "Expected %d received %d",
 276                     msg->hdr.request, msg_reply.hdr.request);
 277        return -1;
 278    }
 279
 280    return msg_reply.payload.u64 ? -1 : 0;
 281}
 282
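/*
 * Requests that configure the device as a whole rather than an individual
 * vring only need to be issued once, by the vhost_dev handling queue
 * index 0.
 */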
 283static bool vhost_user_one_time_request(VhostUserRequest request)
 284{
 285    switch (request) {
 286    case VHOST_USER_SET_OWNER:
 287    case VHOST_USER_RESET_OWNER:
 288    case VHOST_USER_SET_MEM_TABLE:
 289    case VHOST_USER_GET_QUEUE_NUM:
 290    case VHOST_USER_NET_SET_MTU:
 291        return true;
 292    default:
 293        return false;
 294    }
 295}
 296
 297/* most non-init callers ignore the error */
 298static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
 299                            int *fds, int fd_num)
 300{
 301    struct vhost_user *u = dev->opaque;
 302    CharBackend *chr = u->user->chr;
 303    int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;
 304
 305    /*
 306     * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
  307     * we only need to send it once, the first time. Any later such
  308     * request is simply ignored.
 309     */
 310    if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) {
 311        msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
 312        return 0;
 313    }
 314
 315    if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
 316        error_report("Failed to set msg fds.");
 317        return -1;
 318    }
 319
 320    ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
 321    if (ret != size) {
 322        error_report("Failed to write msg."
 323                     " Wrote %d instead of %d.", ret, size);
 324        return -1;
 325    }
 326
 327    return 0;
 328}
 329
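/*
 * Tell the slave where the dirty log lives.  When
 * VHOST_USER_PROTOCOL_F_LOG_SHMFD was negotiated, the log's fd (if any) is
 * attached to the request and we wait for the slave to echo the message
 * back before returning.
 */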
 330static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
 331                                   struct vhost_log *log)
 332{
 333    int fds[VHOST_MEMORY_MAX_NREGIONS];
 334    size_t fd_num = 0;
 335    bool shmfd = virtio_has_feature(dev->protocol_features,
 336                                    VHOST_USER_PROTOCOL_F_LOG_SHMFD);
 337    VhostUserMsg msg = {
 338        .hdr.request = VHOST_USER_SET_LOG_BASE,
 339        .hdr.flags = VHOST_USER_VERSION,
 340        .payload.log.mmap_size = log->size * sizeof(*(log->log)),
 341        .payload.log.mmap_offset = 0,
 342        .hdr.size = sizeof(msg.payload.log),
 343    };
 344
 345    if (shmfd && log->fd != -1) {
 346        fds[fd_num++] = log->fd;
 347    }
 348
 349    if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
 350        return -1;
 351    }
 352
 353    if (shmfd) {
 354        msg.hdr.size = 0;
 355        if (vhost_user_read(dev, &msg) < 0) {
 356            return -1;
 357        }
 358
 359        if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
 360            error_report("Received unexpected msg type. "
 361                         "Expected %d received %d",
 362                         VHOST_USER_SET_LOG_BASE, msg.hdr.request);
 363            return -1;
 364        }
 365    }
 366
 367    return 0;
 368}
 369
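/*
 * Postcopy variant of VHOST_USER_SET_MEM_TABLE: send every region that is
 * backed by an fd, then read the slave's reply, which carries the addresses
 * at which the slave mapped each region.  Those are recorded in
 * postcopy_client_bases for the fault handler, and a final u64 ack is sent
 * once the regions have been registered.
 */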
 370static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
 371                                             struct vhost_memory *mem)
 372{
 373    struct vhost_user *u = dev->opaque;
 374    int fds[VHOST_MEMORY_MAX_NREGIONS];
 375    int i, fd;
 376    size_t fd_num = 0;
 377    VhostUserMsg msg_reply;
 378    int region_i, msg_i;
 379
 380    VhostUserMsg msg = {
 381        .hdr.request = VHOST_USER_SET_MEM_TABLE,
 382        .hdr.flags = VHOST_USER_VERSION,
 383    };
 384
 385    if (u->region_rb_len < dev->mem->nregions) {
 386        u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
 387        u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
 388                                      dev->mem->nregions);
 389        memset(&(u->region_rb[u->region_rb_len]), '\0',
 390               sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
 391        memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
 392               sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
 393        u->region_rb_len = dev->mem->nregions;
 394    }
 395
 396    for (i = 0; i < dev->mem->nregions; ++i) {
 397        struct vhost_memory_region *reg = dev->mem->regions + i;
 398        ram_addr_t offset;
 399        MemoryRegion *mr;
 400
 401        assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
 402        mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
 403                                     &offset);
 404        fd = memory_region_get_fd(mr);
 405        if (fd > 0) {
 406            trace_vhost_user_set_mem_table_withfd(fd_num, mr->name,
 407                                                  reg->memory_size,
 408                                                  reg->guest_phys_addr,
 409                                                  reg->userspace_addr, offset);
 410            u->region_rb_offset[i] = offset;
 411            u->region_rb[i] = mr->ram_block;
 412            msg.payload.memory.regions[fd_num].userspace_addr =
 413                reg->userspace_addr;
 414            msg.payload.memory.regions[fd_num].memory_size  = reg->memory_size;
 415            msg.payload.memory.regions[fd_num].guest_phys_addr =
 416                reg->guest_phys_addr;
 417            msg.payload.memory.regions[fd_num].mmap_offset = offset;
 418            assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
 419            fds[fd_num++] = fd;
 420        } else {
 421            u->region_rb_offset[i] = 0;
 422            u->region_rb[i] = NULL;
 423        }
 424    }
 425
 426    msg.payload.memory.nregions = fd_num;
 427
 428    if (!fd_num) {
 429        error_report("Failed initializing vhost-user memory map, "
 430                     "consider using -object memory-backend-file share=on");
 431        return -1;
 432    }
 433
 434    msg.hdr.size = sizeof(msg.payload.memory.nregions);
 435    msg.hdr.size += sizeof(msg.payload.memory.padding);
 436    msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion);
 437
 438    if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
 439        return -1;
 440    }
 441
 442    if (vhost_user_read(dev, &msg_reply) < 0) {
 443        return -1;
 444    }
 445
 446    if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
  447        error_report("%s: Received unexpected msg type. "
  448                     "Expected %d received %d", __func__,
 449                     VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
 450        return -1;
 451    }
 452    /* We're using the same structure, just reusing one of the
 453     * fields, so it should be the same size.
 454     */
 455    if (msg_reply.hdr.size != msg.hdr.size) {
 456        error_report("%s: Unexpected size for postcopy reply "
 457                     "%d vs %d", __func__, msg_reply.hdr.size, msg.hdr.size);
 458        return -1;
 459    }
 460
 461    memset(u->postcopy_client_bases, 0,
 462           sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS);
 463
  464    /* They're in the same order as the regions that were sent,
  465     * but some of the regions were skipped (above) if they
  466     * didn't have fds.
  467     */
 468    for (msg_i = 0, region_i = 0;
 469         region_i < dev->mem->nregions;
 470        region_i++) {
 471        if (msg_i < fd_num &&
 472            msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
 473            dev->mem->regions[region_i].guest_phys_addr) {
 474            u->postcopy_client_bases[region_i] =
 475                msg_reply.payload.memory.regions[msg_i].userspace_addr;
 476            trace_vhost_user_set_mem_table_postcopy(
 477                msg_reply.payload.memory.regions[msg_i].userspace_addr,
 478                msg.payload.memory.regions[msg_i].userspace_addr,
 479                msg_i, region_i);
 480            msg_i++;
 481        }
 482    }
 483    if (msg_i != fd_num) {
 484        error_report("%s: postcopy reply not fully consumed "
 485                     "%d vs %zd",
 486                     __func__, msg_i, fd_num);
 487        return -1;
 488    }
  489    /* Now that we've registered this with the postcopy code, we ack to the
  490     * client, because we're now in a position to deal with any faults
  491     * it generates.
 492     */
 493    /* TODO: Use this for failure cases as well with a bad value */
 494    msg.hdr.size = sizeof(msg.payload.u64);
 495    msg.payload.u64 = 0; /* OK */
 496    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 497        return -1;
 498    }
 499
 500    return 0;
 501}
 502
 503static int vhost_user_set_mem_table(struct vhost_dev *dev,
 504                                    struct vhost_memory *mem)
 505{
 506    struct vhost_user *u = dev->opaque;
 507    int fds[VHOST_MEMORY_MAX_NREGIONS];
 508    int i, fd;
 509    size_t fd_num = 0;
 510    bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
 511    bool reply_supported = virtio_has_feature(dev->protocol_features,
 512                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
 513
 514    if (do_postcopy) {
  515        /* Postcopy has enough differences that it's best done in its own
  516         * version.
 517         */
 518        return vhost_user_set_mem_table_postcopy(dev, mem);
 519    }
 520
 521    VhostUserMsg msg = {
 522        .hdr.request = VHOST_USER_SET_MEM_TABLE,
 523        .hdr.flags = VHOST_USER_VERSION,
 524    };
 525
 526    if (reply_supported) {
 527        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
 528    }
 529
 530    for (i = 0; i < dev->mem->nregions; ++i) {
 531        struct vhost_memory_region *reg = dev->mem->regions + i;
 532        ram_addr_t offset;
 533        MemoryRegion *mr;
 534
 535        assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
 536        mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
 537                                     &offset);
 538        fd = memory_region_get_fd(mr);
 539        if (fd > 0) {
 540            if (fd_num == VHOST_MEMORY_MAX_NREGIONS) {
 541                error_report("Failed preparing vhost-user memory table msg");
 542                return -1;
 543            }
 544            msg.payload.memory.regions[fd_num].userspace_addr =
 545                reg->userspace_addr;
 546            msg.payload.memory.regions[fd_num].memory_size  = reg->memory_size;
 547            msg.payload.memory.regions[fd_num].guest_phys_addr =
 548                reg->guest_phys_addr;
 549            msg.payload.memory.regions[fd_num].mmap_offset = offset;
 550            fds[fd_num++] = fd;
 551        }
 552    }
 553
 554    msg.payload.memory.nregions = fd_num;
 555
 556    if (!fd_num) {
 557        error_report("Failed initializing vhost-user memory map, "
 558                     "consider using -object memory-backend-file share=on");
 559        return -1;
 560    }
 561
 562    msg.hdr.size = sizeof(msg.payload.memory.nregions);
 563    msg.hdr.size += sizeof(msg.payload.memory.padding);
 564    msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion);
 565
 566    if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
 567        return -1;
 568    }
 569
 570    if (reply_supported) {
 571        return process_message_reply(dev, &msg);
 572    }
 573
 574    return 0;
 575}
 576
 577static int vhost_user_set_vring_addr(struct vhost_dev *dev,
 578                                     struct vhost_vring_addr *addr)
 579{
 580    VhostUserMsg msg = {
 581        .hdr.request = VHOST_USER_SET_VRING_ADDR,
 582        .hdr.flags = VHOST_USER_VERSION,
 583        .payload.addr = *addr,
 584        .hdr.size = sizeof(msg.payload.addr),
 585    };
 586
 587    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 588        return -1;
 589    }
 590
 591    return 0;
 592}
 593
 594static int vhost_user_set_vring_endian(struct vhost_dev *dev,
 595                                       struct vhost_vring_state *ring)
 596{
 597    bool cross_endian = virtio_has_feature(dev->protocol_features,
 598                                           VHOST_USER_PROTOCOL_F_CROSS_ENDIAN);
 599    VhostUserMsg msg = {
 600        .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
 601        .hdr.flags = VHOST_USER_VERSION,
 602        .payload.state = *ring,
 603        .hdr.size = sizeof(msg.payload.state),
 604    };
 605
 606    if (!cross_endian) {
 607        error_report("vhost-user trying to send unhandled ioctl");
 608        return -1;
 609    }
 610
 611    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 612        return -1;
 613    }
 614
 615    return 0;
 616}
 617
 618static int vhost_set_vring(struct vhost_dev *dev,
 619                           unsigned long int request,
 620                           struct vhost_vring_state *ring)
 621{
 622    VhostUserMsg msg = {
 623        .hdr.request = request,
 624        .hdr.flags = VHOST_USER_VERSION,
 625        .payload.state = *ring,
 626        .hdr.size = sizeof(msg.payload.state),
 627    };
 628
 629    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 630        return -1;
 631    }
 632
 633    return 0;
 634}
 635
 636static int vhost_user_set_vring_num(struct vhost_dev *dev,
 637                                    struct vhost_vring_state *ring)
 638{
 639    return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
 640}
 641
 642static void vhost_user_host_notifier_restore(struct vhost_dev *dev,
 643                                             int queue_idx)
 644{
 645    struct vhost_user *u = dev->opaque;
 646    VhostUserHostNotifier *n = &u->user->notifier[queue_idx];
 647    VirtIODevice *vdev = dev->vdev;
 648
 649    if (n->addr && !n->set) {
 650        virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true);
 651        n->set = true;
 652    }
 653}
 654
 655static void vhost_user_host_notifier_remove(struct vhost_dev *dev,
 656                                            int queue_idx)
 657{
 658    struct vhost_user *u = dev->opaque;
 659    VhostUserHostNotifier *n = &u->user->notifier[queue_idx];
 660    VirtIODevice *vdev = dev->vdev;
 661
 662    if (n->addr && n->set) {
 663        virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
 664        n->set = false;
 665    }
 666}
 667
 668static int vhost_user_set_vring_base(struct vhost_dev *dev,
 669                                     struct vhost_vring_state *ring)
 670{
 671    vhost_user_host_notifier_restore(dev, ring->index);
 672
 673    return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
 674}
 675
 676static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
 677{
 678    int i;
 679
 680    if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
 681        return -1;
 682    }
 683
 684    for (i = 0; i < dev->nvqs; ++i) {
 685        struct vhost_vring_state state = {
 686            .index = dev->vq_index + i,
 687            .num   = enable,
 688        };
 689
 690        vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
 691    }
 692
 693    return 0;
 694}
 695
 696static int vhost_user_get_vring_base(struct vhost_dev *dev,
 697                                     struct vhost_vring_state *ring)
 698{
 699    VhostUserMsg msg = {
 700        .hdr.request = VHOST_USER_GET_VRING_BASE,
 701        .hdr.flags = VHOST_USER_VERSION,
 702        .payload.state = *ring,
 703        .hdr.size = sizeof(msg.payload.state),
 704    };
 705
 706    vhost_user_host_notifier_remove(dev, ring->index);
 707
 708    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 709        return -1;
 710    }
 711
 712    if (vhost_user_read(dev, &msg) < 0) {
 713        return -1;
 714    }
 715
 716    if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
 717        error_report("Received unexpected msg type. Expected %d received %d",
 718                     VHOST_USER_GET_VRING_BASE, msg.hdr.request);
 719        return -1;
 720    }
 721
 722    if (msg.hdr.size != sizeof(msg.payload.state)) {
 723        error_report("Received bad msg size.");
 724        return -1;
 725    }
 726
 727    *ring = msg.payload.state;
 728
 729    return 0;
 730}
 731
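/*
 * Common helper for SET_VRING_KICK/SET_VRING_CALL: the eventfd is attached
 * to the message when ioeventfds are usable, otherwise
 * VHOST_USER_VRING_NOFD_MASK is set in the payload to tell the slave that
 * no fd accompanies the request.
 */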
 732static int vhost_set_vring_file(struct vhost_dev *dev,
 733                                VhostUserRequest request,
 734                                struct vhost_vring_file *file)
 735{
 736    int fds[VHOST_MEMORY_MAX_NREGIONS];
 737    size_t fd_num = 0;
 738    VhostUserMsg msg = {
 739        .hdr.request = request,
 740        .hdr.flags = VHOST_USER_VERSION,
 741        .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
 742        .hdr.size = sizeof(msg.payload.u64),
 743    };
 744
 745    if (ioeventfd_enabled() && file->fd > 0) {
 746        fds[fd_num++] = file->fd;
 747    } else {
 748        msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
 749    }
 750
 751    if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
 752        return -1;
 753    }
 754
 755    return 0;
 756}
 757
 758static int vhost_user_set_vring_kick(struct vhost_dev *dev,
 759                                     struct vhost_vring_file *file)
 760{
 761    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
 762}
 763
 764static int vhost_user_set_vring_call(struct vhost_dev *dev,
 765                                     struct vhost_vring_file *file)
 766{
 767    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
 768}
 769
 770static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64)
 771{
 772    VhostUserMsg msg = {
 773        .hdr.request = request,
 774        .hdr.flags = VHOST_USER_VERSION,
 775        .payload.u64 = u64,
 776        .hdr.size = sizeof(msg.payload.u64),
 777    };
 778
 779    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 780        return -1;
 781    }
 782
 783    return 0;
 784}
 785
 786static int vhost_user_set_features(struct vhost_dev *dev,
 787                                   uint64_t features)
 788{
 789    return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features);
 790}
 791
 792static int vhost_user_set_protocol_features(struct vhost_dev *dev,
 793                                            uint64_t features)
 794{
 795    return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features);
 796}
 797
 798static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
 799{
 800    VhostUserMsg msg = {
 801        .hdr.request = request,
 802        .hdr.flags = VHOST_USER_VERSION,
 803    };
 804
 805    if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
 806        return 0;
 807    }
 808
 809    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 810        return -1;
 811    }
 812
 813    if (vhost_user_read(dev, &msg) < 0) {
 814        return -1;
 815    }
 816
 817    if (msg.hdr.request != request) {
 818        error_report("Received unexpected msg type. Expected %d received %d",
 819                     request, msg.hdr.request);
 820        return -1;
 821    }
 822
 823    if (msg.hdr.size != sizeof(msg.payload.u64)) {
 824        error_report("Received bad msg size.");
 825        return -1;
 826    }
 827
 828    *u64 = msg.payload.u64;
 829
 830    return 0;
 831}
 832
 833static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
 834{
 835    return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features);
 836}
 837
 838static int vhost_user_set_owner(struct vhost_dev *dev)
 839{
 840    VhostUserMsg msg = {
 841        .hdr.request = VHOST_USER_SET_OWNER,
 842        .hdr.flags = VHOST_USER_VERSION,
 843    };
 844
 845    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 846        return -1;
 847    }
 848
 849    return 0;
 850}
 851
 852static int vhost_user_reset_device(struct vhost_dev *dev)
 853{
 854    VhostUserMsg msg = {
 855        .hdr.request = VHOST_USER_RESET_OWNER,
 856        .hdr.flags = VHOST_USER_VERSION,
 857    };
 858
 859    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 860        return -1;
 861    }
 862
 863    return 0;
 864}
 865
 866static int vhost_user_slave_handle_config_change(struct vhost_dev *dev)
 867{
 868    int ret = -1;
 869
 870    if (!dev->config_ops) {
 871        return -1;
 872    }
 873
 874    if (dev->config_ops->vhost_dev_config_notifier) {
 875        ret = dev->config_ops->vhost_dev_config_notifier(dev);
 876    }
 877
 878    return ret;
 879}
 880
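/*
 * Handle VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: drop any notifier
 * previously installed for the queue, then (unless the NOFD flag is set)
 * mmap one host page at area->offset from the passed fd and install it as
 * the queue's host-notifier memory region.
 */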
 881static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
 882                                                       VhostUserVringArea *area,
 883                                                       int fd)
 884{
 885    int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK;
 886    size_t page_size = qemu_real_host_page_size;
 887    struct vhost_user *u = dev->opaque;
 888    VhostUserState *user = u->user;
 889    VirtIODevice *vdev = dev->vdev;
 890    VhostUserHostNotifier *n;
 891    void *addr;
 892    char *name;
 893
 894    if (!virtio_has_feature(dev->protocol_features,
 895                            VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) ||
 896        vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) {
 897        return -1;
 898    }
 899
 900    n = &user->notifier[queue_idx];
 901
 902    if (n->addr) {
 903        virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
 904        object_unparent(OBJECT(&n->mr));
 905        munmap(n->addr, page_size);
 906        n->addr = NULL;
 907    }
 908
 909    if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
 910        return 0;
 911    }
 912
 913    /* Sanity check. */
 914    if (area->size != page_size) {
 915        return -1;
 916    }
 917
 918    addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
 919                fd, area->offset);
 920    if (addr == MAP_FAILED) {
 921        return -1;
 922    }
 923
 924    name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
 925                           user, queue_idx);
 926    memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
 927                                      page_size, addr);
 928    g_free(name);
 929
 930    if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
 931        munmap(addr, page_size);
 932        return -1;
 933    }
 934
 935    n->addr = addr;
 936    n->set = true;
 937
 938    return 0;
 939}
 940
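/*
 * Handler for the slave channel: read a header and payload from slave_fd,
 * collect any file descriptors passed as SCM_RIGHTS ancillary data,
 * dispatch on the request type, close leftover fds and, if
 * VHOST_USER_NEED_REPLY_MASK was set, write back a u64 status
 * (zero on success).
 */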
 941static void slave_read(void *opaque)
 942{
 943    struct vhost_dev *dev = opaque;
 944    struct vhost_user *u = dev->opaque;
 945    VhostUserHeader hdr = { 0, };
 946    VhostUserPayload payload = { 0, };
 947    int size, ret = 0;
 948    struct iovec iov;
 949    struct msghdr msgh;
 950    int fd[VHOST_USER_SLAVE_MAX_FDS];
 951    char control[CMSG_SPACE(sizeof(fd))];
 952    struct cmsghdr *cmsg;
 953    int i, fdsize = 0;
 954
 955    memset(&msgh, 0, sizeof(msgh));
 956    msgh.msg_iov = &iov;
 957    msgh.msg_iovlen = 1;
 958    msgh.msg_control = control;
 959    msgh.msg_controllen = sizeof(control);
 960
 961    memset(fd, -1, sizeof(fd));
 962
 963    /* Read header */
 964    iov.iov_base = &hdr;
 965    iov.iov_len = VHOST_USER_HDR_SIZE;
 966
 967    size = recvmsg(u->slave_fd, &msgh, 0);
 968    if (size != VHOST_USER_HDR_SIZE) {
 969        error_report("Failed to read from slave.");
 970        goto err;
 971    }
 972
 973    if (msgh.msg_flags & MSG_CTRUNC) {
 974        error_report("Truncated message.");
 975        goto err;
 976    }
 977
 978    for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
 979         cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
 980            if (cmsg->cmsg_level == SOL_SOCKET &&
 981                cmsg->cmsg_type == SCM_RIGHTS) {
 982                    fdsize = cmsg->cmsg_len - CMSG_LEN(0);
 983                    memcpy(fd, CMSG_DATA(cmsg), fdsize);
 984                    break;
 985            }
 986    }
 987
 988    if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
 989        error_report("Failed to read msg header."
 990                " Size %d exceeds the maximum %zu.", hdr.size,
 991                VHOST_USER_PAYLOAD_SIZE);
 992        goto err;
 993    }
 994
 995    /* Read payload */
 996    size = read(u->slave_fd, &payload, hdr.size);
 997    if (size != hdr.size) {
 998        error_report("Failed to read payload from slave.");
 999        goto err;
1000    }
1001
1002    switch (hdr.request) {
1003    case VHOST_USER_SLAVE_IOTLB_MSG:
1004        ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
1005        break;
 1006    case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG:
1007        ret = vhost_user_slave_handle_config_change(dev);
1008        break;
1009    case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
1010        ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area,
1011                                                          fd[0]);
1012        break;
1013    default:
1014        error_report("Received unexpected msg type.");
1015        ret = -EINVAL;
1016    }
1017
1018    /* Close the remaining file descriptors. */
1019    for (i = 0; i < fdsize; i++) {
1020        if (fd[i] != -1) {
1021            close(fd[i]);
1022        }
1023    }
1024
1025    /*
 1026     * REPLY_ACK feature handling. Other reply types have to be managed
1027     * directly in their request handlers.
1028     */
1029    if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
1030        struct iovec iovec[2];
1031
1032
1033        hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
1034        hdr.flags |= VHOST_USER_REPLY_MASK;
1035
1036        payload.u64 = !!ret;
1037        hdr.size = sizeof(payload.u64);
1038
1039        iovec[0].iov_base = &hdr;
1040        iovec[0].iov_len = VHOST_USER_HDR_SIZE;
1041        iovec[1].iov_base = &payload;
1042        iovec[1].iov_len = hdr.size;
1043
1044        size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec));
1045        if (size != VHOST_USER_HDR_SIZE + hdr.size) {
1046            error_report("Failed to send msg reply to slave.");
1047            goto err;
1048        }
1049    }
1050
1051    return;
1052
1053err:
1054    qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
1055    close(u->slave_fd);
1056    u->slave_fd = -1;
1057    for (i = 0; i < fdsize; i++) {
1058        if (fd[i] != -1) {
1059            close(fd[i]);
1060        }
1061    }
1062    return;
1063}
1064
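/*
 * If VHOST_USER_PROTOCOL_F_SLAVE_REQ was negotiated, create a socketpair,
 * keep one end locally (serviced by slave_read()) and hand the other end
 * to the slave with VHOST_USER_SET_SLAVE_REQ_FD.
 */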
1065static int vhost_setup_slave_channel(struct vhost_dev *dev)
1066{
1067    VhostUserMsg msg = {
1068        .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD,
1069        .hdr.flags = VHOST_USER_VERSION,
1070    };
1071    struct vhost_user *u = dev->opaque;
1072    int sv[2], ret = 0;
1073    bool reply_supported = virtio_has_feature(dev->protocol_features,
1074                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
1075
1076    if (!virtio_has_feature(dev->protocol_features,
1077                            VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
1078        return 0;
1079    }
1080
1081    if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
1082        error_report("socketpair() failed");
1083        return -1;
1084    }
1085
1086    u->slave_fd = sv[0];
1087    qemu_set_fd_handler(u->slave_fd, slave_read, NULL, dev);
1088
1089    if (reply_supported) {
1090        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1091    }
1092
1093    ret = vhost_user_write(dev, &msg, &sv[1], 1);
1094    if (ret) {
1095        goto out;
1096    }
1097
1098    if (reply_supported) {
1099        ret = process_message_reply(dev, &msg);
1100    }
1101
1102out:
1103    close(sv[1]);
1104    if (ret) {
1105        qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
1106        close(u->slave_fd);
1107        u->slave_fd = -1;
1108    }
1109
1110    return ret;
1111}
1112
1113/*
1114 * Called back from the postcopy fault thread when a fault is received on our
1115 * ufd.
1116 * TODO: This is Linux specific
1117 */
1118static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
1119                                             void *ufd)
1120{
1121    struct vhost_dev *dev = pcfd->data;
1122    struct vhost_user *u = dev->opaque;
1123    struct uffd_msg *msg = ufd;
1124    uint64_t faultaddr = msg->arg.pagefault.address;
1125    RAMBlock *rb = NULL;
1126    uint64_t rb_offset;
1127    int i;
1128
1129    trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
1130                                            dev->mem->nregions);
1131    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1132        trace_vhost_user_postcopy_fault_handler_loop(i,
1133                u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
1134        if (faultaddr >= u->postcopy_client_bases[i]) {
 1135            /* Offset of the fault address in the vhost region */
1136            uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
1137            if (region_offset < dev->mem->regions[i].memory_size) {
1138                rb_offset = region_offset + u->region_rb_offset[i];
1139                trace_vhost_user_postcopy_fault_handler_found(i,
1140                        region_offset, rb_offset);
1141                rb = u->region_rb[i];
1142                return postcopy_request_shared_page(pcfd, rb, faultaddr,
1143                                                    rb_offset);
1144            }
1145        }
1146    }
1147    error_report("%s: Failed to find region for fault %" PRIx64,
1148                 __func__, faultaddr);
1149    return -1;
1150}
1151
1152static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
1153                                     uint64_t offset)
1154{
1155    struct vhost_dev *dev = pcfd->data;
1156    struct vhost_user *u = dev->opaque;
1157    int i;
1158
1159    trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);
1160
1161    if (!u) {
1162        return 0;
1163    }
 1164    /* Translate the offset into an address in the client's address space */
1165    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1166        if (u->region_rb[i] == rb &&
1167            offset >= u->region_rb_offset[i] &&
1168            offset < (u->region_rb_offset[i] +
1169                      dev->mem->regions[i].memory_size)) {
1170            uint64_t client_addr = (offset - u->region_rb_offset[i]) +
1171                                   u->postcopy_client_bases[i];
1172            trace_vhost_user_postcopy_waker_found(client_addr);
1173            return postcopy_wake_shared(pcfd, client_addr, rb);
1174        }
1175    }
1176
1177    trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
1178    return 0;
1179}
1180
1181/*
1182 * Called at the start of an inbound postcopy on reception of the
1183 * 'advise' command.
1184 */
1185static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
1186{
1187    struct vhost_user *u = dev->opaque;
1188    CharBackend *chr = u->user->chr;
1189    int ufd;
1190    VhostUserMsg msg = {
1191        .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
1192        .hdr.flags = VHOST_USER_VERSION,
1193    };
1194
1195    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1196        error_setg(errp, "Failed to send postcopy_advise to vhost");
1197        return -1;
1198    }
1199
1200    if (vhost_user_read(dev, &msg) < 0) {
1201        error_setg(errp, "Failed to get postcopy_advise reply from vhost");
1202        return -1;
1203    }
1204
1205    if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
1206        error_setg(errp, "Unexpected msg type. Expected %d received %d",
1207                     VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
1208        return -1;
1209    }
1210
1211    if (msg.hdr.size) {
1212        error_setg(errp, "Received bad msg size.");
1213        return -1;
1214    }
1215    ufd = qemu_chr_fe_get_msgfd(chr);
1216    if (ufd < 0) {
1217        error_setg(errp, "%s: Failed to get ufd", __func__);
1218        return -1;
1219    }
1220    qemu_set_nonblock(ufd);
1221
1222    /* register ufd with userfault thread */
1223    u->postcopy_fd.fd = ufd;
1224    u->postcopy_fd.data = dev;
1225    u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
1226    u->postcopy_fd.waker = vhost_user_postcopy_waker;
1227    u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */
1228    postcopy_register_shared_ufd(&u->postcopy_fd);
1229    return 0;
1230}
1231
1232/*
1233 * Called at the switch to postcopy on reception of the 'listen' command.
1234 */
1235static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
1236{
1237    struct vhost_user *u = dev->opaque;
1238    int ret;
1239    VhostUserMsg msg = {
1240        .hdr.request = VHOST_USER_POSTCOPY_LISTEN,
1241        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1242    };
1243    u->postcopy_listen = true;
1244    trace_vhost_user_postcopy_listen();
1245    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1246        error_setg(errp, "Failed to send postcopy_listen to vhost");
1247        return -1;
1248    }
1249
1250    ret = process_message_reply(dev, &msg);
1251    if (ret) {
1252        error_setg(errp, "Failed to receive reply to postcopy_listen");
1253        return ret;
1254    }
1255
1256    return 0;
1257}
1258
1259/*
1260 * Called at the end of postcopy
1261 */
1262static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
1263{
1264    VhostUserMsg msg = {
1265        .hdr.request = VHOST_USER_POSTCOPY_END,
1266        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1267    };
1268    int ret;
1269    struct vhost_user *u = dev->opaque;
1270
1271    trace_vhost_user_postcopy_end_entry();
1272    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1273        error_setg(errp, "Failed to send postcopy_end to vhost");
1274        return -1;
1275    }
1276
1277    ret = process_message_reply(dev, &msg);
1278    if (ret) {
1279        error_setg(errp, "Failed to receive reply to postcopy_end");
1280        return ret;
1281    }
1282    postcopy_unregister_shared_ufd(&u->postcopy_fd);
1283    close(u->postcopy_fd.fd);
1284    u->postcopy_fd.handler = NULL;
1285
1286    trace_vhost_user_postcopy_end_exit();
1287
1288    return 0;
1289}
1290
1291static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
1292                                        void *opaque)
1293{
1294    struct PostcopyNotifyData *pnd = opaque;
1295    struct vhost_user *u = container_of(notifier, struct vhost_user,
1296                                         postcopy_notifier);
1297    struct vhost_dev *dev = u->dev;
1298
1299    switch (pnd->reason) {
1300    case POSTCOPY_NOTIFY_PROBE:
1301        if (!virtio_has_feature(dev->protocol_features,
1302                                VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
1303            /* TODO: Get the device name into this error somehow */
1304            error_setg(pnd->errp,
1305                       "vhost-user backend not capable of postcopy");
1306            return -ENOENT;
1307        }
1308        break;
1309
1310    case POSTCOPY_NOTIFY_INBOUND_ADVISE:
1311        return vhost_user_postcopy_advise(dev, pnd->errp);
1312
1313    case POSTCOPY_NOTIFY_INBOUND_LISTEN:
1314        return vhost_user_postcopy_listen(dev, pnd->errp);
1315
1316    case POSTCOPY_NOTIFY_INBOUND_END:
1317        return vhost_user_postcopy_end(dev, pnd->errp);
1318
1319    default:
 1320        /* We ignore notifications we don't know about */
1321        break;
1322    }
1323
1324    return 0;
1325}
1326
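/*
 * Backend init handshake: query the device features and, if
 * VHOST_USER_F_PROTOCOL_FEATURES is offered, negotiate the protocol
 * features, query the queue limit when F_MQ is supported and check the
 * IOMMU prerequisites.  A migration blocker is installed unless the
 * backend supports F_LOG_SHMFD; finally the slave channel is set up and
 * the postcopy notifier registered.
 */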
1327static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque)
1328{
1329    uint64_t features, protocol_features;
1330    struct vhost_user *u;
1331    int err;
1332
1333    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1334
1335    u = g_new0(struct vhost_user, 1);
1336    u->user = opaque;
1337    u->slave_fd = -1;
1338    u->dev = dev;
1339    dev->opaque = u;
1340
1341    err = vhost_user_get_features(dev, &features);
1342    if (err < 0) {
1343        return err;
1344    }
1345
1346    if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
1347        dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
1348
1349        err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
1350                                 &protocol_features);
1351        if (err < 0) {
1352            return err;
1353        }
1354
1355        dev->protocol_features =
1356            protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK;
1357
1358        if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
1359            /* Don't acknowledge CONFIG feature if device doesn't support it */
1360            dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
1361        } else if (!(protocol_features &
1362                    (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) {
1363            error_report("Device expects VHOST_USER_PROTOCOL_F_CONFIG "
1364                    "but backend does not support it.");
1365            return -1;
1366        }
1367
1368        err = vhost_user_set_protocol_features(dev, dev->protocol_features);
1369        if (err < 0) {
1370            return err;
1371        }
1372
 1373        /* Query the max queues we support if the backend supports multiple queues */
1374        if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
1375            err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
1376                                     &dev->max_queues);
1377            if (err < 0) {
1378                return err;
1379            }
1380        }
1381
1382        if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
1383                !(virtio_has_feature(dev->protocol_features,
1384                    VHOST_USER_PROTOCOL_F_SLAVE_REQ) &&
1385                 virtio_has_feature(dev->protocol_features,
1386                    VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
1387            error_report("IOMMU support requires reply-ack and "
1388                         "slave-req protocol features.");
1389            return -1;
1390        }
1391    }
1392
1393    if (dev->migration_blocker == NULL &&
1394        !virtio_has_feature(dev->protocol_features,
1395                            VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
1396        error_setg(&dev->migration_blocker,
1397                   "Migration disabled: vhost-user backend lacks "
1398                   "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
1399    }
1400
1401    err = vhost_setup_slave_channel(dev);
1402    if (err < 0) {
1403        return err;
1404    }
1405
1406    u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
1407    postcopy_add_notifier(&u->postcopy_notifier);
1408
1409    return 0;
1410}
1411
1412static int vhost_user_backend_cleanup(struct vhost_dev *dev)
1413{
1414    struct vhost_user *u;
1415
1416    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1417
1418    u = dev->opaque;
1419    if (u->postcopy_notifier.notify) {
1420        postcopy_remove_notifier(&u->postcopy_notifier);
1421        u->postcopy_notifier.notify = NULL;
1422    }
1423    u->postcopy_listen = false;
1424    if (u->postcopy_fd.handler) {
1425        postcopy_unregister_shared_ufd(&u->postcopy_fd);
1426        close(u->postcopy_fd.fd);
1427        u->postcopy_fd.handler = NULL;
1428    }
1429    if (u->slave_fd >= 0) {
1430        qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
1431        close(u->slave_fd);
1432        u->slave_fd = -1;
1433    }
1434    g_free(u->region_rb);
1435    u->region_rb = NULL;
1436    g_free(u->region_rb_offset);
1437    u->region_rb_offset = NULL;
1438    u->region_rb_len = 0;
1439    g_free(u);
1440    dev->opaque = 0;
1441
1442    return 0;
1443}
1444
1445static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
1446{
1447    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
1448
1449    return idx;
1450}
1451
1452static int vhost_user_memslots_limit(struct vhost_dev *dev)
1453{
1454    return VHOST_MEMORY_MAX_NREGIONS;
1455}
1456
1457static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
1458{
1459    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1460
1461    return virtio_has_feature(dev->protocol_features,
1462                              VHOST_USER_PROTOCOL_F_LOG_SHMFD);
1463}
1464
1465static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
1466{
1467    VhostUserMsg msg = { };
1468
1469    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1470
1471    /* If guest supports GUEST_ANNOUNCE do nothing */
1472    if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
1473        return 0;
1474    }
1475
 1476    /* If the backend supports VHOST_USER_PROTOCOL_F_RARP, ask it to send the RARP */
1477    if (virtio_has_feature(dev->protocol_features,
1478                           VHOST_USER_PROTOCOL_F_RARP)) {
1479        msg.hdr.request = VHOST_USER_SEND_RARP;
1480        msg.hdr.flags = VHOST_USER_VERSION;
1481        memcpy((char *)&msg.payload.u64, mac_addr, 6);
1482        msg.hdr.size = sizeof(msg.payload.u64);
1483
1484        return vhost_user_write(dev, &msg, NULL, 0);
1485    }
1486    return -1;
1487}
1488
1489static bool vhost_user_can_merge(struct vhost_dev *dev,
1490                                 uint64_t start1, uint64_t size1,
1491                                 uint64_t start2, uint64_t size2)
1492{
1493    ram_addr_t offset;
1494    int mfd, rfd;
1495    MemoryRegion *mr;
1496
1497    mr = memory_region_from_host((void *)(uintptr_t)start1, &offset);
1498    mfd = memory_region_get_fd(mr);
1499
1500    mr = memory_region_from_host((void *)(uintptr_t)start2, &offset);
1501    rfd = memory_region_get_fd(mr);
1502
1503    return mfd == rfd;
1504}
1505
1506static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
1507{
1508    VhostUserMsg msg;
1509    bool reply_supported = virtio_has_feature(dev->protocol_features,
1510                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
1511
1512    if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
1513        return 0;
1514    }
1515
1516    msg.hdr.request = VHOST_USER_NET_SET_MTU;
1517    msg.payload.u64 = mtu;
1518    msg.hdr.size = sizeof(msg.payload.u64);
1519    msg.hdr.flags = VHOST_USER_VERSION;
1520    if (reply_supported) {
1521        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1522    }
1523
1524    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1525        return -1;
1526    }
1527
 1528    /* If reply_ack is supported, the slave must ack that the MTU is valid */
1529    if (reply_supported) {
1530        return process_message_reply(dev, &msg);
1531    }
1532
1533    return 0;
1534}
1535
1536static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
1537                                            struct vhost_iotlb_msg *imsg)
1538{
1539    VhostUserMsg msg = {
1540        .hdr.request = VHOST_USER_IOTLB_MSG,
1541        .hdr.size = sizeof(msg.payload.iotlb),
1542        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1543        .payload.iotlb = *imsg,
1544    };
1545
1546    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1547        return -EFAULT;
1548    }
1549
1550    return process_message_reply(dev, &msg);
1551}
1552
1553
1554static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
1555{
1556    /* No-op as the receive channel is not dedicated to IOTLB messages. */
1557}
1558
1559static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
1560                                 uint32_t config_len)
1561{
1562    VhostUserMsg msg = {
1563        .hdr.request = VHOST_USER_GET_CONFIG,
1564        .hdr.flags = VHOST_USER_VERSION,
1565        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
1566    };
1567
1568    if (!virtio_has_feature(dev->protocol_features,
1569                VHOST_USER_PROTOCOL_F_CONFIG)) {
1570        return -1;
1571    }
1572
1573    if (config_len > VHOST_USER_MAX_CONFIG_SIZE) {
1574        return -1;
1575    }
1576
1577    msg.payload.config.offset = 0;
1578    msg.payload.config.size = config_len;
1579    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1580        return -1;
1581    }
1582
1583    if (vhost_user_read(dev, &msg) < 0) {
1584        return -1;
1585    }
1586
1587    if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
1588        error_report("Received unexpected msg type. Expected %d received %d",
1589                     VHOST_USER_GET_CONFIG, msg.hdr.request);
1590        return -1;
1591    }
1592
1593    if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
1594        error_report("Received bad msg size.");
1595        return -1;
1596    }
1597
1598    memcpy(config, msg.payload.config.region, config_len);
1599
1600    return 0;
1601}
1602
1603static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
1604                                 uint32_t offset, uint32_t size, uint32_t flags)
1605{
1606    uint8_t *p;
1607    bool reply_supported = virtio_has_feature(dev->protocol_features,
1608                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
1609
1610    VhostUserMsg msg = {
1611        .hdr.request = VHOST_USER_SET_CONFIG,
1612        .hdr.flags = VHOST_USER_VERSION,
1613        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
1614    };
1615
1616    if (!virtio_has_feature(dev->protocol_features,
1617                VHOST_USER_PROTOCOL_F_CONFIG)) {
1618        return -1;
1619    }
1620
1621    if (reply_supported) {
1622        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1623    }
1624
1625    if (size > VHOST_USER_MAX_CONFIG_SIZE) {
1626        return -1;
1627    }
1628
1629    msg.payload.config.offset = offset,
1630    msg.payload.config.size = size,
1631    msg.payload.config.flags = flags,
1632    p = msg.payload.config.region;
1633    memcpy(p, data, size);
1634
1635    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1636        return -1;
1637    }
1638
1639    if (reply_supported) {
1640        return process_message_reply(dev, &msg);
1641    }
1642
1643    return 0;
1644}
1645
1646static int vhost_user_crypto_create_session(struct vhost_dev *dev,
1647                                            void *session_info,
1648                                            uint64_t *session_id)
1649{
1650    bool crypto_session = virtio_has_feature(dev->protocol_features,
1651                                       VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
1652    CryptoDevBackendSymSessionInfo *sess_info = session_info;
1653    VhostUserMsg msg = {
1654        .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
1655        .hdr.flags = VHOST_USER_VERSION,
1656        .hdr.size = sizeof(msg.payload.session),
1657    };
1658
1659    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1660
1661    if (!crypto_session) {
1662        error_report("vhost-user trying to send unhandled ioctl");
1663        return -1;
1664    }
1665
1666    memcpy(&msg.payload.session.session_setup_data, sess_info,
1667              sizeof(CryptoDevBackendSymSessionInfo));
1668    if (sess_info->key_len) {
1669        memcpy(&msg.payload.session.key, sess_info->cipher_key,
1670               sess_info->key_len);
1671    }
1672    if (sess_info->auth_key_len > 0) {
1673        memcpy(&msg.payload.session.auth_key, sess_info->auth_key,
1674               sess_info->auth_key_len);
1675    }
1676    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1677        error_report("vhost_user_write() return -1, create session failed");
1678        return -1;
1679    }
1680
1681    if (vhost_user_read(dev, &msg) < 0) {
1682        error_report("vhost_user_read() return -1, create session failed");
1683        return -1;
1684    }
1685
1686    if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
1687        error_report("Received unexpected msg type. Expected %d received %d",
1688                     VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
1689        return -1;
1690    }
1691
1692    if (msg.hdr.size != sizeof(msg.payload.session)) {
1693        error_report("Received bad msg size.");
1694        return -1;
1695    }
1696
1697    if (msg.payload.session.session_id < 0) {
1698        error_report("Bad session id: %" PRId64 "",
1699                              msg.payload.session.session_id);
1700        return -1;
1701    }
1702    *session_id = msg.payload.session.session_id;
1703
1704    return 0;
1705}
1706
1707static int
1708vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
1709{
1710    bool crypto_session = virtio_has_feature(dev->protocol_features,
1711                                       VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
1712    VhostUserMsg msg = {
1713        .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION,
1714        .hdr.flags = VHOST_USER_VERSION,
1715        .hdr.size = sizeof(msg.payload.u64),
1716    };
1717    msg.payload.u64 = session_id;
1718
1719    if (!crypto_session) {
1720        error_report("vhost-user trying to send unhandled ioctl");
1721        return -1;
1722    }
1723
1724    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1725        error_report("vhost_user_write() return -1, close session failed");
1726        return -1;
1727    }
1728
1729    return 0;
1730}
1731
1732static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
1733                                          MemoryRegionSection *section)
1734{
1735    bool result;
1736
1737    result = memory_region_get_fd(section->mr) >= 0;
1738
1739    return result;
1740}
1741
1742VhostUserState *vhost_user_init(void)
1743{
1744    VhostUserState *user = g_new0(struct VhostUserState, 1);
1745
1746    return user;
1747}
1748
1749void vhost_user_cleanup(VhostUserState *user)
1750{
1751    int i;
1752
1753    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1754        if (user->notifier[i].addr) {
1755            object_unparent(OBJECT(&user->notifier[i].mr));
1756            munmap(user->notifier[i].addr, qemu_real_host_page_size);
1757            user->notifier[i].addr = NULL;
1758        }
1759    }
1760}
1761
1762const VhostOps user_ops = {
1763        .backend_type = VHOST_BACKEND_TYPE_USER,
1764        .vhost_backend_init = vhost_user_backend_init,
1765        .vhost_backend_cleanup = vhost_user_backend_cleanup,
1766        .vhost_backend_memslots_limit = vhost_user_memslots_limit,
1767        .vhost_set_log_base = vhost_user_set_log_base,
1768        .vhost_set_mem_table = vhost_user_set_mem_table,
1769        .vhost_set_vring_addr = vhost_user_set_vring_addr,
1770        .vhost_set_vring_endian = vhost_user_set_vring_endian,
1771        .vhost_set_vring_num = vhost_user_set_vring_num,
1772        .vhost_set_vring_base = vhost_user_set_vring_base,
1773        .vhost_get_vring_base = vhost_user_get_vring_base,
1774        .vhost_set_vring_kick = vhost_user_set_vring_kick,
1775        .vhost_set_vring_call = vhost_user_set_vring_call,
1776        .vhost_set_features = vhost_user_set_features,
1777        .vhost_get_features = vhost_user_get_features,
1778        .vhost_set_owner = vhost_user_set_owner,
1779        .vhost_reset_device = vhost_user_reset_device,
1780        .vhost_get_vq_index = vhost_user_get_vq_index,
1781        .vhost_set_vring_enable = vhost_user_set_vring_enable,
1782        .vhost_requires_shm_log = vhost_user_requires_shm_log,
1783        .vhost_migration_done = vhost_user_migration_done,
1784        .vhost_backend_can_merge = vhost_user_can_merge,
1785        .vhost_net_set_mtu = vhost_user_net_set_mtu,
1786        .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
1787        .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
1788        .vhost_get_config = vhost_user_get_config,
1789        .vhost_set_config = vhost_user_set_config,
1790        .vhost_crypto_create_session = vhost_user_crypto_create_session,
1791        .vhost_crypto_close_session = vhost_user_crypto_close_session,
1792        .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
1793};
1794