/* qemu/hw/virtio/vhost-user.c */
   1/*
   2 * vhost-user
   3 *
   4 * Copyright (c) 2013 Virtual Open Systems Sarl.
   5 *
   6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
   7 * See the COPYING file in the top-level directory.
   8 *
   9 */
  10
  11#include "qemu/osdep.h"
  12#include "qapi/error.h"
  13#include "hw/virtio/vhost.h"
  14#include "hw/virtio/vhost-user.h"
  15#include "hw/virtio/vhost-backend.h"
  16#include "hw/virtio/virtio.h"
  17#include "hw/virtio/virtio-net.h"
  18#include "chardev/char-fe.h"
  19#include "sysemu/kvm.h"
  20#include "qemu/error-report.h"
  21#include "qemu/sockets.h"
  22#include "sysemu/cryptodev.h"
  23#include "migration/migration.h"
  24#include "migration/postcopy-ram.h"
  25#include "trace.h"
  26
  27#include <sys/ioctl.h>
  28#include <sys/socket.h>
  29#include <sys/un.h>
  30#include <linux/vhost.h>
  31#include <linux/userfaultfd.h>
  32
/* Max memory regions (and thus region fds) in one SET_MEM_TABLE message. */
#define VHOST_MEMORY_MAX_NREGIONS    8
/* Virtio feature bit: slave supports the protocol-feature handshake. */
#define VHOST_USER_F_PROTOCOL_FEATURES 30
/* Max fds a slave may attach to a single slave-channel message. */
#define VHOST_USER_SLAVE_MAX_FDS     8

/*
 * Maximum size of virtio device config space
 */
#define VHOST_USER_MAX_CONFIG_SIZE 256

/* Optional capabilities negotiated via GET/SET_PROTOCOL_FEATURES.
 * Bit numbers are wire ABI and must not be renumbered. */
enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,
    VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
    VHOST_USER_PROTOCOL_F_NET_MTU = 4,
    VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
    VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
    VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
    VHOST_USER_PROTOCOL_F_CONFIG = 9,
    VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
    VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
    VHOST_USER_PROTOCOL_F_MAX
};

/* Mask covering every protocol feature bit defined above. */
#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
  59
/* Master-to-slave request codes.  Values are wire ABI: new requests are
 * appended before VHOST_USER_MAX, existing ones are never renumbered. */
typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    VHOST_USER_NET_SET_MTU = 20,
    VHOST_USER_SET_SLAVE_REQ_FD = 21,
    VHOST_USER_IOTLB_MSG = 22,
    VHOST_USER_SET_VRING_ENDIAN = 23,
    VHOST_USER_GET_CONFIG = 24,
    VHOST_USER_SET_CONFIG = 25,
    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
    VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
    VHOST_USER_POSTCOPY_ADVISE  = 28,
    VHOST_USER_POSTCOPY_LISTEN  = 29,
    VHOST_USER_POSTCOPY_END     = 30,
    VHOST_USER_MAX
} VhostUserRequest;
  94
/* Slave-to-master request codes, carried over the slave channel
 * (u->slave_fd).  Also wire ABI. */
typedef enum VhostUserSlaveRequest {
    VHOST_USER_SLAVE_NONE = 0,
    VHOST_USER_SLAVE_IOTLB_MSG = 1,
    VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
    VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
    VHOST_USER_SLAVE_MAX
}  VhostUserSlaveRequest;
 102
/* One guest memory region as described on the wire. */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    uint64_t mmap_offset;   /* offset into the fd sent with the region */
} VhostUserMemoryRegion;

/* Payload of SET_MEM_TABLE; only the first nregions entries are valid. */
typedef struct VhostUserMemory {
    uint32_t nregions;
    uint32_t padding;
    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
} VhostUserMemory;

/* Payload of SET_LOG_BASE when the dirty log is a shared mmap. */
typedef struct VhostUserLog {
    uint64_t mmap_size;
    uint64_t mmap_offset;
} VhostUserLog;

/* Payload of GET/SET_CONFIG: a window into the device config space. */
typedef struct VhostUserConfig {
    uint32_t offset;
    uint32_t size;
    uint32_t flags;
    uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
} VhostUserConfig;

#define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN    512
#define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN  64

/* Payload of CREATE_CRYPTO_SESSION; the slave fills in session_id. */
typedef struct VhostUserCryptoSession {
    /* session id for success, -1 on errors */
    int64_t session_id;
    CryptoDevBackendSymSessionInfo session_setup_data;
    uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
    uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
} VhostUserCryptoSession;

/* Dummy instance used only so sizeof() can name the header fields. */
static VhostUserConfig c __attribute__ ((unused));
#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
                                   + sizeof(c.size) \
                                   + sizeof(c.flags))

/* Payload of SLAVE_VRING_HOST_NOTIFIER_MSG: u64 packs the queue index
 * (and NOFD flag); size/offset describe the area inside the passed fd. */
typedef struct VhostUserVringArea {
    uint64_t u64;
    uint64_t size;
    uint64_t offset;
} VhostUserVringArea;
 149
/* Fixed-size header preceding every vhost-user message on the wire. */
typedef struct {
    VhostUserRequest request;

#define VHOST_USER_VERSION_MASK     (0x3)
#define VHOST_USER_REPLY_MASK       (0x1<<2)
#define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
    uint32_t flags;
    uint32_t size; /* the following payload size */
} QEMU_PACKED VhostUserHeader;

/* Per-request payload; hdr.size says how many bytes of it are valid. */
typedef union {
#define VHOST_USER_VRING_IDX_MASK   (0xff)
#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
        uint64_t u64;
        struct vhost_vring_state state;
        struct vhost_vring_addr addr;
        VhostUserMemory memory;
        VhostUserLog log;
        struct vhost_iotlb_msg iotlb;
        VhostUserConfig config;
        VhostUserCryptoSession session;
        VhostUserVringArea area;
} VhostUserPayload;

typedef struct VhostUserMsg {
    VhostUserHeader hdr;
    VhostUserPayload payload;
} QEMU_PACKED VhostUserMsg;

static VhostUserMsg m __attribute__ ((unused));
#define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))

#define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))

/* The version of the protocol we support */
#define VHOST_USER_VERSION    (0x1)
 186
/* Per-vhost_dev backend state for a vhost-user connection. */
struct vhost_user {
    struct vhost_dev *dev;
    /* Shared between vhost devs of the same virtio device */
    VhostUserState *user;
    /* fd of the slave request channel, read by slave_read() */
    int slave_fd;
    NotifierWithReturn postcopy_notifier;
    struct PostCopyFD  postcopy_fd;
    /* Addresses the slave reported mapping each region at in its
     * postcopy SET_MEM_TABLE reply; indexed like dev->mem->regions. */
    uint64_t           postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS];
    /* Length of the region_rb and region_rb_offset arrays */
    size_t             region_rb_len;
    /* RAMBlock associated with a given region */
    RAMBlock         **region_rb;
    /* The offset from the start of the RAMBlock to the start of the
     * vhost region.
     */
    ram_addr_t        *region_rb_offset;

    /* True once we've entered postcopy_listen */
    bool               postcopy_listen;
};
 207
 208static bool ioeventfd_enabled(void)
 209{
 210    return kvm_enabled() && kvm_eventfds_enabled();
 211}
 212
/*
 * Read one message from the slave into *msg: the fixed-size header
 * first and then, if the header announces one, hdr.size payload bytes
 * into the same buffer.  Returns 0 on success, -1 on short read or a
 * malformed header (bad flags, oversized payload).
 */
static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    uint8_t *p = (uint8_t *) msg;
    int r, size = VHOST_USER_HDR_SIZE;

    r = qemu_chr_fe_read_all(chr, p, size);
    if (r != size) {
        error_report("Failed to read msg header. Read %d instead of %d."
                     " Original request %d.", r, size, msg->hdr.request);
        goto fail;
    }

    /* validate received flags */
    if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
        error_report("Failed to read msg header."
                " Flags 0x%x instead of 0x%x.", msg->hdr.flags,
                VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
        goto fail;
    }

    /* validate message size is sane */
    if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
        error_report("Failed to read msg header."
                " Size %d exceeds the maximum %zu.", msg->hdr.size,
                VHOST_USER_PAYLOAD_SIZE);
        goto fail;
    }

    if (msg->hdr.size) {
        /* Payload lands immediately after the packed header in *msg. */
        p += VHOST_USER_HDR_SIZE;
        size = msg->hdr.size;
        r = qemu_chr_fe_read_all(chr, p, size);
        if (r != size) {
            error_report("Failed to read msg payload."
                         " Read %d instead of %d.", r, msg->hdr.size);
            goto fail;
        }
    }

    return 0;

fail:
    return -1;
}
 259
 260static int process_message_reply(struct vhost_dev *dev,
 261                                 const VhostUserMsg *msg)
 262{
 263    VhostUserMsg msg_reply;
 264
 265    if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
 266        return 0;
 267    }
 268
 269    if (vhost_user_read(dev, &msg_reply) < 0) {
 270        return -1;
 271    }
 272
 273    if (msg_reply.hdr.request != msg->hdr.request) {
 274        error_report("Received unexpected msg type."
 275                     "Expected %d received %d",
 276                     msg->hdr.request, msg_reply.hdr.request);
 277        return -1;
 278    }
 279
 280    return msg_reply.payload.u64 ? -1 : 0;
 281}
 282
 283static bool vhost_user_one_time_request(VhostUserRequest request)
 284{
 285    switch (request) {
 286    case VHOST_USER_SET_OWNER:
 287    case VHOST_USER_RESET_OWNER:
 288    case VHOST_USER_SET_MEM_TABLE:
 289    case VHOST_USER_GET_QUEUE_NUM:
 290    case VHOST_USER_NET_SET_MTU:
 291        return true;
 292    default:
 293        return false;
 294    }
 295}
 296
/* most non-init callers ignore the error */
static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
                            int *fds, int fd_num)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;

    /*
     * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
     * we just need send it once in the first time. For later such
     * request, we just ignore it.
     */
    if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) {
        /* Clear NEED_REPLY so the caller doesn't wait for an ack to a
         * message that was never sent. */
        msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
        return 0;
    }

    /* Ancillary fds (if any) are attached to the next write. */
    if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
        error_report("Failed to set msg fds.");
        return -1;
    }

    ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
    if (ret != size) {
        error_report("Failed to write msg."
                     " Wrote %d instead of %d.", ret, size);
        return -1;
    }

    return 0;
}
 329
/*
 * Tell the slave where the dirty log lives.  When the slave supports
 * LOG_SHMFD the log's fd is passed along and the slave acks with a
 * (payload-less) SET_LOG_BASE reply once it has mapped it.
 * Returns 0 on success, -1 on failure.
 */
static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    int fds[VHOST_MEMORY_MAX_NREGIONS];
    size_t fd_num = 0;
    bool shmfd = virtio_has_feature(dev->protocol_features,
                                    VHOST_USER_PROTOCOL_F_LOG_SHMFD);
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_LOG_BASE,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.log.mmap_size = log->size * sizeof(*(log->log)),
        .payload.log.mmap_offset = 0,
        .hdr.size = sizeof(msg.payload.log),
    };

    if (shmfd && log->fd != -1) {
        fds[fd_num++] = log->fd;
    }

    if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
        return -1;
    }

    if (shmfd) {
        /* The reply is expected to carry no payload. */
        msg.hdr.size = 0;
        if (vhost_user_read(dev, &msg) < 0) {
            return -1;
        }

        if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
            error_report("Received unexpected msg type. "
                         "Expected %d received %d",
                         VHOST_USER_SET_LOG_BASE, msg.hdr.request);
            return -1;
        }
    }

    return 0;
}
 369
 370static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
 371                                             struct vhost_memory *mem)
 372{
 373    struct vhost_user *u = dev->opaque;
 374    int fds[VHOST_MEMORY_MAX_NREGIONS];
 375    int i, fd;
 376    size_t fd_num = 0;
 377    bool reply_supported = virtio_has_feature(dev->protocol_features,
 378                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
 379    VhostUserMsg msg_reply;
 380    int region_i, msg_i;
 381
 382    VhostUserMsg msg = {
 383        .hdr.request = VHOST_USER_SET_MEM_TABLE,
 384        .hdr.flags = VHOST_USER_VERSION,
 385    };
 386
 387    if (reply_supported) {
 388        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
 389    }
 390
 391    if (u->region_rb_len < dev->mem->nregions) {
 392        u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
 393        u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
 394                                      dev->mem->nregions);
 395        memset(&(u->region_rb[u->region_rb_len]), '\0',
 396               sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
 397        memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
 398               sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
 399        u->region_rb_len = dev->mem->nregions;
 400    }
 401
 402    for (i = 0; i < dev->mem->nregions; ++i) {
 403        struct vhost_memory_region *reg = dev->mem->regions + i;
 404        ram_addr_t offset;
 405        MemoryRegion *mr;
 406
 407        assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
 408        mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
 409                                     &offset);
 410        fd = memory_region_get_fd(mr);
 411        if (fd > 0) {
 412            trace_vhost_user_set_mem_table_withfd(fd_num, mr->name,
 413                                                  reg->memory_size,
 414                                                  reg->guest_phys_addr,
 415                                                  reg->userspace_addr, offset);
 416            u->region_rb_offset[i] = offset;
 417            u->region_rb[i] = mr->ram_block;
 418            msg.payload.memory.regions[fd_num].userspace_addr =
 419                reg->userspace_addr;
 420            msg.payload.memory.regions[fd_num].memory_size  = reg->memory_size;
 421            msg.payload.memory.regions[fd_num].guest_phys_addr =
 422                reg->guest_phys_addr;
 423            msg.payload.memory.regions[fd_num].mmap_offset = offset;
 424            assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
 425            fds[fd_num++] = fd;
 426        } else {
 427            u->region_rb_offset[i] = 0;
 428            u->region_rb[i] = NULL;
 429        }
 430    }
 431
 432    msg.payload.memory.nregions = fd_num;
 433
 434    if (!fd_num) {
 435        error_report("Failed initializing vhost-user memory map, "
 436                     "consider using -object memory-backend-file share=on");
 437        return -1;
 438    }
 439
 440    msg.hdr.size = sizeof(msg.payload.memory.nregions);
 441    msg.hdr.size += sizeof(msg.payload.memory.padding);
 442    msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion);
 443
 444    if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
 445        return -1;
 446    }
 447
 448    if (vhost_user_read(dev, &msg_reply) < 0) {
 449        return -1;
 450    }
 451
 452    if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
 453        error_report("%s: Received unexpected msg type."
 454                     "Expected %d received %d", __func__,
 455                     VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
 456        return -1;
 457    }
 458    /* We're using the same structure, just reusing one of the
 459     * fields, so it should be the same size.
 460     */
 461    if (msg_reply.hdr.size != msg.hdr.size) {
 462        error_report("%s: Unexpected size for postcopy reply "
 463                     "%d vs %d", __func__, msg_reply.hdr.size, msg.hdr.size);
 464        return -1;
 465    }
 466
 467    memset(u->postcopy_client_bases, 0,
 468           sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS);
 469
 470    /* They're in the same order as the regions that were sent
 471     * but some of the regions were skipped (above) if they
 472     * didn't have fd's
 473    */
 474    for (msg_i = 0, region_i = 0;
 475         region_i < dev->mem->nregions;
 476        region_i++) {
 477        if (msg_i < fd_num &&
 478            msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
 479            dev->mem->regions[region_i].guest_phys_addr) {
 480            u->postcopy_client_bases[region_i] =
 481                msg_reply.payload.memory.regions[msg_i].userspace_addr;
 482            trace_vhost_user_set_mem_table_postcopy(
 483                msg_reply.payload.memory.regions[msg_i].userspace_addr,
 484                msg.payload.memory.regions[msg_i].userspace_addr,
 485                msg_i, region_i);
 486            msg_i++;
 487        }
 488    }
 489    if (msg_i != fd_num) {
 490        error_report("%s: postcopy reply not fully consumed "
 491                     "%d vs %zd",
 492                     __func__, msg_i, fd_num);
 493        return -1;
 494    }
 495    /* Now we've registered this with the postcopy code, we ack to the client,
 496     * because now we're in the position to be able to deal with any faults
 497     * it generates.
 498     */
 499    /* TODO: Use this for failure cases as well with a bad value */
 500    msg.hdr.size = sizeof(msg.payload.u64);
 501    msg.payload.u64 = 0; /* OK */
 502    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 503        return -1;
 504    }
 505
 506    if (reply_supported) {
 507        return process_message_reply(dev, &msg);
 508    }
 509
 510    return 0;
 511}
 512
/*
 * Send the guest memory map to the slave.  Only fd-backed regions can
 * be shared, so regions without an fd are skipped; during postcopy the
 * specialised variant above is used instead.  Returns 0 on success,
 * -1 on failure.
 */
static int vhost_user_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    struct vhost_user *u = dev->opaque;
    int fds[VHOST_MEMORY_MAX_NREGIONS];
    int i, fd;
    size_t fd_num = 0;
    bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
    /* REPLY_ACK is not used here in postcopy mode; the postcopy path
     * has its own explicit ack sequence. */
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                          VHOST_USER_PROTOCOL_F_REPLY_ACK) &&
                                          !do_postcopy;

    if (do_postcopy) {
        /* Postcopy has enough differences that it's best done in it's own
         * version
         */
        return vhost_user_set_mem_table_postcopy(dev, mem);
    }

    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_MEM_TABLE,
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    for (i = 0; i < dev->mem->nregions; ++i) {
        struct vhost_memory_region *reg = dev->mem->regions + i;
        ram_addr_t offset;
        MemoryRegion *mr;

        assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
        mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
                                     &offset);
        fd = memory_region_get_fd(mr);
        if (fd > 0) {
            if (fd_num == VHOST_MEMORY_MAX_NREGIONS) {
                error_report("Failed preparing vhost-user memory table msg");
                return -1;
            }
            msg.payload.memory.regions[fd_num].userspace_addr =
                reg->userspace_addr;
            msg.payload.memory.regions[fd_num].memory_size  = reg->memory_size;
            msg.payload.memory.regions[fd_num].guest_phys_addr =
                reg->guest_phys_addr;
            msg.payload.memory.regions[fd_num].mmap_offset = offset;
            fds[fd_num++] = fd;
        }
    }

    msg.payload.memory.nregions = fd_num;

    if (!fd_num) {
        error_report("Failed initializing vhost-user memory map, "
                     "consider using -object memory-backend-file share=on");
        return -1;
    }

    /* Only the populated region descriptors are sent. */
    msg.hdr.size = sizeof(msg.payload.memory.nregions);
    msg.hdr.size += sizeof(msg.payload.memory.padding);
    msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion);

    if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
        return -1;
    }

    if (reply_supported) {
        return process_message_reply(dev, &msg);
    }

    return 0;
}
 587
 588static int vhost_user_set_vring_addr(struct vhost_dev *dev,
 589                                     struct vhost_vring_addr *addr)
 590{
 591    VhostUserMsg msg = {
 592        .hdr.request = VHOST_USER_SET_VRING_ADDR,
 593        .hdr.flags = VHOST_USER_VERSION,
 594        .payload.addr = *addr,
 595        .hdr.size = sizeof(msg.payload.addr),
 596    };
 597
 598    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 599        return -1;
 600    }
 601
 602    return 0;
 603}
 604
 605static int vhost_user_set_vring_endian(struct vhost_dev *dev,
 606                                       struct vhost_vring_state *ring)
 607{
 608    bool cross_endian = virtio_has_feature(dev->protocol_features,
 609                                           VHOST_USER_PROTOCOL_F_CROSS_ENDIAN);
 610    VhostUserMsg msg = {
 611        .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
 612        .hdr.flags = VHOST_USER_VERSION,
 613        .payload.state = *ring,
 614        .hdr.size = sizeof(msg.payload.state),
 615    };
 616
 617    if (!cross_endian) {
 618        error_report("vhost-user trying to send unhandled ioctl");
 619        return -1;
 620    }
 621
 622    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 623        return -1;
 624    }
 625
 626    return 0;
 627}
 628
 629static int vhost_set_vring(struct vhost_dev *dev,
 630                           unsigned long int request,
 631                           struct vhost_vring_state *ring)
 632{
 633    VhostUserMsg msg = {
 634        .hdr.request = request,
 635        .hdr.flags = VHOST_USER_VERSION,
 636        .payload.state = *ring,
 637        .hdr.size = sizeof(msg.payload.state),
 638    };
 639
 640    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 641        return -1;
 642    }
 643
 644    return 0;
 645}
 646
 647static int vhost_user_set_vring_num(struct vhost_dev *dev,
 648                                    struct vhost_vring_state *ring)
 649{
 650    return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
 651}
 652
 653static void vhost_user_host_notifier_restore(struct vhost_dev *dev,
 654                                             int queue_idx)
 655{
 656    struct vhost_user *u = dev->opaque;
 657    VhostUserHostNotifier *n = &u->user->notifier[queue_idx];
 658    VirtIODevice *vdev = dev->vdev;
 659
 660    if (n->addr && !n->set) {
 661        virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true);
 662        n->set = true;
 663    }
 664}
 665
 666static void vhost_user_host_notifier_remove(struct vhost_dev *dev,
 667                                            int queue_idx)
 668{
 669    struct vhost_user *u = dev->opaque;
 670    VhostUserHostNotifier *n = &u->user->notifier[queue_idx];
 671    VirtIODevice *vdev = dev->vdev;
 672
 673    if (n->addr && n->set) {
 674        virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
 675        n->set = false;
 676    }
 677}
 678
 679static int vhost_user_set_vring_base(struct vhost_dev *dev,
 680                                     struct vhost_vring_state *ring)
 681{
 682    vhost_user_host_notifier_restore(dev, ring->index);
 683
 684    return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
 685}
 686
 687static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
 688{
 689    int i;
 690
 691    if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
 692        return -1;
 693    }
 694
 695    for (i = 0; i < dev->nvqs; ++i) {
 696        struct vhost_vring_state state = {
 697            .index = dev->vq_index + i,
 698            .num   = enable,
 699        };
 700
 701        vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
 702    }
 703
 704    return 0;
 705}
 706
/*
 * Stop a vring and fetch its last avail index from the slave.  The
 * host notifier is removed first, since this request stops the ring.
 * On success *ring is updated and 0 is returned; -1 on failure.
 */
static int vhost_user_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_VRING_BASE,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };

    vhost_user_host_notifier_remove(dev, ring->index);

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    if (vhost_user_read(dev, &msg) < 0) {
        return -1;
    }

    if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_GET_VRING_BASE, msg.hdr.request);
        return -1;
    }

    if (msg.hdr.size != sizeof(msg.payload.state)) {
        error_report("Received bad msg size.");
        return -1;
    }

    *ring = msg.payload.state;

    return 0;
}
 742
/*
 * Send a kick/call/err eventfd for one vring.  When no usable fd is
 * available the NOFD flag is set in the payload instead, telling the
 * slave to poll.  Returns 0 on success, -1 on failure.
 */
static int vhost_set_vring_file(struct vhost_dev *dev,
                                VhostUserRequest request,
                                struct vhost_vring_file *file)
{
    int fds[VHOST_MEMORY_MAX_NREGIONS];
    size_t fd_num = 0;
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
        /* Low byte of u64 carries the queue index. */
        .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
        .hdr.size = sizeof(msg.payload.u64),
    };

    if (ioeventfd_enabled() && file->fd > 0) {
        fds[fd_num++] = file->fd;
    } else {
        msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
    }

    if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
        return -1;
    }

    return 0;
}
 768
 769static int vhost_user_set_vring_kick(struct vhost_dev *dev,
 770                                     struct vhost_vring_file *file)
 771{
 772    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
 773}
 774
 775static int vhost_user_set_vring_call(struct vhost_dev *dev,
 776                                     struct vhost_vring_file *file)
 777{
 778    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
 779}
 780
 781static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64)
 782{
 783    VhostUserMsg msg = {
 784        .hdr.request = request,
 785        .hdr.flags = VHOST_USER_VERSION,
 786        .payload.u64 = u64,
 787        .hdr.size = sizeof(msg.payload.u64),
 788    };
 789
 790    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 791        return -1;
 792    }
 793
 794    return 0;
 795}
 796
 797static int vhost_user_set_features(struct vhost_dev *dev,
 798                                   uint64_t features)
 799{
 800    return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features);
 801}
 802
 803static int vhost_user_set_protocol_features(struct vhost_dev *dev,
 804                                            uint64_t features)
 805{
 806    return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features);
 807}
 808
/*
 * Issue a request and read back a single 64-bit reply into *u64.
 * One-time requests are silently skipped (returning 0, *u64 untouched)
 * on any vhost_dev other than the first queue's.  Returns 0 on
 * success, -1 on failure.
 */
static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
{
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
        return 0;
    }

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    if (vhost_user_read(dev, &msg) < 0) {
        return -1;
    }

    if (msg.hdr.request != request) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     request, msg.hdr.request);
        return -1;
    }

    if (msg.hdr.size != sizeof(msg.payload.u64)) {
        error_report("Received bad msg size.");
        return -1;
    }

    *u64 = msg.payload.u64;

    return 0;
}
 843
 844static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
 845{
 846    return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features);
 847}
 848
 849static int vhost_user_set_owner(struct vhost_dev *dev)
 850{
 851    VhostUserMsg msg = {
 852        .hdr.request = VHOST_USER_SET_OWNER,
 853        .hdr.flags = VHOST_USER_VERSION,
 854    };
 855
 856    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 857        return -1;
 858    }
 859
 860    return 0;
 861}
 862
 863static int vhost_user_reset_device(struct vhost_dev *dev)
 864{
 865    VhostUserMsg msg = {
 866        .hdr.request = VHOST_USER_RESET_OWNER,
 867        .hdr.flags = VHOST_USER_VERSION,
 868    };
 869
 870    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
 871        return -1;
 872    }
 873
 874    return 0;
 875}
 876
 877static int vhost_user_slave_handle_config_change(struct vhost_dev *dev)
 878{
 879    int ret = -1;
 880
 881    if (!dev->config_ops) {
 882        return -1;
 883    }
 884
 885    if (dev->config_ops->vhost_dev_config_notifier) {
 886        ret = dev->config_ops->vhost_dev_config_notifier(dev);
 887    }
 888
 889    return ret;
 890}
 891
/*
 * Handle a slave request to (un)install a host notifier for one queue:
 * the slave passes an fd plus an offset/size describing a page the
 * guest can write to directly instead of a doorbell exit.  Any
 * previous mapping for the queue is always torn down first; with the
 * NOFD flag that teardown is the whole operation.  Returns 0 on
 * success, -1 on failure.
 */
static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
                                                       VhostUserVringArea *area,
                                                       int fd)
{
    int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK;
    size_t page_size = qemu_real_host_page_size;
    struct vhost_user *u = dev->opaque;
    VhostUserState *user = u->user;
    VirtIODevice *vdev = dev->vdev;
    VhostUserHostNotifier *n;
    void *addr;
    char *name;

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) ||
        vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) {
        return -1;
    }

    n = &user->notifier[queue_idx];

    /* Tear down any existing notifier mapping for this queue. */
    if (n->addr) {
        virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
        object_unparent(OBJECT(&n->mr));
        munmap(n->addr, page_size);
        n->addr = NULL;
    }

    if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
        return 0;
    }

    /* Sanity check. */
    if (area->size != page_size) {
        return -1;
    }

    addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                fd, area->offset);
    if (addr == MAP_FAILED) {
        return -1;
    }

    name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
                           user, queue_idx);
    memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
                                      page_size, addr);
    g_free(name);

    if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
        munmap(addr, page_size);
        return -1;
    }

    n->addr = addr;
    n->set = true;

    return 0;
}
 951
 952static void slave_read(void *opaque)
 953{
 954    struct vhost_dev *dev = opaque;
 955    struct vhost_user *u = dev->opaque;
 956    VhostUserHeader hdr = { 0, };
 957    VhostUserPayload payload = { 0, };
 958    int size, ret = 0;
 959    struct iovec iov;
 960    struct msghdr msgh;
 961    int fd[VHOST_USER_SLAVE_MAX_FDS];
 962    char control[CMSG_SPACE(sizeof(fd))];
 963    struct cmsghdr *cmsg;
 964    int i, fdsize = 0;
 965
 966    memset(&msgh, 0, sizeof(msgh));
 967    msgh.msg_iov = &iov;
 968    msgh.msg_iovlen = 1;
 969    msgh.msg_control = control;
 970    msgh.msg_controllen = sizeof(control);
 971
 972    memset(fd, -1, sizeof(fd));
 973
 974    /* Read header */
 975    iov.iov_base = &hdr;
 976    iov.iov_len = VHOST_USER_HDR_SIZE;
 977
 978    size = recvmsg(u->slave_fd, &msgh, 0);
 979    if (size != VHOST_USER_HDR_SIZE) {
 980        error_report("Failed to read from slave.");
 981        goto err;
 982    }
 983
 984    if (msgh.msg_flags & MSG_CTRUNC) {
 985        error_report("Truncated message.");
 986        goto err;
 987    }
 988
 989    for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
 990         cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
 991            if (cmsg->cmsg_level == SOL_SOCKET &&
 992                cmsg->cmsg_type == SCM_RIGHTS) {
 993                    fdsize = cmsg->cmsg_len - CMSG_LEN(0);
 994                    memcpy(fd, CMSG_DATA(cmsg), fdsize);
 995                    break;
 996            }
 997    }
 998
 999    if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
1000        error_report("Failed to read msg header."
1001                " Size %d exceeds the maximum %zu.", hdr.size,
1002                VHOST_USER_PAYLOAD_SIZE);
1003        goto err;
1004    }
1005
1006    /* Read payload */
1007    size = read(u->slave_fd, &payload, hdr.size);
1008    if (size != hdr.size) {
1009        error_report("Failed to read payload from slave.");
1010        goto err;
1011    }
1012
1013    switch (hdr.request) {
1014    case VHOST_USER_SLAVE_IOTLB_MSG:
1015        ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
1016        break;
1017    case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG :
1018        ret = vhost_user_slave_handle_config_change(dev);
1019        break;
1020    case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
1021        ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area,
1022                                                          fd[0]);
1023        break;
1024    default:
1025        error_report("Received unexpected msg type.");
1026        ret = -EINVAL;
1027    }
1028
1029    /* Close the remaining file descriptors. */
1030    for (i = 0; i < fdsize; i++) {
1031        if (fd[i] != -1) {
1032            close(fd[i]);
1033        }
1034    }
1035
1036    /*
1037     * REPLY_ACK feature handling. Other reply types has to be managed
1038     * directly in their request handlers.
1039     */
1040    if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
1041        struct iovec iovec[2];
1042
1043
1044        hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
1045        hdr.flags |= VHOST_USER_REPLY_MASK;
1046
1047        payload.u64 = !!ret;
1048        hdr.size = sizeof(payload.u64);
1049
1050        iovec[0].iov_base = &hdr;
1051        iovec[0].iov_len = VHOST_USER_HDR_SIZE;
1052        iovec[1].iov_base = &payload;
1053        iovec[1].iov_len = hdr.size;
1054
1055        size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec));
1056        if (size != VHOST_USER_HDR_SIZE + hdr.size) {
1057            error_report("Failed to send msg reply to slave.");
1058            goto err;
1059        }
1060    }
1061
1062    return;
1063
1064err:
1065    qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
1066    close(u->slave_fd);
1067    u->slave_fd = -1;
1068    for (i = 0; i < fdsize; i++) {
1069        if (fd[i] != -1) {
1070            close(fd[i]);
1071        }
1072    }
1073    return;
1074}
1075
1076static int vhost_setup_slave_channel(struct vhost_dev *dev)
1077{
1078    VhostUserMsg msg = {
1079        .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD,
1080        .hdr.flags = VHOST_USER_VERSION,
1081    };
1082    struct vhost_user *u = dev->opaque;
1083    int sv[2], ret = 0;
1084    bool reply_supported = virtio_has_feature(dev->protocol_features,
1085                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
1086
1087    if (!virtio_has_feature(dev->protocol_features,
1088                            VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
1089        return 0;
1090    }
1091
1092    if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
1093        error_report("socketpair() failed");
1094        return -1;
1095    }
1096
1097    u->slave_fd = sv[0];
1098    qemu_set_fd_handler(u->slave_fd, slave_read, NULL, dev);
1099
1100    if (reply_supported) {
1101        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1102    }
1103
1104    ret = vhost_user_write(dev, &msg, &sv[1], 1);
1105    if (ret) {
1106        goto out;
1107    }
1108
1109    if (reply_supported) {
1110        ret = process_message_reply(dev, &msg);
1111    }
1112
1113out:
1114    close(sv[1]);
1115    if (ret) {
1116        qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
1117        close(u->slave_fd);
1118        u->slave_fd = -1;
1119    }
1120
1121    return ret;
1122}
1123
1124/*
1125 * Called back from the postcopy fault thread when a fault is received on our
1126 * ufd.
1127 * TODO: This is Linux specific
1128 */
1129static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
1130                                             void *ufd)
1131{
1132    struct vhost_dev *dev = pcfd->data;
1133    struct vhost_user *u = dev->opaque;
1134    struct uffd_msg *msg = ufd;
1135    uint64_t faultaddr = msg->arg.pagefault.address;
1136    RAMBlock *rb = NULL;
1137    uint64_t rb_offset;
1138    int i;
1139
1140    trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
1141                                            dev->mem->nregions);
1142    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1143        trace_vhost_user_postcopy_fault_handler_loop(i,
1144                u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
1145        if (faultaddr >= u->postcopy_client_bases[i]) {
1146            /* Ofset of the fault address in the vhost region */
1147            uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
1148            if (region_offset < dev->mem->regions[i].memory_size) {
1149                rb_offset = region_offset + u->region_rb_offset[i];
1150                trace_vhost_user_postcopy_fault_handler_found(i,
1151                        region_offset, rb_offset);
1152                rb = u->region_rb[i];
1153                return postcopy_request_shared_page(pcfd, rb, faultaddr,
1154                                                    rb_offset);
1155            }
1156        }
1157    }
1158    error_report("%s: Failed to find region for fault %" PRIx64,
1159                 __func__, faultaddr);
1160    return -1;
1161}
1162
1163static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
1164                                     uint64_t offset)
1165{
1166    struct vhost_dev *dev = pcfd->data;
1167    struct vhost_user *u = dev->opaque;
1168    int i;
1169
1170    trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);
1171
1172    if (!u) {
1173        return 0;
1174    }
1175    /* Translate the offset into an address in the clients address space */
1176    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1177        if (u->region_rb[i] == rb &&
1178            offset >= u->region_rb_offset[i] &&
1179            offset < (u->region_rb_offset[i] +
1180                      dev->mem->regions[i].memory_size)) {
1181            uint64_t client_addr = (offset - u->region_rb_offset[i]) +
1182                                   u->postcopy_client_bases[i];
1183            trace_vhost_user_postcopy_waker_found(client_addr);
1184            return postcopy_wake_shared(pcfd, client_addr, rb);
1185        }
1186    }
1187
1188    trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
1189    return 0;
1190}
1191
1192/*
1193 * Called at the start of an inbound postcopy on reception of the
1194 * 'advise' command.
1195 */
1196static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
1197{
1198    struct vhost_user *u = dev->opaque;
1199    CharBackend *chr = u->user->chr;
1200    int ufd;
1201    VhostUserMsg msg = {
1202        .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
1203        .hdr.flags = VHOST_USER_VERSION,
1204    };
1205
1206    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1207        error_setg(errp, "Failed to send postcopy_advise to vhost");
1208        return -1;
1209    }
1210
1211    if (vhost_user_read(dev, &msg) < 0) {
1212        error_setg(errp, "Failed to get postcopy_advise reply from vhost");
1213        return -1;
1214    }
1215
1216    if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
1217        error_setg(errp, "Unexpected msg type. Expected %d received %d",
1218                     VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
1219        return -1;
1220    }
1221
1222    if (msg.hdr.size) {
1223        error_setg(errp, "Received bad msg size.");
1224        return -1;
1225    }
1226    ufd = qemu_chr_fe_get_msgfd(chr);
1227    if (ufd < 0) {
1228        error_setg(errp, "%s: Failed to get ufd", __func__);
1229        return -1;
1230    }
1231    qemu_set_nonblock(ufd);
1232
1233    /* register ufd with userfault thread */
1234    u->postcopy_fd.fd = ufd;
1235    u->postcopy_fd.data = dev;
1236    u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
1237    u->postcopy_fd.waker = vhost_user_postcopy_waker;
1238    u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */
1239    postcopy_register_shared_ufd(&u->postcopy_fd);
1240    return 0;
1241}
1242
1243/*
1244 * Called at the switch to postcopy on reception of the 'listen' command.
1245 */
1246static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
1247{
1248    struct vhost_user *u = dev->opaque;
1249    int ret;
1250    VhostUserMsg msg = {
1251        .hdr.request = VHOST_USER_POSTCOPY_LISTEN,
1252        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1253    };
1254    u->postcopy_listen = true;
1255    trace_vhost_user_postcopy_listen();
1256    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1257        error_setg(errp, "Failed to send postcopy_listen to vhost");
1258        return -1;
1259    }
1260
1261    ret = process_message_reply(dev, &msg);
1262    if (ret) {
1263        error_setg(errp, "Failed to receive reply to postcopy_listen");
1264        return ret;
1265    }
1266
1267    return 0;
1268}
1269
1270/*
1271 * Called at the end of postcopy
1272 */
1273static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
1274{
1275    VhostUserMsg msg = {
1276        .hdr.request = VHOST_USER_POSTCOPY_END,
1277        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1278    };
1279    int ret;
1280    struct vhost_user *u = dev->opaque;
1281
1282    trace_vhost_user_postcopy_end_entry();
1283    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1284        error_setg(errp, "Failed to send postcopy_end to vhost");
1285        return -1;
1286    }
1287
1288    ret = process_message_reply(dev, &msg);
1289    if (ret) {
1290        error_setg(errp, "Failed to receive reply to postcopy_end");
1291        return ret;
1292    }
1293    postcopy_unregister_shared_ufd(&u->postcopy_fd);
1294    u->postcopy_fd.handler = NULL;
1295
1296    trace_vhost_user_postcopy_end_exit();
1297
1298    return 0;
1299}
1300
1301static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
1302                                        void *opaque)
1303{
1304    struct PostcopyNotifyData *pnd = opaque;
1305    struct vhost_user *u = container_of(notifier, struct vhost_user,
1306                                         postcopy_notifier);
1307    struct vhost_dev *dev = u->dev;
1308
1309    switch (pnd->reason) {
1310    case POSTCOPY_NOTIFY_PROBE:
1311        if (!virtio_has_feature(dev->protocol_features,
1312                                VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
1313            /* TODO: Get the device name into this error somehow */
1314            error_setg(pnd->errp,
1315                       "vhost-user backend not capable of postcopy");
1316            return -ENOENT;
1317        }
1318        break;
1319
1320    case POSTCOPY_NOTIFY_INBOUND_ADVISE:
1321        return vhost_user_postcopy_advise(dev, pnd->errp);
1322
1323    case POSTCOPY_NOTIFY_INBOUND_LISTEN:
1324        return vhost_user_postcopy_listen(dev, pnd->errp);
1325
1326    case POSTCOPY_NOTIFY_INBOUND_END:
1327        return vhost_user_postcopy_end(dev, pnd->errp);
1328
1329    default:
1330        /* We ignore notifications we don't know */
1331        break;
1332    }
1333
1334    return 0;
1335}
1336
/*
 * Backend initialisation for vhost-user: negotiates feature and
 * protocol-feature bits with the backend, wires up the slave request
 * channel and registers for postcopy notifications.
 * Returns 0 on success or the first failing step's negative error.
 * On failure, the allocated vhost_user state stays in dev->opaque and is
 * released by vhost_user_backend_cleanup().
 */
static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque)
{
    uint64_t features, protocol_features;
    struct vhost_user *u;
    int err;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    u = g_new0(struct vhost_user, 1);
    u->user = opaque;
    u->slave_fd = -1;
    u->dev = dev;
    dev->opaque = u;

    err = vhost_user_get_features(dev, &features);
    if (err < 0) {
        return err;
    }

    if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
        dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;

        err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
                                 &protocol_features);
        if (err < 0) {
            return err;
        }

        /* Only keep the protocol bits this QEMU knows about. */
        dev->protocol_features =
            protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK;

        if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
            /* Don't acknowledge CONFIG feature if device doesn't support it */
            dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
        } else if (!(protocol_features &
                    (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) {
            error_report("Device expects VHOST_USER_PROTOCOL_F_CONFIG "
                    "but backend does not support it.");
            return -1;
        }

        err = vhost_user_set_protocol_features(dev, dev->protocol_features);
        if (err < 0) {
            return err;
        }

        /* query the max queues we support if backend supports Multiple Queue */
        if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
            err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
                                     &dev->max_queues);
            if (err < 0) {
                return err;
            }
        }

        /*
         * IOTLB misses are delivered over the slave channel and must be
         * acknowledged, so IOMMU support needs both SLAVE_REQ and
         * REPLY_ACK to have been negotiated.
         */
        if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
                !(virtio_has_feature(dev->protocol_features,
                    VHOST_USER_PROTOCOL_F_SLAVE_REQ) &&
                 virtio_has_feature(dev->protocol_features,
                    VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
            error_report("IOMMU support requires reply-ack and "
                         "slave-req protocol features.");
            return -1;
        }
    }

    /* Without a shared-memory dirty log, migration cannot be tracked. */
    if (dev->migration_blocker == NULL &&
        !virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
        error_setg(&dev->migration_blocker,
                   "Migration disabled: vhost-user backend lacks "
                   "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
    }

    err = vhost_setup_slave_channel(dev);
    if (err < 0) {
        return err;
    }

    u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
    postcopy_add_notifier(&u->postcopy_notifier);

    return 0;
}
1421
1422static int vhost_user_backend_cleanup(struct vhost_dev *dev)
1423{
1424    struct vhost_user *u;
1425
1426    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1427
1428    u = dev->opaque;
1429    if (u->postcopy_notifier.notify) {
1430        postcopy_remove_notifier(&u->postcopy_notifier);
1431        u->postcopy_notifier.notify = NULL;
1432    }
1433    if (u->slave_fd >= 0) {
1434        qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
1435        close(u->slave_fd);
1436        u->slave_fd = -1;
1437    }
1438    g_free(u->region_rb);
1439    u->region_rb = NULL;
1440    g_free(u->region_rb_offset);
1441    u->region_rb_offset = NULL;
1442    u->region_rb_len = 0;
1443    g_free(u);
1444    dev->opaque = 0;
1445
1446    return 0;
1447}
1448
1449static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
1450{
1451    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
1452
1453    return idx;
1454}
1455
/*
 * Maximum number of memory regions that can be sent to the backend in a
 * single SET_MEM_TABLE message.
 */
static int vhost_user_memslots_limit(struct vhost_dev *dev)
{
    return VHOST_MEMORY_MAX_NREGIONS;
}
1460
/*
 * The dirty log must live in a shared-memory fd (handed to the backend)
 * when the LOG_SHMFD protocol feature was negotiated.
 */
static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    return virtio_has_feature(dev->protocol_features,
                              VHOST_USER_PROTOCOL_F_LOG_SHMFD);
}
1468
1469static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
1470{
1471    VhostUserMsg msg = { };
1472
1473    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1474
1475    /* If guest supports GUEST_ANNOUNCE do nothing */
1476    if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
1477        return 0;
1478    }
1479
1480    /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */
1481    if (virtio_has_feature(dev->protocol_features,
1482                           VHOST_USER_PROTOCOL_F_RARP)) {
1483        msg.hdr.request = VHOST_USER_SEND_RARP;
1484        msg.hdr.flags = VHOST_USER_VERSION;
1485        memcpy((char *)&msg.payload.u64, mac_addr, 6);
1486        msg.hdr.size = sizeof(msg.payload.u64);
1487
1488        return vhost_user_write(dev, &msg, NULL, 0);
1489    }
1490    return -1;
1491}
1492
1493static bool vhost_user_can_merge(struct vhost_dev *dev,
1494                                 uint64_t start1, uint64_t size1,
1495                                 uint64_t start2, uint64_t size2)
1496{
1497    ram_addr_t offset;
1498    int mfd, rfd;
1499    MemoryRegion *mr;
1500
1501    mr = memory_region_from_host((void *)(uintptr_t)start1, &offset);
1502    mfd = memory_region_get_fd(mr);
1503
1504    mr = memory_region_from_host((void *)(uintptr_t)start2, &offset);
1505    rfd = memory_region_get_fd(mr);
1506
1507    return mfd == rfd;
1508}
1509
1510static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
1511{
1512    VhostUserMsg msg;
1513    bool reply_supported = virtio_has_feature(dev->protocol_features,
1514                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
1515
1516    if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
1517        return 0;
1518    }
1519
1520    msg.hdr.request = VHOST_USER_NET_SET_MTU;
1521    msg.payload.u64 = mtu;
1522    msg.hdr.size = sizeof(msg.payload.u64);
1523    msg.hdr.flags = VHOST_USER_VERSION;
1524    if (reply_supported) {
1525        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1526    }
1527
1528    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1529        return -1;
1530    }
1531
1532    /* If reply_ack supported, slave has to ack specified MTU is valid */
1533    if (reply_supported) {
1534        return process_message_reply(dev, &msg);
1535    }
1536
1537    return 0;
1538}
1539
1540static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
1541                                            struct vhost_iotlb_msg *imsg)
1542{
1543    VhostUserMsg msg = {
1544        .hdr.request = VHOST_USER_IOTLB_MSG,
1545        .hdr.size = sizeof(msg.payload.iotlb),
1546        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1547        .payload.iotlb = *imsg,
1548    };
1549
1550    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1551        return -EFAULT;
1552    }
1553
1554    return process_message_reply(dev, &msg);
1555}
1556
1557
/*
 * Intentionally empty: vhost-user has no dedicated IOTLB channel to
 * enable or disable, so there is nothing to do here.
 */
static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
{
    /* No-op as the receive channel is not dedicated to IOTLB messages. */
}
1562
1563static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
1564                                 uint32_t config_len)
1565{
1566    VhostUserMsg msg = {
1567        .hdr.request = VHOST_USER_GET_CONFIG,
1568        .hdr.flags = VHOST_USER_VERSION,
1569        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
1570    };
1571
1572    if (!virtio_has_feature(dev->protocol_features,
1573                VHOST_USER_PROTOCOL_F_CONFIG)) {
1574        return -1;
1575    }
1576
1577    if (config_len > VHOST_USER_MAX_CONFIG_SIZE) {
1578        return -1;
1579    }
1580
1581    msg.payload.config.offset = 0;
1582    msg.payload.config.size = config_len;
1583    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1584        return -1;
1585    }
1586
1587    if (vhost_user_read(dev, &msg) < 0) {
1588        return -1;
1589    }
1590
1591    if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
1592        error_report("Received unexpected msg type. Expected %d received %d",
1593                     VHOST_USER_GET_CONFIG, msg.hdr.request);
1594        return -1;
1595    }
1596
1597    if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
1598        error_report("Received bad msg size.");
1599        return -1;
1600    }
1601
1602    memcpy(config, msg.payload.config.region, config_len);
1603
1604    return 0;
1605}
1606
1607static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
1608                                 uint32_t offset, uint32_t size, uint32_t flags)
1609{
1610    uint8_t *p;
1611    bool reply_supported = virtio_has_feature(dev->protocol_features,
1612                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
1613
1614    VhostUserMsg msg = {
1615        .hdr.request = VHOST_USER_SET_CONFIG,
1616        .hdr.flags = VHOST_USER_VERSION,
1617        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
1618    };
1619
1620    if (!virtio_has_feature(dev->protocol_features,
1621                VHOST_USER_PROTOCOL_F_CONFIG)) {
1622        return -1;
1623    }
1624
1625    if (reply_supported) {
1626        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1627    }
1628
1629    if (size > VHOST_USER_MAX_CONFIG_SIZE) {
1630        return -1;
1631    }
1632
1633    msg.payload.config.offset = offset,
1634    msg.payload.config.size = size,
1635    msg.payload.config.flags = flags,
1636    p = msg.payload.config.region;
1637    memcpy(p, data, size);
1638
1639    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1640        return -1;
1641    }
1642
1643    if (reply_supported) {
1644        return process_message_reply(dev, &msg);
1645    }
1646
1647    return 0;
1648}
1649
1650static int vhost_user_crypto_create_session(struct vhost_dev *dev,
1651                                            void *session_info,
1652                                            uint64_t *session_id)
1653{
1654    bool crypto_session = virtio_has_feature(dev->protocol_features,
1655                                       VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
1656    CryptoDevBackendSymSessionInfo *sess_info = session_info;
1657    VhostUserMsg msg = {
1658        .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
1659        .hdr.flags = VHOST_USER_VERSION,
1660        .hdr.size = sizeof(msg.payload.session),
1661    };
1662
1663    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1664
1665    if (!crypto_session) {
1666        error_report("vhost-user trying to send unhandled ioctl");
1667        return -1;
1668    }
1669
1670    memcpy(&msg.payload.session.session_setup_data, sess_info,
1671              sizeof(CryptoDevBackendSymSessionInfo));
1672    if (sess_info->key_len) {
1673        memcpy(&msg.payload.session.key, sess_info->cipher_key,
1674               sess_info->key_len);
1675    }
1676    if (sess_info->auth_key_len > 0) {
1677        memcpy(&msg.payload.session.auth_key, sess_info->auth_key,
1678               sess_info->auth_key_len);
1679    }
1680    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1681        error_report("vhost_user_write() return -1, create session failed");
1682        return -1;
1683    }
1684
1685    if (vhost_user_read(dev, &msg) < 0) {
1686        error_report("vhost_user_read() return -1, create session failed");
1687        return -1;
1688    }
1689
1690    if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
1691        error_report("Received unexpected msg type. Expected %d received %d",
1692                     VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
1693        return -1;
1694    }
1695
1696    if (msg.hdr.size != sizeof(msg.payload.session)) {
1697        error_report("Received bad msg size.");
1698        return -1;
1699    }
1700
1701    if (msg.payload.session.session_id < 0) {
1702        error_report("Bad session id: %" PRId64 "",
1703                              msg.payload.session.session_id);
1704        return -1;
1705    }
1706    *session_id = msg.payload.session.session_id;
1707
1708    return 0;
1709}
1710
1711static int
1712vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
1713{
1714    bool crypto_session = virtio_has_feature(dev->protocol_features,
1715                                       VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
1716    VhostUserMsg msg = {
1717        .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION,
1718        .hdr.flags = VHOST_USER_VERSION,
1719        .hdr.size = sizeof(msg.payload.u64),
1720    };
1721    msg.payload.u64 = session_id;
1722
1723    if (!crypto_session) {
1724        error_report("vhost-user trying to send unhandled ioctl");
1725        return -1;
1726    }
1727
1728    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1729        error_report("vhost_user_write() return -1, close session failed");
1730        return -1;
1731    }
1732
1733    return 0;
1734}
1735
1736static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
1737                                          MemoryRegionSection *section)
1738{
1739    bool result;
1740
1741    result = memory_region_get_fd(section->mr) >= 0;
1742
1743    return result;
1744}
1745
1746VhostUserState *vhost_user_init(void)
1747{
1748    VhostUserState *user = g_new0(struct VhostUserState, 1);
1749
1750    return user;
1751}
1752
1753void vhost_user_cleanup(VhostUserState *user)
1754{
1755    int i;
1756
1757    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1758        if (user->notifier[i].addr) {
1759            object_unparent(OBJECT(&user->notifier[i].mr));
1760            munmap(user->notifier[i].addr, qemu_real_host_page_size);
1761            user->notifier[i].addr = NULL;
1762        }
1763    }
1764}
1765
/*
 * Dispatch table binding the generic vhost backend interface to the
 * vhost-user protocol implementation in this file.
 */
const VhostOps user_ops = {
        .backend_type = VHOST_BACKEND_TYPE_USER,
        .vhost_backend_init = vhost_user_backend_init,
        .vhost_backend_cleanup = vhost_user_backend_cleanup,
        .vhost_backend_memslots_limit = vhost_user_memslots_limit,
        .vhost_set_log_base = vhost_user_set_log_base,
        .vhost_set_mem_table = vhost_user_set_mem_table,
        .vhost_set_vring_addr = vhost_user_set_vring_addr,
        .vhost_set_vring_endian = vhost_user_set_vring_endian,
        .vhost_set_vring_num = vhost_user_set_vring_num,
        .vhost_set_vring_base = vhost_user_set_vring_base,
        .vhost_get_vring_base = vhost_user_get_vring_base,
        .vhost_set_vring_kick = vhost_user_set_vring_kick,
        .vhost_set_vring_call = vhost_user_set_vring_call,
        .vhost_set_features = vhost_user_set_features,
        .vhost_get_features = vhost_user_get_features,
        .vhost_set_owner = vhost_user_set_owner,
        .vhost_reset_device = vhost_user_reset_device,
        .vhost_get_vq_index = vhost_user_get_vq_index,
        .vhost_set_vring_enable = vhost_user_set_vring_enable,
        .vhost_requires_shm_log = vhost_user_requires_shm_log,
        .vhost_migration_done = vhost_user_migration_done,
        .vhost_backend_can_merge = vhost_user_can_merge,
        .vhost_net_set_mtu = vhost_user_net_set_mtu,
        .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
        .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
        .vhost_get_config = vhost_user_get_config,
        .vhost_set_config = vhost_user_set_config,
        .vhost_crypto_create_session = vhost_user_crypto_create_session,
        .vhost_crypto_close_session = vhost_user_crypto_close_session,
        .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
};
1798